In [11]:
# ==========================================
# Analisis Dataset Car Crashes
# ==========================================

# 🧩 Step 1: Import Library
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Untuk tampilan grafik yang rapi
sns.set(style="whitegrid")
plt.rcParams['figure.figsize'] = (8, 5)

# 🧩 Step 2: Baca Dataset
df = pd.read_csv("car_crashes.csv")

# 🧩 Step 3: Lihat 5 Data Pertama
print("=== 5 Data Pertama ===")
display(df.head())

# 🧩 Step 4: Informasi Dataset
print("\n=== Informasi Dataset ===")
df.info()

# 🧩 Step 5: Statistik Deskriptif
print("\n=== Statistik Deskriptif ===")
display(df.describe())

# 🧩 Step 6: Cek Missing Values
print("\n=== Jumlah Missing Values ===")
print(df.isnull().sum())

# 🧩 Step 7: Visualisasi Korelasi
plt.figure(figsize=(10,6))
sns.heatmap(df.corr(), annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Korelasi Antar Variabel dalam Data Car Crashes")
plt.show()

# 🧩 Step 8: Visualisasi Scatterplot
plt.figure(figsize=(7,5))
sns.scatterplot(x="speeding", y="alcohol", data=df, hue="total", palette="viridis", s=80)
plt.title("Hubungan antara Speeding dan Alcohol terhadap Total Kecelakaan")
plt.xlabel("Persentase Pengemudi yang Ngebut (Speeding)")
plt.ylabel("Persentase Pengemudi di bawah Pengaruh Alkohol")
plt.show()

# 🧩 Step 9: Visualisasi Barplot
plt.figure(figsize=(9,5))
sns.barplot(x="total", y="ins_premium", data=df, palette="mako")
plt.title("Total Kecelakaan vs Premi Asuransi")
plt.xlabel("Total Kecelakaan")
plt.ylabel("Premi Asuransi")
plt.show()

# 🧩 Step 10: Analisis Tambahan — Faktor Utama
correlations = df.corr()["total"].sort_values(ascending=False)
print("\n=== Korelasi dengan Total Kecelakaan ===")
display(correlations)

# 🧩 Step 11: Kesimpulan
print("\n=== Kesimpulan Awal ===")
print("""
1. Data berisi informasi tentang tingkat kecelakaan mobil di tiap negara bagian AS.
2. Faktor 'alcohol' dan 'speeding' memiliki korelasi tinggi dengan 'total' kecelakaan.
3. Tingginya premi asuransi juga sejalan dengan meningkatnya jumlah kecelakaan.
4. Analisis ini dapat digunakan untuk kebijakan keselamatan lalu lintas dan penetapan premi asuransi.
""")


=== 5 Data Pertama ===


Unnamed: 0,total,speeding,alcohol,not_distracted,no_previous,ins_premium,ins_losses,abbrev
0,18.8,7.332,5.64,18.048,15.04,784.55,145.08,AL
1,18.1,7.421,4.525,16.29,17.014,1053.48,133.93,AK
2,18.6,6.51,5.208,15.624,17.856,899.47,110.35,AZ
3,22.4,4.032,5.824,21.056,21.28,827.34,142.39,AR
4,12.0,4.2,3.36,10.92,10.68,878.41,165.63,CA



=== Informasi Dataset ===
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   total           51 non-null     float64
 1   speeding        51 non-null     float64
 2   alcohol         51 non-null     float64
 3   not_distracted  51 non-null     float64
 4   no_previous     51 non-null     float64
 5   ins_premium     51 non-null     float64
 6   ins_losses      51 non-null     float64
 7   abbrev          51 non-null     object 
dtypes: float64(7), object(1)
memory usage: 3.3+ KB

=== Statistik Deskriptif ===


Unnamed: 0,total,speeding,alcohol,not_distracted,no_previous,ins_premium,ins_losses
count,51.0,51.0,51.0,51.0,51.0,51.0,51.0
mean,15.790196,4.998196,4.886784,13.573176,14.004882,886.957647,134.493137
std,4.122002,2.017747,1.729133,4.508977,3.764672,178.296285,24.835922
min,5.9,1.792,1.593,1.76,5.9,641.96,82.75
25%,12.75,3.7665,3.894,10.478,11.348,768.43,114.645
50%,15.6,4.608,4.554,13.857,13.775,858.97,136.05
75%,18.5,6.439,5.604,16.14,16.755,1007.945,151.87
max,23.9,9.45,10.038,23.661,21.28,1301.52,194.78



=== Jumlah Missing Values ===
total             0
speeding          0
alcohol           0
not_distracted    0
no_previous       0
ins_premium       0
ins_losses        0
abbrev            0
dtype: int64


ValueError: could not convert string to float: 'AL'

<Figure size 1000x600 with 0 Axes>

# Bagian Baru