In [47]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [48]:
df = pd.read_csv('accident - accident - accident - accident.csv')
df.head()

Unnamed: 0,Age,Gender,Speed_of_Impact,Helmet_Used,Seatbelt_Used,Survived
0,56,Female,27.0,No,No,1
1,69,Female,46.0,No,Yes,1
2,46,Male,46.0,Yes,Yes,0
3,32,Male,117.0,No,Yes,0
4,60,Female,40.0,Yes,Yes,0


In [49]:
df.isnull().sum()

Age                0
Gender             1
Speed_of_Impact    3
Helmet_Used        0
Seatbelt_Used      0
Survived           0
dtype: int64

In [50]:

df['Gender'].fillna(df['Gender'].mode()[0], inplace=True) 
df['Speed_of_Impact'].fillna(df['Speed_of_Impact'].median(), inplace=True)  

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Gender'].fillna(df['Gender'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Speed_of_Impact'].fillna(df['Speed_of_Impact'].median(), inplace=True)


In [51]:
df.isnull().sum()

Age                0
Gender             0
Speed_of_Impact    0
Helmet_Used        0
Seatbelt_Used      0
Survived           0
dtype: int64

In [52]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Age              200 non-null    int64  
 1   Gender           200 non-null    object 
 2   Speed_of_Impact  200 non-null    float64
 3   Helmet_Used      200 non-null    object 
 4   Seatbelt_Used    200 non-null    object 
 5   Survived         200 non-null    int64  
dtypes: float64(1), int64(2), object(3)
memory usage: 9.5+ KB


In [53]:
le_gender = LabelEncoder()
df['Gender'] = le_gender.fit_transform(df['Gender'])

le_helmet = LabelEncoder()
df['Helmet_Used'] = le_helmet.fit_transform(df['Helmet_Used'])

le_seatbelt = LabelEncoder()
df['Seatbelt_Used'] = le_seatbelt.fit_transform(df['Seatbelt_Used'])

In [54]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Age              200 non-null    int64  
 1   Gender           200 non-null    int32  
 2   Speed_of_Impact  200 non-null    float64
 3   Helmet_Used      200 non-null    int32  
 4   Seatbelt_Used    200 non-null    int32  
 5   Survived         200 non-null    int64  
dtypes: float64(1), int32(3), int64(2)
memory usage: 7.2 KB


In [55]:
X = df.drop(columns=['Survived'])
y = df['Survived']

In [56]:
X

Unnamed: 0,Age,Gender,Speed_of_Impact,Helmet_Used,Seatbelt_Used
0,56,0,27.0,0,0
1,69,0,46.0,0,1
2,46,1,46.0,1,1
3,32,1,117.0,0,1
4,60,0,40.0,1,1
...,...,...,...,...,...
195,69,0,111.0,0,1
196,30,0,51.0,0,1
197,58,1,110.0,0,1
198,20,1,103.0,0,1


In [57]:
y

0      1
1      1
2      0
3      0
4      0
      ..
195    1
196    1
197    1
198    1
199    1
Name: Survived, Length: 200, dtype: int64

In [58]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [59]:
X

Unnamed: 0,Age,Gender,Speed_of_Impact,Helmet_Used,Seatbelt_Used
0,56,0,27.0,0,0
1,69,0,46.0,0,1
2,46,1,46.0,1,1
3,32,1,117.0,0,1
4,60,0,40.0,1,1
...,...,...,...,...,...
195,69,0,111.0,0,1
196,30,0,51.0,0,1
197,58,1,110.0,0,1
198,20,1,103.0,0,1


In [60]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [61]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

In [62]:
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"KNN Accuracy: {accuracy:.2f}")

KNN Accuracy: 0.55


In [63]:
import pickle

with open("knn_model.pkl", "wb") as model_file:
    pickle.dump(knn, model_file)



In [64]:

with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)


In [65]:

with open("label_encoders.pkl", "wb") as enc_file:
    pickle.dump({"Gender": le_gender, "Helmet_Used": le_helmet, "Seatbelt_Used": le_seatbelt}, enc_file)