In [77]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from pathlib import Path
import os
import pickle


In [78]:
#Path for data csv
data_path = os.path.join(os.getcwd(), '..', 'csv', 'dataset.csv')

In [79]:
# Load the dataset
data = pd.read_csv(data_path)
data.columns = data.columns.str.strip()

In [80]:
data

Unnamed: 0,Velocity,Lean,Pitch,Yaw,Status
0,27.807470,21.937790,0.187675,0.187675,Not Crashed
1,29.343370,32.098190,-3.914246,-17.031890,Crashed
2,5.456637,0.000954,0.273161,0.273161,Not Crashed
3,24.100100,-17.533870,0.246702,0.246702,Not Crashed
4,45.178600,-42.099580,0.208221,0.208221,Not Crashed
...,...,...,...,...,...
167,38.733310,27.477790,0.265729,0.265729,Not Crashed
168,52.069290,33.461380,0.232162,0.232162,Not Crashed
169,56.339810,38.866490,0.217463,0.217463,Not Crashed
170,38.145910,19.990100,0.221607,0.221607,Not Crashed


In [81]:
# Separate features and target variable
X = data.drop('Status', axis=1)
y = data['Status']


In [82]:
# Perform label encoding on the target variable
le = LabelEncoder()
y = le.fit_transform(y)


In [83]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [84]:
# Define a list of classifiers to evaluate
classifiers = [
    LogisticRegression(),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    SVC()
]

In [85]:
# Evaluate each classifier using cross-validation
for classifier in classifiers:
    scores = cross_val_score(classifier, X_train, y_train, cv=5)
    accuracy = scores.mean()
    print(f"{classifier.__class__.__name__} Accuracy: {accuracy}")

LogisticRegression Accuracy: 0.8468253968253968
DecisionTreeClassifier Accuracy: 0.9275132275132275
RandomForestClassifier Accuracy: 0.9854497354497355
SVC Accuracy: 0.9412698412698413


In [86]:
# Select the best classifier based on cross-validation results
best_classifier = classifiers[scores.argmax()]
print(f"Best Classifier: {best_classifier.__class__.__name__}")

Best Classifier: DecisionTreeClassifier


In [87]:
# Train the best classifier on the full training set
best_classifier.fit(X_train, y_train)


In [88]:
# Evaluate the best classifier on the test set
test_accuracy = best_classifier.score(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")

Test Accuracy: 0.9142857142857143


In [89]:
## Test the model in action
model = best_classifier

In [90]:
# Define the data for a single incident
data = {
    'Velocity': [27.80747],
    'Lean': [21.93779],
    'Pitch': [0.1876749],
    'Yaw': [0.1876749]
}


In [91]:
# Create a DataFrame from the data
X = pd.DataFrame(data)

In [92]:
# Make a prediction on the single incident data
prediction = model.predict(X)

ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- yaw
Feature names seen at fit time, yet now missing:
- Yaw


In [75]:
# Decode the predicted label back to its original category
predicted_category = le.inverse_transform(prediction)


In [76]:
predicted_category

array(['Not Crashed'], dtype=object)

In [55]:
# Export model
with open('trained_model.pkl', 'wb') as f:
    pickle.dump(model, f)