In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# Load the dataset
df = pd.read_csv('Fuel_Efficiency_Updated.csv')

# Function to determine driving style
def determine_driving_style(row):
    if (row['Average_Speed_kmh'] > 130 or
        row['Acceleration_Pattern'] == 'aggressive' or
        row['Braking_Pattern'] == 'hard' or
        row['Traffic_Violations'] > 4):
        return 'aggressive'
    elif (row['Average_Speed_kmh'] < 60 or
          row['Acceleration_Pattern'] == 'conservative' or
          row['Braking_Pattern'] == 'gentle' or
          row['Traffic_Violations'] <= 2):
        return 'conservative'
    else:
        return 'moderate'

In [4]:
# Apply the function to each row
df['Driving_Style'] = df.apply(determine_driving_style, axis=1)

# Select required columns
df = df[['Driving_Duration_hrs', 'Average_Speed_kmh', 'Acceleration_Pattern',
         'Braking_Pattern', 'Driving_Style', 'Safety_Features_Usage',
         'Traffic_Violations']].copy()

In [5]:
# Convert categorical variables to numerical
le = LabelEncoder()
for column in ['Acceleration_Pattern', 'Braking_Pattern', 'Driving_Style', 'Safety_Features_Usage']:
    df[column] = le.fit_transform(df[column])

In [6]:
# Split data into features (X) and target (y)
X = df.drop('Driving_Style', axis=1)
y = df['Driving_Style']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Perform GridSearchCV to find the best C value
param_grid = {'C': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
grid_search = GridSearchCV(SVC(kernel='rbf', gamma='scale'), param_grid, cv=10)
grid_search.fit(X, y)
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best parameters:", best_params)
print("Best cross-validation score:", best_score)

Best parameters: {'C': 10}
Best cross-validation score: 0.8915507328846418


In [8]:
# Define the SVM model with the best parameters
svm_model = SVC(kernel='rbf', C=best_params['C'], gamma='scale')
svm_model.fit(X_train, y_train)

In [9]:
# Evaluate the model on training and testing sets
train_accuracy = svm_model.score(X_train, y_train)
test_accuracy = svm_model.score(X_test, y_test)
print("Training Accuracy:", train_accuracy)
print("Validation Accuracy:", test_accuracy)

Training Accuracy: 0.8821250691754289
Validation Accuracy: 0.8860619469026548


In [10]:
# Perform cross-validation with the best parameters
cross_val_scores = cross_val_score(svm_model, X, y, cv=10)
print("Cross-validation scores:", cross_val_scores)
print("Mean cross-validation score:", cross_val_scores.mean())

Cross-validation scores: [0.90265487 0.86061947 0.87168142 0.8960177  0.88716814 0.86061947
 0.92035398 0.90707965 0.89800443 0.9113082 ]
Mean cross-validation score: 0.8915507328846418


In [11]:
# Classification report
y_pred = svm_model.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.85      0.85       288
           1       0.96      0.96      0.96       414
           2       0.79      0.79      0.79       202

    accuracy                           0.89       904
   macro avg       0.87      0.87      0.87       904
weighted avg       0.89      0.89      0.89       904



In [12]:
# Function to map numerical labels back to driving style names
def get_driving_style_label(label):
    driving_styles = {0: 'aggressive', 1: 'Conservative', 2: 'moderate'}
    return driving_styles.get(label, "Unknown")

In [13]:
# # Input new data for prediction
# print("Enter the values for the features:")
# Driving_Duration_hrs = float(input("Driving Duration (hours): "))
# Average_Speed_kmh = float(input("Average Speed (km/h): "))
# Acceleration_Pattern = int(input("Acceleration Pattern (0: Conservative, 1: Moderate, 2: Aggressive): "))
# Braking_Pattern = int(input("Braking Pattern (0: Gentle, 1: Normal, 2: Hard): "))
# Safety_Features_Usage = int(input("Safety Features Usage (0: No, 1: Yes): "))
# Traffic_Violations = int(input("Traffic Violations (0 to 5): "))

# # Create a new input DataFrame
# new_input_df = pd.DataFrame({
#     'Driving_Duration_hrs': [Driving_Duration_hrs],
#     'Average_Speed_kmh': [Average_Speed_kmh],
#     'Acceleration_Pattern': [Acceleration_Pattern],
#     'Braking_Pattern': [Braking_Pattern],
#     'Safety_Features_Usage': [Safety_Features_Usage],
#     'Traffic_Violations': [Traffic_Violations]
# })

# # Make prediction
# prediction = svm_model.predict(new_input_df)

# # Print the predicted driving style
# predicted_driving_style_label = get_driving_style_label(prediction[0])
# print("Predicted Driving Style:", predicted_driving_style_label)

In [14]:
import pickle
with open('svm_model.pkl', 'wb') as f:
    pickle.dump(svm_model, f)
 