<a href="https://colab.research.google.com/github/aliiamrr/Model-Evaluation-Project/blob/main/Model_Evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install --upgrade category_encoders

In [None]:
#IMPORTS

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_score, mean_squared_error



In [None]:
#LOADING THE CAR PRICE CSV INTO THE BASE DATA FRAME
# `df` keeps the raw columns; the preprocessing cells below derive new frames from it.
df = pd.read_csv(filepath_or_buffer="/content/CarPrice_Assignment.csv")

In [None]:
#NOMINAL (UNORDERED CATEGORICAL) COLUMNS
nominal_columns = [
    'CarName',
    'fueltype',
    'aspiration',
    'doornumber',
    'carbody',
    'drivewheel',
    'enginelocation',
    'enginetype',
    'cylindernumber',
    'fuelsystem',
]

#ONE HOT ENCODING THE NOMINAL COLUMNS
# Each generated indicator column is prefixed with the name of the column it came from.
one_hot_df = pd.get_dummies(df[nominal_columns], prefix=nominal_columns)

#SWAPPING THE ORIGINAL NOMINAL COLUMNS FOR THEIR ENCODED VERSIONS
# The remaining columns of `df` come first, followed by the indicator columns.
discretized_df = pd.concat([df.drop(columns=nominal_columns), one_hot_df], axis=1)



In [None]:
#TURNING THE CONTINUOUS PRICE INTO FIVE PRICE BANDS

# Band edges and their names; np.inf leaves the top band open-ended above 40000.
bins = [0, 10000, 20000, 30000, 40000, np.inf]
labels = ['Very Low', 'Low', 'Medium', 'High', 'Very High']

# Bin the numeric price from the untouched `df` (pd.cut with its default interval
# settings), then replace the price column of the encoded frame with the bands.
price_bins = pd.cut(x=df['price'], bins=bins, labels=labels)
discretized_df = discretized_df.assign(price=price_bins)


In [None]:
#SEPARATING FEATURES (X) FROM THE BINNED PRICE TARGET (Y)
X = discretized_df.drop(columns="price")
y = discretized_df.loc[:, "price"]

#HOLDING OUT 20% OF THE ROWS FOR TESTING
# random_state is fixed so the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
#NAIVE BAYES

In [None]:
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
#INSTANTIATING THE MODEL
model = GaussianNB()

#TRAINING MODEL
model.fit(X_train, y_train)

#MAKING PREDICTIONS
y_pred = model.predict(X_test)

#CALCULATING THE ACCURACY/CONFUSION MATRIX/RECALL AND PRECISION
# Recall and precision are averaged across the price bands, weighted by how many
# test rows each band has. zero_division=0 makes the score for a band with no
# predicted (or no true) rows an explicit 0 instead of raising UndefinedMetricWarning.
accuracy_nb = accuracy_score(y_test, y_pred)
confusion_mat_nb = confusion_matrix(y_test, y_pred)
recall_nb = recall_score(y_test, y_pred, average='weighted', zero_division=0)
precision_nb = precision_score(y_test, y_pred, average='weighted', zero_division=0)


In [None]:
#PRINTING THE NAIVE BAYES METRICS

# (caption, value) pairs, printed in this order below the header line.
naive_bayes_report = (
    ("Accuracy of the decision naive bayes is : ", accuracy_nb),
    ("confusion matrix of the naive bayes is : ", confusion_mat_nb),
    ("Recall of the  naive bayes is : ", recall_nb),
    ("Precision of the  naive bayes is : ", precision_nb),
)

print("NAIVE BAYES RESULTS: ")
for caption, value in naive_bayes_report:
    print(caption, value)
print("----------------------------------------------------------------------------------------------------------------------------------------------------------------------")


NAIVE BAYES RESULTS: 
Accuracy of the decision naive bayes is :  0.6341463414634146
confusion matrix of the naive bayes is :  [[ 0  0  1  0  0]
 [ 0  7  1  0  4]
 [ 1  2  1  0  0]
 [ 1  0  1  0  0]
 [ 0  3  1  0 18]]
Recall of the  naive bayes is :  0.6341463414634146
Precision of the  naive bayes is :  0.6292682926829268
----------------------------------------------------------------------------------------------------------------------------------------------------------------------


In [None]:
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
#DECISION TREE

In [None]:
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
#INSTANTIATING THE MODEL
# A fixed random_state (the same seed the train/test split uses) makes the fitted
# tree, and therefore every metric below, identical on each Restart & Run All.
decision_tree = DecisionTreeClassifier(random_state=42)

#TRAINING MODEL
decision_tree.fit(X_train, y_train)

#MAKING PREDICTIONS
prediction = decision_tree.predict(X_test)

#CALCULATING THE ACCURACY/CONFUSION MATRIX/RECALL AND PRECISION
# Recall and precision are averaged across the price bands, weighted by how many
# test rows each band has. zero_division=0 makes the score for a band with no
# predicted (or no true) rows an explicit 0 instead of raising UndefinedMetricWarning.
accuracy_dt = accuracy_score(y_test, prediction)
conf_matrix_dt = confusion_matrix(y_test, prediction)
recall_dt = recall_score(y_test, prediction, average='weighted', zero_division=0)
precision_dt = precision_score(y_test, prediction, average='weighted', zero_division=0)


In [None]:
#PRINTING THE DECISION TREE METRICS

# (caption, value) pairs, printed in this order below the header line.
decision_tree_report = (
    ("Accuracy of the decision tree model is : ", accuracy_dt),
    ("confusion matrix of the  decision tree model is : ", conf_matrix_dt),
    ("Recall of the decision tree model is : ", recall_dt),
    ("Precision of the decision tree model is : ", precision_dt),
)

print("DECISION TREE RESULTS: ")
for caption, value in decision_tree_report:
    print(caption, value)
print("----------------------------------------------------------------------------------------------------------------------------------------------------------------------")



DECISION TREE RESULTS: 
Accuracy of the decision tree model is :  0.8292682926829268
confusion matrix of the  decision tree model is :  [[ 1  0  0  0  0]
 [ 0 12  0  0  0]
 [ 3  1  0  0  0]
 [ 1  0  0  1  0]
 [ 0  2  0  0 20]]
Recall of the decision tree model is :  0.8292682926829268
Precision of the decision tree model is :  0.824390243902439
----------------------------------------------------------------------------------------------------------------------------------------------------------------------


In [None]:
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
#KNN:
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
# KNN Classifier

#SCALING THE NUMERIC FEATURES
# KNN picks neighbours by distance, so unscaled columns with large magnitudes would
# outweigh the 0/1 indicator columns. Only the columns that did not come from the
# one-hot encoding are standardised, and the scaler is fitted on the training split
# alone so no statistics from the test rows leak into training.
numeric_columns = [col for col in X_train.columns if col not in one_hot_df.columns]
X_train_knn = X_train.astype(float)
X_test_knn = X_test.astype(float)
if numeric_columns:
    knn_scaler = StandardScaler()
    X_train_knn[numeric_columns] = knn_scaler.fit_transform(X_train[numeric_columns])
    X_test_knn[numeric_columns] = knn_scaler.transform(X_test[numeric_columns])

#INSTANTIATING THE KNN MODEL WITH 3 NEIGHBOURS
knn_classifier = KNeighborsClassifier(n_neighbors=3)

#TRAINING MODEL
knn_classifier.fit(X_train_knn, y_train)

#MAKING PREDICTIONS
y_pred_knn_classifier = knn_classifier.predict(X_test_knn)

#CALCULATING THE ACCURACY/CONFUSION MATRIX/RECALL AND PRECISION
# Recall and precision are averaged across the price bands, weighted by how many
# test rows each band has. zero_division=0 makes the score for a band with no
# predicted (or no true) rows an explicit 0 instead of raising UndefinedMetricWarning.
accuracy_knn_classifier = accuracy_score(y_test, y_pred_knn_classifier)
conf_matrix_knn_classifier = confusion_matrix(y_test, y_pred_knn_classifier)
recall_knn_classifier = recall_score(y_test, y_pred_knn_classifier, average='weighted', zero_division=0)
precision_knn_classifier = precision_score(y_test, y_pred_knn_classifier, average='weighted', zero_division=0)


In [None]:
# KNN Regressor

#REDEFINING THE TARGET AS THE CONTINUOUS PRICE
# The regressor is trained on the original numeric price from `df` rather than the
# binned price (assuming the required output is the continuous price).
# Regression-specific names are used so the classification split
# (X_train / X_test / y_train / y_test) is not overwritten and the classifier
# cells above can still be re-run after this cell.
y_reg = df["price"]
X_reg = discretized_df.drop("price", axis=1)

X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

#SCALING THE NUMERIC FEATURES
# KNN picks neighbours by distance, so unscaled columns with large magnitudes would
# outweigh the 0/1 indicator columns. Only the columns that did not come from the
# one-hot encoding are standardised, with the scaler fitted on the training split only.
numeric_columns_reg = [col for col in X_train_reg.columns if col not in one_hot_df.columns]
X_train_reg_scaled = X_train_reg.astype(float)
X_test_reg_scaled = X_test_reg.astype(float)
if numeric_columns_reg:
    reg_scaler = StandardScaler()
    X_train_reg_scaled[numeric_columns_reg] = reg_scaler.fit_transform(X_train_reg[numeric_columns_reg])
    X_test_reg_scaled[numeric_columns_reg] = reg_scaler.transform(X_test_reg[numeric_columns_reg])

#INSTANTIATING REGRESSOR
knn_regressor = KNeighborsRegressor(n_neighbors=3)

#TRAINING MODEL
knn_regressor.fit(X_train_reg_scaled, y_train_reg)

#MAKING PREDICTIONS
knn_regressor_pred = knn_regressor.predict(X_test_reg_scaled)

#CALCULATING MEAN SQUARED ERROR
# Expressed in squared price units, so it is not comparable to the classifier metrics.
mse_knn_regressor = mean_squared_error(y_test_reg, knn_regressor_pred)



In [None]:
#PRINTING THE KNN METRICS
print("KNN REULTS : ")

# Classifier metrics as (caption, value) pairs, printed in this order.
print("\nCLASSIFIER: ")
knn_classifier_report = (
    ("K-Nearest Neighbors (Classifier) - Accuracy:", accuracy_knn_classifier),
    ("K-Nearest Neighbors (Classifier) - Confusion Matrix:\n", conf_matrix_knn_classifier),
    ("K-Nearest Neighbors (Classifier) - Recall:", recall_knn_classifier),
    ("K-Nearest Neighbors (Classifier) - Precision:", precision_knn_classifier),
)
for caption, value in knn_classifier_report:
    print(caption, value)

# The regressor has a single metric: the mean squared error on the test split.
print("\nREGRESSOR: ")
print("\nK-Nearest Neighbors (Regressor) - Mean Squared Error:", mse_knn_regressor)


KNN REULTS : 

CLASSIFIER: 
K-Nearest Neighbors (Classifier) - Accuracy: 0.7317073170731707
K-Nearest Neighbors (Classifier) - Confusion Matrix:
 [[ 0  1  0  0  0]
 [ 0 10  0  0  2]
 [ 1  3  0  0  0]
 [ 2  0  0  0  0]
 [ 0  2  0  0 20]]
K-Nearest Neighbors (Classifier) - Recall: 0.7317073170731707
K-Nearest Neighbors (Classifier) - Precision: 0.6707317073170732

REGRESSOR: 

K-Nearest Neighbors (Regressor) - Mean Squared Error: 15649836.334728999


In [None]:
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
#COMPARING THE THREE CLASSIFIERS
# For each metric, map model name -> score and report the name with the highest score
# (on a tie, the model listed first in the mapping wins).

#MAX ACCURACY
acc = {
    'KNN': accuracy_knn_classifier,
    'Naive Bayes': accuracy_nb,
    'Desicion Tree': accuracy_dt,
}
maxAcc = max(acc.items(), key=lambda entry: entry[1])[0]
print("The Model with the maximum accuracy is : ", maxAcc)

#MAX RECALL
recall = {
    'KNN': recall_knn_classifier,
    'Naive Bayes': recall_nb,
    'Desicion Tree': recall_dt,
}
maxRecall = max(recall.items(), key=lambda entry: entry[1])[0]
print("The Model with the maximum recall is : ", maxRecall)

#MAX PRECISION
precision = {
    'KNN': precision_knn_classifier,
    'Naive Bayes': precision_nb,
    'Desicion Tree': precision_dt,
}
maxPrecision = max(precision.items(), key=lambda entry: entry[1])[0]
print("The Model with the maximum precision is : ", maxPrecision)

#CONCLUSION FROM THE RECORDED RUN: THE DECISION TREE HAS THE MAX ACCURACY/RECALL/PRECISION
#THE DECISION TREE ALSO HAS THE BEST CONFUSION MATRIX, SINCE MORE OF ITS PREDICTIONS LIE ON THE MAIN DIAGONAL (CORRECTLY PREDICTED VALUES)



The Model with the maximum accuracy is :  Desicion Tree
The Model with the maximum recall is :  Desicion Tree
The Model with the maximum precision is :  Desicion Tree
