In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_score
from sklearn.preprocessing import LabelEncoder

In [None]:
data=pd.read_csv('CarPrice_Assignment.csv')

In [None]:
print(data.shape[0])
#data preProcessing
preproData=data.drop_duplicates() #getting rid of dupplicates
print('---------')
print(preproData.shape[0]) #no dupplicates in the original data

205
---------
205


In [None]:
preproData.isnull().sum()#checking if there are any missing values to handle
#no missing values

car_ID              0
symboling           0
CarName             0
fueltype            0
aspiration          0
doornumber          0
carbody             0
drivewheel          0
enginelocation      0
wheelbase           0
carlength           0
carwidth            0
carheight           0
curbweight          0
enginetype          0
cylindernumber      0
enginesize          0
fuelsystem          0
boreratio           0
stroke              0
compressionratio    0
horsepower          0
peakrpm             0
citympg             0
highwaympg          0
price               0
dtype: int64

In [None]:
data=preproData

In [None]:
data['price_category'] = pd.qcut(data['price'], q=4, labels=['Low', 'Medium', 'High','very High'])
print(data) #converting the cont values in column price into discrete values

     car_ID  symboling                   CarName fueltype aspiration  \
0         1          3        alfa-romero giulia      gas        std   
1         2          3       alfa-romero stelvio      gas        std   
2         3          1  alfa-romero Quadrifoglio      gas        std   
3         4          2               audi 100 ls      gas        std   
4         5          2                audi 100ls      gas        std   
..      ...        ...                       ...      ...        ...   
200     201         -1           volvo 145e (sw)      gas        std   
201     202         -1               volvo 144ea      gas      turbo   
202     203         -1               volvo 244dl      gas        std   
203     204         -1                 volvo 246   diesel      turbo   
204     205         -1               volvo 264gl      gas      turbo   

    doornumber      carbody drivewheel enginelocation  wheelbase  ...  \
0          two  convertible        rwd          front       88

In [None]:
# Label encode non-numeric columns
label_encoders = {}
label_encoders['CarName'] = LabelEncoder()
data['CarName'] = label_encoders['CarName'].fit_transform(data['CarName'])

label_encoders['fueltype'] = LabelEncoder()
data['fueltype'] = label_encoders['fueltype'].fit_transform(data['fueltype'])

label_encoders['aspiration'] = LabelEncoder()
data['aspiration'] = label_encoders['aspiration'].fit_transform(data['aspiration'])

label_encoders['doornumber'] = LabelEncoder()
data['doornumber'] = label_encoders['doornumber'].fit_transform(data['doornumber'])

label_encoders['carbody'] = LabelEncoder()
data['carbody'] = label_encoders['carbody'].fit_transform(data['carbody'])

label_encoders['drivewheel'] = LabelEncoder()
data['drivewheel'] = label_encoders['drivewheel'].fit_transform(data['drivewheel'])

label_encoders['enginelocation'] = LabelEncoder()
data['enginelocation'] = label_encoders['enginelocation'].fit_transform(data['enginelocation'])

label_encoders['enginetype'] = LabelEncoder()
data['enginetype'] = label_encoders['enginetype'].fit_transform(data['enginetype'])

label_encoders['cylindernumber'] = LabelEncoder()
data['cylindernumber'] = label_encoders['cylindernumber'].fit_transform(data['cylindernumber'])

label_encoders['fuelsystem'] = LabelEncoder()
data['fuelsystem'] = label_encoders['fuelsystem'].fit_transform(data['fuelsystem'])

encoder = LabelEncoder()

encoder.fit(['Low', 'Medium', 'High','very High'])
data["price_category"] = encoder.transform(data["price_category"])


In [None]:
data.dtypes

car_ID                int64
symboling             int64
CarName               int64
fueltype              int64
aspiration            int64
doornumber            int64
carbody               int64
drivewheel            int64
enginelocation        int64
wheelbase           float64
carlength           float64
carwidth            float64
carheight           float64
curbweight            int64
enginetype            int64
cylindernumber        int64
enginesize            int64
fuelsystem            int64
boreratio           float64
stroke              float64
compressionratio    float64
horsepower            int64
peakrpm               int64
citympg               int64
highwaympg            int64
price               float64
price_category        int64
dtype: object

In [None]:
X = data.drop(['price_category','price','car_ID'], axis=1)  # Features
y = data['price_category']  # Target variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) #creating training and testing sets
#test size=20% of the data ,random state (seed for randomness)=0
print('x:',X)
print('----------------------------------------------------------------------')
print('y:',y)
print('----------------------------------------------------------------------')
print('x train : ',X_train)
print('----------------------------------------------------------------------')
print('y train : ',y_train)
print('----------------------------------------------------------------------')
print('x test :',X_test)
print('----------------------------------------------------------------------')
print('y test :',y_test)




x:      symboling  CarName  fueltype  aspiration  doornumber  carbody  \
0            3        2         1           0           1        0   
1            3        3         1           0           1        0   
2            1        1         1           0           1        2   
3            2        4         1           0           0        3   
4            2        5         1           0           0        3   
..         ...      ...       ...         ...         ...      ...   
200         -1      139         1           0           0        3   
201         -1      138         1           1           0        3   
202         -1      140         1           0           0        3   
203         -1      142         0           1           0        3   
204         -1      143         1           1           0        3   

     drivewheel  enginelocation  wheelbase  carlength  ...  cylindernumber  \
0             2               0       88.6      168.8  ...               2   


**Decision** **tree**

In [None]:
decision_tree = DecisionTreeClassifier(random_state=0)
decision_tree.fit(X_train, y_train)

# Make predictions
dty_pred = decision_tree.predict(X_test)
print(dty_pred)

[1 0 0 1 2 2 1 2 3 1 3 3 0 0 1 2 2 3 2 1 2 3 2 2 3 2 1 3 1 1 2 2 3 0 1 3 2
 3 1 3 1]


In [None]:
accuracy = accuracy_score(y_test, dty_pred)
conf_matrix = confusion_matrix(y_test,dty_pred)
recall = recall_score(y_test, dty_pred, average='weighted')
precision = precision_score(y_test, dty_pred, average='weighted')
print("Decision Tree Classifier Performance:")
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Recall:", recall)
print("Precision:", precision)

Decision Tree Classifier Performance:
Accuracy: 0.7073170731707317
Confusion Matrix:
 [[3 0 4 2]
 [0 9 1 0]
 [1 3 8 0]
 [1 0 0 9]]
Recall: 0.7073170731707317
Precision: 0.6943032577178918


**KNN**

In [None]:
knnClass = KNeighborsClassifier(n_neighbors=5 , weights = 'uniform' , algorithm = 'auto') #default no. of neighbours , each point in each neighbour has same weight , decides most appropriate algo to use
knnClass.fit(X_train , y_train) #fit model
Knnpred = knnClass.predict(X_test)
print(Knnpred)

[1 0 0 1 2 0 1 1 0 1 0 3 0 0 1 2 0 3 2 1 2 3 2 0 3 1 1 3 1 1 2 0 3 0 1 3 1
 3 1 3 1]


In [None]:
knnReg = KNeighborsRegressor(n_neighbors=5 , weights = 'uniform' , algorithm = 'auto') #default no. of neighbours , each point in each neighbour has same weight , decides most appropriate algo to use
knnReg.fit(X_train , y_train) #fit model
Knnpred = knnReg.predict(X_test)
print(Knnpred)
mean_squared_error(y_test , Knnpred)

[1.  1.2 0.8 1.  1.4 1.4 1.  1.4 1.2 1.2 1.  3.  0.4 1.  1.  1.2 1.4 3.
 1.8 1.  1.2 1.8 1.8 1.4 1.8 1.  1.2 1.8 1.4 1.4 2.  0.8 1.8 0.8 1.2 2.4
 1.2 1.8 1.  3.  1. ]


0.9063414634146341

In [None]:
label_encoder = LabelEncoder()
yTest_encoded = label_encoder.fit_transform(y_test)
pred_encoded = label_encoder.transform(Knnpred)

In [None]:
acc = accuracy_score(yTest_encoded , pred_encoded)
print('knn:')
print("Accuracy:",acc)
cmat=confusion_matrix(yTest_encoded , pred_encoded)
print("Confusion Matrix:\n",cmat)
rs=recall_score(yTest_encoded , pred_encoded , average = "weighted")
ps=precision_score(yTest_encoded , pred_encoded , average = "weighted")
print("Recall:", rs)
print("Precision:", ps)

knn:
Accuracy: 0.4146341463414634
Confusion Matrix:
 [[ 3  6  0  0]
 [ 0 10  0  0]
 [ 1 10  1  0]
 [ 0  6  1  3]]
Recall: 0.4146341463414634
Precision: 0.6310975609756098


**Naive bayes**

In [None]:
nb_model = GaussianNB()


nb_model.fit(X_train, y_train)

# Predict on the test set
y_pred = nb_model.predict(X_test)
print(y_pred)


[1 3 0 0 2 0 1 2 0 1 0 3 0 0 1 0 2 3 2 1 2 0 2 0 0 1 1 3 1 1 2 2 0 2 1 3 2
 0 1 3 1]


In [None]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(" Naive bayes Accuracy:", accuracy)

# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("naive bayes Confusion Matrix:")
print(cm)

# Compute recall
recall = recall_score(y_test, y_pred, average='weighted')
print(" naive bayes Recall:", recall)

# Compute precision
precision = precision_score(y_test, y_pred, average='weighted')
print("naive bayes Precision:", precision)

 Naive bayes Accuracy: 0.6829268292682927
naive bayes Confusion Matrix:
[[6 0 2 1]
 [1 9 0 0]
 [1 3 8 0]
 [5 0 0 5]]
 naive bayes Recall: 0.6829268292682927
naive bayes Precision: 0.7216385240775486


**decesion tree performed better because it is more accurate**