<a href="https://colab.research.google.com/github/anuramvarma/Python-Programs/blob/main/Machine_Learning/Classifications_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Import Libraries


In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report


###Loading the Cars Dataset to work with concept of classifications

In [35]:
cars = pd.read_csv('USA_cars_datasets[1].csv')
cars.sample(100)

Unnamed: 0.1,Unnamed: 0,price,brand,model,year,title_status,mileage,color,vin,lot,state,country,condition
967,967,15400,dodge,caravan,2019,clean vehicle,17472.0,black,2c4rdgcg3kr747548,167580517,arizona,usa,20 hours left
635,635,14000,chevrolet,door,2017,clean vehicle,38458.0,blue,1g1ze5st9hf229260,167779685,wisconsin,usa,21 hours left
2031,2031,22000,ford,door,2014,clean vehicle,68418.0,black,1ftfw1ct6eke95627,167743891,mississippi,usa,21 hours left
125,125,10940,ford,door,2017,clean vehicle,84916.0,white,1ftew1ef1hke25038,167656368,texas,usa,2 days left
671,671,37500,chevrolet,tahoe,2019,clean vehicle,33331.0,silver,1gnskbkc1kr164515,167787087,oklahoma,usa,21 hours left
...,...,...,...,...,...,...,...,...,...,...,...,...,...
284,284,25,dodge,door,2007,salvage insurance,160528.0,dark blue,2b3ka43g87h836300,167416803,minnesota,usa,17 hours left
140,140,4260,dodge,mpv,2018,clean vehicle,33957.0,gray,3c4pdcabxjt311007,167656471,florida,usa,16 hours left
1500,1500,22800,ford,f-150,2019,clean vehicle,16506.0,white,1ftmf1c58kkc79170,167787219,oklahoma,usa,21 hours left
707,707,3000,chevrolet,door,2013,clean vehicle,120667.0,gray,1g1jc6sh6d4127084,167790930,texas,usa,9 days left


In [36]:
cars.columns

Index(['Unnamed: 0', 'price', 'brand', 'model', 'year', 'title_status',
       'mileage', 'color', 'vin', 'lot', 'state', 'country', 'condition'],
      dtype='object')

In [37]:
cars.shape

(2499, 13)

In [46]:
cars.describe()

Unnamed: 0.1,Unnamed: 0,price,year,mileage,lot
count,2499.0,2499.0,2499.0,2499.0,2499.0
mean,1249.0,18767.671469,2016.714286,52298.69,167691400.0
std,721.543484,12116.094936,3.442656,59705.52,203877.2
min,0.0,0.0,1973.0,0.0,159348800.0
25%,624.5,10200.0,2016.0,21466.5,167625300.0
50%,1249.0,16900.0,2018.0,35365.0,167745100.0
75%,1873.5,25555.5,2019.0,63472.5,167779800.0
max,2498.0,84900.0,2020.0,1017936.0,167805500.0


In [47]:
cars.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2499 entries, 0 to 2498
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    2499 non-null   int64  
 1   price         2499 non-null   int64  
 2   brand         2499 non-null   object 
 3   model         2499 non-null   object 
 4   year          2499 non-null   int64  
 5   title_status  2499 non-null   object 
 6   mileage       2499 non-null   float64
 7   color         2499 non-null   object 
 8   vin           2499 non-null   object 
 9   lot           2499 non-null   int64  
 10  state         2499 non-null   object 
 11  country       2499 non-null   object 
 12  condition     2499 non-null   object 
dtypes: float64(1), int64(4), object(8)
memory usage: 253.9+ KB


**Data Preprocessing**

* Check for Missing Values and Handle Them

In [38]:
print(cars.isnull().sum())
cars['mileage'] = cars['mileage'].fillna(cars['mileage'].mean())
cars.dropna(inplace=True)
print(cars.isnull().sum())

Unnamed: 0      0
price           0
brand           0
model           0
year            0
title_status    0
mileage         0
color           0
vin             0
lot             0
state           0
country         0
condition       0
dtype: int64
Unnamed: 0      0
price           0
brand           0
model           0
year            0
title_status    0
mileage         0
color           0
vin             0
lot             0
state           0
country         0
condition       0
dtype: int64


**Select Features and Target Variable**

For this classification task, we will predict the 'brand' based on other features.

In [39]:
X = cars.drop(columns=['brand', 'model', 'vin', 'state'])
y = cars['brand']
X = pd.get_dummies(X, drop_first=True)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

**Spliting the Data**

* Split the data into training and testing sets

In [40]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3,random_state=42)

##Build and Train Models

**Logistic Regression**

In [41]:
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)

# Evaluate Logistic Regression
print("Logistic Regression:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_log_reg):.2f}")
print(f"Precision: {precision_score(y_test, y_pred_log_reg, average='macro', zero_division=0):.2f}")
print(f"Recall: {recall_score(y_test, y_pred_log_reg, average='macro', zero_division=0):.2f}")
print(f"F1-Score: {f1_score(y_test, y_pred_log_reg, average='macro', zero_division=0):.2f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred_log_reg, zero_division=0))


Logistic Regression:
Accuracy: 0.69
Precision: 0.26
Recall: 0.27
F1-Score: 0.26

Classification Report:
               precision    recall  f1-score   support

       acura       0.00      0.00      0.00         1
        audi       0.00      0.00      0.00         2
         bmw       0.00      0.00      0.00         0
       buick       0.00      0.00      0.00         5
    cadillac       0.00      0.00      0.00         0
   chevrolet       0.55      0.67      0.60        82
    chrysler       0.33      0.17      0.22         6
       dodge       0.47      0.43      0.45       125
        ford       0.78      0.82      0.80       395
         gmc       0.42      0.45      0.43        11
   heartland       1.00      1.00      1.00         1
       honda       0.00      0.00      0.00         4
     hyundai       0.00      0.00      0.00         7
    infiniti       0.50      1.00      0.67         3
      jaguar       0.00      0.00      0.00         1
        jeep       0.60      0

**Naïve Bayes**


In [42]:
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred_nb = nb.predict(X_test)

print("Naïve Bayes:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_nb):.2f}")
print(f"Precision: {precision_score(y_test, y_pred_nb, average='macro', zero_division=0):.2f}")
print(f"Recall: {recall_score(y_test, y_pred_nb, average='macro', zero_division=0):.2f}")
print(f"F1-Score: {f1_score(y_test, y_pred_nb, average='macro', zero_division=0):.2f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred_nb, zero_division=0))

Naïve Bayes:
Accuracy: 0.10
Precision: 0.15
Recall: 0.13
F1-Score: 0.10

Classification Report:
                precision    recall  f1-score   support

        acura       0.00      0.00      0.00         1
         audi       0.00      0.00      0.00         2
          bmw       0.00      0.00      0.00         0
        buick       0.00      0.00      0.00         5
     cadillac       0.00      0.00      0.00         0
    chevrolet       0.25      0.07      0.11        82
     chrysler       0.04      0.17      0.06         6
        dodge       0.45      0.11      0.18       125
         ford       0.88      0.05      0.10       395
          gmc       0.00      0.00      0.00        11
    heartland       1.00      1.00      1.00         1
        honda       0.00      0.00      0.00         4
      hyundai       0.25      0.14      0.18         7
     infiniti       0.12      0.67      0.20         3
       jaguar       0.00      0.00      0.00         1
         jeep       0.

**KNN (K-Nearest Neighbors)**

In [43]:
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
print("KNN: ")
print(f"Accuracy: {accuracy_score(y_test, y_pred_knn):.2f}")
print(f"Precision: {precision_score(y_test, y_pred_knn, average='macro', zero_division=0):.2f}")
print(f"Recall: {recall_score(y_test, y_pred_knn, average='macro', zero_division=0):.2f}")
print(f"F1-Score: {f1_score(y_test, y_pred_knn, average='macro', zero_division=0):.2f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred_knn, zero_division=0))

KNN: 
Accuracy: 0.66
Precision: 0.24
Recall: 0.21
F1-Score: 0.21

Classification Report:
                precision    recall  f1-score   support

        acura       0.00      0.00      0.00         1
         audi       0.00      0.00      0.00         2
          bmw       0.00      0.00      0.00         0
        buick       0.00      0.00      0.00         5
     cadillac       0.00      0.00      0.00         0
    chevrolet       0.47      0.59      0.52        82
     chrysler       0.50      0.33      0.40         6
        dodge       0.52      0.61      0.56       125
         ford       0.79      0.76      0.78       395
          gmc       0.50      0.55      0.52        11
    heartland       0.00      0.00      0.00         1
        honda       0.00      0.00      0.00         4
      hyundai       0.00      0.00      0.00         7
     infiniti       0.67      0.67      0.67         3
       jaguar       0.00      0.00      0.00         1
         jeep       0.67     

**Decision Tree**



In [44]:
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)
print("Decision Tree:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_dt):.2f}")
print(f"Precision: {precision_score(y_test, y_pred_dt, average='macro', zero_division=0):.2f}")
print(f"Recall: {recall_score(y_test, y_pred_dt, average='macro', zero_division=0):.2f}")
print(f"F1-Score: {f1_score(y_test, y_pred_dt, average='macro', zero_division=0):.2f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred_dt, zero_division=0))

Decision Tree:
Accuracy: 0.91
Precision: 0.43
Recall: 0.36
F1-Score: 0.37

Classification Report:
                precision    recall  f1-score   support

        acura       0.00      0.00      0.00         1
         audi       0.00      0.00      0.00         2
          bmw       0.00      0.00      0.00         0
        buick       0.00      0.00      0.00         5
     cadillac       0.00      0.00      0.00         0
    chevrolet       0.91      0.88      0.89        82
     chrysler       1.00      0.17      0.29         6
        dodge       0.93      0.95      0.94       125
         ford       0.96      0.95      0.96       395
          gmc       0.80      0.73      0.76        11
    heartland       0.00      0.00      0.00         1
        honda       0.50      0.75      0.60         4
      hyundai       1.00      0.43      0.60         7
     infiniti       1.00      0.67      0.80         3
       jaguar       0.00      0.00      0.00         1
         jeep       

**Support Vector Classifier**

In [45]:
svc = SVC()
svc.fit(X_train, y_train)
y_pred_svc = svc.predict(X_test)
print("Support Vector Classifier:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_svc):.2f}")
print(f"Precision: {precision_score(y_test, y_pred_svc, average='macro', zero_division=0):.2f}")
print(f"Recall: {recall_score(y_test, y_pred_svc, average='macro', zero_division=0):.2f}")
print(f"F1-Score: {f1_score(y_test, y_pred_svc, average='macro', zero_division=0):.2f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred_svc, zero_division=0))

Support Vector Classifier:
Accuracy: 0.62
Precision: 0.12
Recall: 0.10
F1-Score: 0.10

Classification Report:
               precision    recall  f1-score   support

       acura       0.00      0.00      0.00         1
        audi       0.00      0.00      0.00         2
       buick       0.00      0.00      0.00         5
   chevrolet       0.35      0.21      0.26        82
    chrysler       0.00      0.00      0.00         6
       dodge       0.60      0.17      0.26       125
        ford       0.61      0.92      0.74       395
         gmc       0.00      0.00      0.00        11
   heartland       0.00      0.00      0.00         1
       honda       0.00      0.00      0.00         4
     hyundai       0.00      0.00      0.00         7
    infiniti       0.00      0.00      0.00         3
      jaguar       0.00      0.00      0.00         1
        jeep       0.00      0.00      0.00        13
         kia       0.00      0.00      0.00         4
       lexus       0.00 