In [None]:
# 2nd, 3rd Question

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Load the dataset and clean it in a single chain: empty strings become NaN,
# rows containing any NaN are dropped, then duplicate rows are removed.
data = (
    pd.read_csv('data.csv')
    .replace("", np.nan)
    .dropna()
    .drop_duplicates()
)

data.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Features are every column except the last one; the last column is the target.
X = data.iloc[:, :-1].values
# Keep the target one-dimensional: scikit-learn classifiers expect shape
# (n_samples,) and emit a DataConversionWarning for a column vector.
y = data.iloc[:, -1].values

# Split BEFORE scaling so that the held-out rows never contribute to the
# scaler's mean/variance (fitting the scaler on all rows leaks test data).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Learn the scaling from the training rows only, then apply that same
# transform to both partitions. X itself is left unscaled.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [3]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid: every kernel is combined with every value of C.
parameters = dict(
    kernel=('linear', 'poly', 'rbf'),
    C=[1, 5, 10, 20, 30, 40, 50, 100],
)

# Exhaustive search over the grid using GridSearchCV's default settings;
# fit() returns the fitted search object itself.
svm = SVC()
clf = GridSearchCV(svm, parameters).fit(X_train, y_train)
best_params = clf.best_params_

print(f"best C: {best_params['C']}\n")
print(f"best kernel: {best_params['kernel']}")

best C: 100

best kernel: linear


In [41]:
# Train a single SVC with the hyperparameters selected by the grid search
# and report per-class metrics on the held-out test set.
clf = SVC(kernel=best_params['kernel'], C=best_params['C'])
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.96      0.98       132
           1       0.94      1.00      0.97       118
           2       0.99      0.95      0.97       120
           3       0.98      0.99      0.98       130

    accuracy                           0.98       500
   macro avg       0.98      0.98      0.98       500
weighted avg       0.98      0.98      0.98       500



In [42]:
# 4th Q

In [4]:
# Soft margin: same kernel as the grid-search winner, but with a small C (0.05)

soft_C = 0.05
clf = SVC(kernel=best_params['kernel'], C=soft_C)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.96      0.94      0.95       132
           1       0.89      0.96      0.92       118
           2       0.95      0.91      0.93       120
           3       0.96      0.95      0.96       130

    accuracy                           0.94       500
   macro avg       0.94      0.94      0.94       500
weighted avg       0.94      0.94      0.94       500



In [5]:
# Hard margin: same kernel as the grid-search winner, with a very large C (1e10)

hard_C = 1e10
clf = SVC(kernel=best_params['kernel'], C=hard_C)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.95      0.97       132
           1       0.92      1.00      0.96       118
           2       0.97      0.93      0.95       120
           3       0.96      0.97      0.97       130

    accuracy                           0.96       500
   macro avg       0.96      0.96      0.96       500
weighted avg       0.96      0.96      0.96       500



In [None]:
# 5th Q

In [6]:
# 5-a

# Bin battery_power into three quantile-based buckets and store the labelled
# bins in a copy of the frame; `data` itself is not modified.
battery_bins = pd.qcut(data['battery_power'], q=3, labels=['small', 'medium', 'large'])
data_bin = data.assign(battery_power=battery_bins)
data_bin.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,small,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,medium,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,small,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,small,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,large,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [7]:
# 5-b

def replacement(string):
    """Return the one-hot vector, written as a string, for a battery-size label.

    "small", "medium" and "large" map to "[1,0,0]", "[0,1,0]" and "[0,0,1]"
    respectively. Any other value yields None.
    """
    encodings = (
        ("small", "[1,0,0]"),
        ("medium", "[0,1,0]"),
        ("large", "[0,0,1]"),
    )
    for label, encoded in encodings:
        if string == label:
            return encoded
    return None
    
# Swap every battery_power label for its one-hot string via replacement().
# NOTE(review): running this cell a second time maps the already-encoded
# values to None, because they no longer match any label.
encoded_battery = data_bin['battery_power'].apply(replacement)
data_bin['battery_power'] = encoded_battery

data_bin.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,"[1,0,0]",0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,"[0,1,0]",1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,"[1,0,0]",1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,"[1,0,0]",1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,"[0,0,1]",1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [55]:
# 5-d

# Derive a new feature, the product of pixel width and pixel height, and
# append it as the last column of a copy of the frame.
pixel_area = data.px_width * data.px_height
data2 = data.assign(area=pixel_area)

data2.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range,area
0,842,0,2.2,0,1,0,7,0.6,188,2,...,756,2549,9,7,19,0,0,1,1,15120
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,1988,2631,17,3,7,1,1,0,2,1799140
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1716,2603,11,2,9,1,1,0,2,2167308
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1786,2769,16,8,11,1,0,0,2,2171776
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1212,1411,8,2,15,1,1,0,1,1464096


In [56]:
# 6th Q

In [9]:
# 6-a

# Replace battery_power by its quantile bin, labelled 1, 2, 3 in ascending order.
# The categorical result is cast to int so the feature matrix is a plain
# numeric array rather than an object array.
battery_bins = pd.qcut(data['battery_power'], 3, labels=[1, 2, 3]).astype(int)
data_bin = data.assign(battery_power=battery_bins)

# Features are every column except the last one; the last column is the target.
X = data_bin.iloc[:, :-1].values
# Keep the target one-dimensional: scikit-learn classifiers expect shape
# (n_samples,) and emit a DataConversionWarning for a column vector.
y = data_bin.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# No feature scaling in this experiment: only the binning is being evaluated.
clf = SVC(C=100, kernel='linear')
clf.fit(X_train, y_train)

print(classification_report(y_test, clf.predict(X_test)))

              precision    recall  f1-score   support

           0       0.96      0.94      0.95       132
           1       0.85      0.93      0.89       118
           2       0.95      0.82      0.88       120
           3       0.92      0.98      0.95       130

    accuracy                           0.92       500
   macro avg       0.92      0.92      0.92       500
weighted avg       0.92      0.92      0.92       500



In [10]:
# 6-c

# Features are every column except the last one; the last column is the target.
X = data.iloc[:, :-1].values
# Keep the target one-dimensional: scikit-learn classifiers expect shape
# (n_samples,) and emit a DataConversionWarning for a column vector.
y = data.iloc[:, -1].values

# Split BEFORE scaling so that the held-out rows never contribute to the
# scaler's mean/variance (fitting the scaler on all rows leaks test data).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Learn the scaling from the training rows only, then apply it to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

clf = SVC(C=100, kernel='linear')
clf.fit(X_train, y_train)

print(classification_report(y_test, clf.predict(X_test)))

              precision    recall  f1-score   support

           0       1.00      0.96      0.98       132
           1       0.94      1.00      0.97       118
           2       0.99      0.95      0.97       120
           3       0.98      0.99      0.98       130

    accuracy                           0.98       500
   macro avg       0.98      0.98      0.98       500
weighted avg       0.98      0.98      0.98       500



In [None]:
# 6-d


# Build the feature frame with assign() instead of adding a column to an
# iloc slice: assigning into a slice can raise SettingWithCopyWarning on
# pandas versions without Copy-on-Write, and assign() always returns a new frame.
X = (
    data.iloc[:, :-1]
    .assign(area=data.px_width * data.px_height)
    .values
)
# Keep the target one-dimensional: scikit-learn classifiers expect shape
# (n_samples,) and emit a DataConversionWarning for a column vector.
y = data.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# No feature scaling in this experiment: only the extra area feature is being evaluated.
clf = SVC(C=100, kernel='linear')
clf.fit(X_train, y_train)

print(classification_report(y_test, clf.predict(X_test)))

In [None]:
# 6-e


# Replace battery_power by its quantile bin, labelled 1, 2, 3 in ascending order.
# The categorical result is cast to int so the feature matrix is a plain
# numeric array rather than an object array.
battery_bins = pd.qcut(data['battery_power'], 3, labels=[1, 2, 3]).astype(int)
data_bin = data.assign(battery_power=battery_bins)

# Build the feature frame with assign() instead of adding a column to an
# iloc slice: assigning into a slice can raise SettingWithCopyWarning on
# pandas versions without Copy-on-Write.
X = (
    data_bin.iloc[:, :-1]
    .assign(area=data_bin.px_width * data_bin.px_height)
    .values
)
# Keep the target one-dimensional: scikit-learn classifiers expect shape
# (n_samples,) and emit a DataConversionWarning for a column vector.
y = data_bin.iloc[:, -1].values

# Split BEFORE scaling so that the held-out rows never contribute to the
# scaler's mean/variance (fitting the scaler on all rows leaks test data).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Learn the scaling from the training rows only, then apply it to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

clf = SVC(C=100, kernel='linear')
clf.fit(X_train, y_train)

print(classification_report(y_test, clf.predict(X_test)))