### Support Vector Machine

In [1]:
#getting synthetic dataset

from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=5, random_state=42)

In [2]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=42)


In [3]:
# standardize the data
from sklearn.preprocessing import StandardScaler   
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
#training the svm model with rbf kernel
from sklearn.svm import SVC

svm_model = SVC(kernel='rbf', C=2.0, gamma='scale')  #hyperparameters 
svm_model.fit(X_train, y_train)


In [5]:
svm_model.get_params()

{'C': 2.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [6]:
#predictions
y_pred = svm_model.predict(X_test)

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
#Evaluating the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Consufion matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification report:\n", classification_report(y_test, y_pred))

Accuracy: 0.88
Consufion matrix:
 [[90  7]
 [17 86]]
Classification report:
               precision    recall  f1-score   support

           0       0.84      0.93      0.88        97
           1       0.92      0.83      0.88       103

    accuracy                           0.88       200
   macro avg       0.88      0.88      0.88       200
weighted avg       0.88      0.88      0.88       200



In [7]:
#Hyperparameter tuning
from sklearn.model_selection import GridSearchCV
param_grid ={
    'C' : [0.1,1,2,10],
    'kernel' : ['linear', 'rbf', 'poly'],
    'gamma' : ['scale','auto']
}

#performing grid search
grid_search = GridSearchCV(SVC(),param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

#Best parameters and accuracy
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation accuracy:", grid_search.best_score_)

Best parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
Best cross-validation accuracy: 0.8700000000000001


In [8]:
#Real world data

from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

param_grid ={
    'C' : [0.1,1,2,10],
    'kernel' : ['linear', 'rbf', 'poly'],
    'gamma' : ['scale','auto']
}

data = load_iris()
X, y = data.data, data.target

#splitting the data
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=42)

#standardizing the data
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#training the svm model 
grid_search = GridSearchCV(SVC(),param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

#Best parameters and accuracy
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation accuracy:", grid_search.best_score_)

Best parameters: {'C': 2, 'gamma': 'scale', 'kernel': 'linear'}
Best cross-validation accuracy: 0.9666666666666668


In [9]:
iris = load_iris()
X, y = iris.data, iris.target
class_names = iris.target_names

In [10]:
svm_model = SVC(kernel='linear', C=2.0, gamma='scale')  #hyperparameters 
svm_model.fit(X_train, y_train)

#predictions
y_pred = svm_model.predict(X_test)

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
#Evaluating the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9666666666666667
Confusion matrix:
 [[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]
Classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.89      0.94         9
           2       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30



In [11]:
import pickle

In [12]:
iris_classification_model = (svm_model, scaler)

In [13]:
model_filename = 'iris_classification_model.pkl'
with open(model_filename, 'wb') as f:
    pickle.dump((iris_classification_model, class_names), f)

In [14]:
with open ("iris_classification_model.pkl", 'rb') as f:
    loaded_model = pickle.load(f)
    #  loaded_model, loaded_class_names = pickle.load(f)

In [15]:
svm_model, scaler = loaded_model

In [16]:
import numpy as np
sample = [X_test[1]]
prediction = svm_model.predict(sample)
print("Predicted class index:", prediction[0])
# print("Predicted class name:", loaded_class_names[prediction[0]])

AttributeError: 'tuple' object has no attribute 'predict'

In [None]:
y_test[1]

0

In [None]:
# #assignment 
# #load iris first
# then from  pandas load the data and perform analysis 
# then work on the data using svm model for prediction

In [22]:
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

load_iris = load_iris()

In [28]:
import pandas as pd
iris_df = pd.DataFrame(data.data, columns=data.feature_names)

In [29]:
iris_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3
