<a href="https://colab.research.google.com/github/cbeckler/final_project/blob/cw_mlm/SVM_Instances-colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

 # SVM MultiClass Instances Testing



In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics

In [None]:
# Read in the data
# Note: The following data has been normalized between 0 and 1
#data = Path('../Resources/File_Name.csv')
#df = pd.read_csv(data)
#df.head()

# Load the iris dataset as an example
iris = datasets.load_iris()


 ## Separate the Features (X) from the Target (y)

In [None]:
# Segment the features from the target
X = iris.data
y = iris.target

 ## Split our data into training and testing

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

 ## Create a SVM Model

## Instance 1

In [None]:
# Create a multiclass SVM using the "one-vs-one" strategy
clf = svm.SVC(kernel='linear', C=1, decision_function_shape='ovr')

 ## Fit (train) or model using the training data

In [None]:
# Fit the data
clf.fit(X_train, y_train)

SVC(C=1, kernel='linear')

 ## Score the model using the test data

 ## Make predictions

In [None]:
# Make predictions using the test data
y_pred = clf.predict(X_test)
results = pd.DataFrame({
    "Prediction": y_pred, 
    "Actual": y_test
}).reset_index(drop=True)
results.head()

Unnamed: 0,Prediction,Actual
0,0,0
1,2,2
2,0,0
3,1,1
4,0,0


In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

Accuracy: 1.0


 # Generate Confusion Matrix

In [None]:
    from sklearn.metrics import confusion_matrix
    confusion_matrix(y_test, y_pred)

array([[11,  0,  0],
       [ 0,  7,  0],
       [ 0,  0, 12]], dtype=int64)

 # Generate Classification Report

In [None]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      1.00      1.00         7
           2       1.00      1.00      1.00        12

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



## Second Instance: Sigmoid Kernel

In [None]:
clf = svm.SVC(kernel='sigmoid', C=1, decision_function_shape='ovr')

In [None]:
# Train the model
clf.fit(X_train, y_train)

SVC(C=1, kernel='sigmoid')

In [None]:
# Make predictions using the test data
y_pred = clf.predict(X_test)
results = pd.DataFrame({
    "Prediction": y_pred, 
    "Actual": y_test
}).reset_index(drop=True)
results.head()

Unnamed: 0,Prediction,Actual
0,1,0
1,1,2
2,1,0
3,1,1
4,1,0


In [None]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.23333333333333334


In [None]:
    from sklearn.metrics import confusion_matrix
    confusion_matrix(y_test, y_pred)

array([[ 0, 11,  0],
       [ 0,  7,  0],
       [ 0, 12,  0]], dtype=int64)

In [None]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        11
           1       0.23      1.00      0.38         7
           2       0.00      0.00      0.00        12

    accuracy                           0.23        30
   macro avg       0.08      0.33      0.13        30
weighted avg       0.05      0.23      0.09        30



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Instance 3: Tune Hyperparameters

In [None]:
from sklearn.model_selection import GridSearchCV
# Define the range of hyperparameters to test
parameters = {'kernel':('linear', 'rbf', 'poly'), 'C':[1, 10, 100], 'degree':[2, 3, 4]}

In [None]:
# Create a multiclass SVM
clf = svm.SVC(decision_function_shape='ovr')

In [None]:
# Use GridSearchCV to tune the hyperparameters
grid_search = GridSearchCV(clf, parameters, cv=5, return_train_score=True)
grid_search.fit(X, y)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [1, 10, 100], 'degree': [2, 3, 4],
                         'kernel': ('linear', 'rbf', 'poly')},
             return_train_score=True)

In [None]:
# Print the best hyperparameters
print("Best Hyperparameters: ",grid_search.best_params_)

Best Hyperparameters:  {'C': 1, 'degree': 2, 'kernel': 'poly'}


In [None]:
# Print the accuracy of the best model
best_clf = grid_search.best_estimator_
y_pred = best_clf.predict(X)
print("Accuracy:", metrics.accuracy_score(y, y_pred))

Accuracy: 0.9866666666666667


In [None]:
print(classification_report(y, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       1.00      0.96      0.98        50
           2       0.96      1.00      0.98        50

    accuracy                           0.99       150
   macro avg       0.99      0.99      0.99       150
weighted avg       0.99      0.99      0.99       150



## Instance 4: Transfer Learning

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [None]:
# Create a pre-trained multiclass SVM model
pre_trained_clf = svm.SVC(kernel='linear', C=1, decision_function_shape='ovr')
pre_trained_clf.fit(X_train, y_train)

SVC(C=1, kernel='linear')

In [None]:
# Fine-tune the pre-trained model on a new dataset
X_transfer, y_transfer = datasets.load_digits(return_X_y=True)
clf = svm.SVC(kernel='linear', C=1, decision_function_shape='ovr')
clf.fit(X_transfer, y_transfer)

SVC(C=1, kernel='linear')

In [None]:
# Make predictions on the test set
#y_pred = clf.predict(X_test)


In [None]:
# Print the accuracy of the model
#print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

ValueError: Found input variables with inconsistent numbers of samples: [30, 150]