In [42]:
import pandas as pd
import numpy as np
from ISLP import load_data
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

In [43]:
# Load the OJ dataset with ISLP's load_data (the train/test split is done in a later cell)
OJ = load_data('OJ')

In [44]:
# Preview up to the first 15 rows of the raw data before any encoding
OJ.head(15)

Unnamed: 0,Purchase,WeekofPurchase,StoreID,PriceCH,PriceMM,DiscCH,DiscMM,SpecialCH,SpecialMM,LoyalCH,SalePriceMM,SalePriceCH,PriceDiff,Store7,PctDiscMM,PctDiscCH,ListPriceDiff,STORE
0,CH,237,1,1.75,1.99,0.0,0.0,0,0,0.5,1.99,1.75,0.24,No,0.0,0.0,0.24,1
1,CH,239,1,1.75,1.99,0.0,0.3,0,1,0.6,1.69,1.75,-0.06,No,0.150754,0.0,0.24,1
2,CH,245,1,1.86,2.09,0.17,0.0,0,0,0.68,2.09,1.69,0.4,No,0.0,0.091398,0.23,1
3,MM,227,1,1.69,1.69,0.0,0.0,0,0,0.4,1.69,1.69,0.0,No,0.0,0.0,0.0,1
4,CH,228,7,1.69,1.69,0.0,0.0,0,0,0.956535,1.69,1.69,0.0,Yes,0.0,0.0,0.0,0
5,CH,230,7,1.69,1.99,0.0,0.0,0,1,0.965228,1.99,1.69,0.3,Yes,0.0,0.0,0.3,0
6,CH,232,7,1.69,1.99,0.0,0.4,1,1,0.972182,1.59,1.69,-0.1,Yes,0.201005,0.0,0.3,0
7,CH,234,7,1.75,1.99,0.0,0.4,1,0,0.977746,1.59,1.75,-0.16,Yes,0.201005,0.0,0.24,0
8,CH,235,7,1.75,1.99,0.0,0.4,0,0,0.982197,1.59,1.75,-0.16,Yes,0.201005,0.0,0.24,0
9,CH,238,7,1.75,1.99,0.0,0.4,0,0,0.985757,1.59,1.75,-0.16,Yes,0.201005,0.0,0.24,0


In [45]:
# One-hot encode the non-numeric columns; dropping the first level of each
# leaves a single indicator column per two-level factor
OJ_encoded = OJ.pipe(pd.get_dummies, drop_first=True)

OJ_encoded.head(n=15)

Unnamed: 0,WeekofPurchase,StoreID,PriceCH,PriceMM,DiscCH,DiscMM,SpecialCH,SpecialMM,LoyalCH,SalePriceMM,SalePriceCH,PriceDiff,PctDiscMM,PctDiscCH,ListPriceDiff,STORE,Purchase_MM,Store7_Yes
0,237,1,1.75,1.99,0.0,0.0,0,0,0.5,1.99,1.75,0.24,0.0,0.0,0.24,1,False,False
1,239,1,1.75,1.99,0.0,0.3,0,1,0.6,1.69,1.75,-0.06,0.150754,0.0,0.24,1,False,False
2,245,1,1.86,2.09,0.17,0.0,0,0,0.68,2.09,1.69,0.4,0.0,0.091398,0.23,1,False,False
3,227,1,1.69,1.69,0.0,0.0,0,0,0.4,1.69,1.69,0.0,0.0,0.0,0.0,1,True,False
4,228,7,1.69,1.69,0.0,0.0,0,0,0.956535,1.69,1.69,0.0,0.0,0.0,0.0,0,False,True
5,230,7,1.69,1.99,0.0,0.0,0,1,0.965228,1.99,1.69,0.3,0.0,0.0,0.3,0,False,True
6,232,7,1.69,1.99,0.0,0.4,1,1,0.972182,1.59,1.69,-0.1,0.201005,0.0,0.3,0,False,True
7,234,7,1.75,1.99,0.0,0.4,1,0,0.977746,1.59,1.75,-0.16,0.201005,0.0,0.24,0,False,True
8,235,7,1.75,1.99,0.0,0.4,0,0,0.982197,1.59,1.75,-0.16,0.201005,0.0,0.24,0,False,True
9,238,7,1.75,1.99,0.0,0.4,0,0,0.985757,1.59,1.75,-0.16,0.201005,0.0,0.24,0,False,True


In [46]:
# Response: the Purchase indicator produced by the dummy encoding (True = MM, False = CH)
y = OJ_encoded['Purchase_MM']
# Predictors: every remaining column
X = OJ_encoded.drop(columns=['Purchase_MM'])

# Keep 800 rows for training; whatever is left becomes the test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=800, random_state=42
)

# Learn the centering/scaling from the training rows only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [47]:
# Quick look at the encoded response column
y.head()

0    False
1    False
2    False
3     True
4    False
Name: Purchase_MM, dtype: bool

In [48]:
# Linear-kernel support vector classifier with cost C=0.01, trained on the scaled training rows
svc_linear = SVC(C=0.01, kernel='linear').fit(X_train_scaled, y_train)

# n_support_ stores one count per class; add them up for the overall total
n_support_vectors = np.sum(svc_linear.n_support_)
print("Number of support vectors:", n_support_vectors)

Number of support vectors: 430


In [49]:
# Class predictions for the training rows and the held-out rows
train_preds, test_preds = (
    svc_linear.predict(features) for features in (X_train_scaled, X_test_scaled)
)

# Error rate is the complement of accuracy
train_error = 1 - accuracy_score(y_train, train_preds)
test_error = 1 - accuracy_score(y_test, test_preds)

print(f"Training error rate: {train_error:.4f}")
print(f"Test error rate: {test_error:.4f}")

Training error rate: 0.1600
Test error rate: 0.1963


In [50]:
# Candidate cost values for the search, in the {parameter name: values} form
# GridSearchCV expects. `C_values` was not defined anywhere in the notebook, so
# this cell raised NameError on a fresh kernel.
# NOTE(review): the grid is assumed to be the same one used for the RBF search — confirm.
C_values = {'C': [0.01, 0.1, 1, 10, 100]}

# 10-fold cross-validated search over C for the linear kernel
grid_linear = GridSearchCV(SVC(kernel='linear'), param_grid=C_values, cv=10)
grid_linear.fit(X_train_scaled, y_train)

# Cost value selected by the search
best_C_linear = grid_linear.best_params_['C']
print(f"Optimal C for Linear kernel: {best_C_linear}")

Optimal C for Linear kernel: 0.01


In [51]:
# Train a fresh linear SVC at the cost value chosen by cross-validation
svc_best_linear = SVC(C=best_C_linear, kernel='linear')
svc_best_linear.fit(X_train_scaled, y_train)

# Training split: predictions and error rate
train_preds_best = svc_best_linear.predict(X_train_scaled)
train_error_best = 1 - accuracy_score(y_train, train_preds_best)

# Test split: predictions and error rate
test_preds_best = svc_best_linear.predict(X_test_scaled)
test_error_best = 1 - accuracy_score(y_test, test_preds_best)

print(f"Training error rate with best C ({best_C_linear}): {train_error_best:.4f}")
print(f"Test error rate with best C ({best_C_linear}): {test_error_best:.4f}")

Training error rate with best C (0.01): 0.1600
Test error rate with best C (0.01): 0.1963


**F:**

In [52]:
# Radial-basis-function kernel SVC at the same small cost, C=0.01
svc_rbf = SVC(C=0.01, kernel='rbf').fit(X_train_scaled, y_train)

# Total support vectors = sum of the per-class counts
n_support_rbf = np.sum(svc_rbf.n_support_)
print("Number of support vectors (RBF, C=0.01):", n_support_rbf)


Number of support vectors (RBF, C=0.01): 617


In [53]:
# Training split: predictions and error rate for the C=0.01 RBF model
train_preds_rbf = svc_rbf.predict(X_train_scaled)
train_error_rbf = 1 - accuracy_score(y_true=y_train, y_pred=train_preds_rbf)

# Test split: predictions and error rate
test_preds_rbf = svc_rbf.predict(X_test_scaled)
test_error_rbf = 1 - accuracy_score(y_true=y_test, y_pred=test_preds_rbf)

print(f"Training error (RBF, C=0.01): {train_error_rbf:.4f}")
print(f"Test error (RBF, C=0.01): {test_error_rbf:.4f}")


Training error (RBF, C=0.01): 0.3825
Test error (RBF, C=0.01): 0.4111


In [61]:
# Candidate cost values spanning several orders of magnitude
param_grid_rbf = dict(C=[0.01, 0.1, 1, 10, 100])

# 10-fold cross-validated search over C for the RBF kernel
grid_rbf = GridSearchCV(
    estimator=SVC(kernel='rbf'),
    param_grid=param_grid_rbf,
    cv=10,
).fit(X_train_scaled, y_train)

# Cost value selected by the search
best_C_rbf = grid_rbf.best_params_['C']

print(f"Optimal C for RBF kernel: {best_C_rbf}")



Optimal C for RBF kernel: 1


In [62]:
# Train an RBF SVC at the cross-validated cost, with gamma='scale'
svc_best_rbf = SVC(C=best_C_rbf, kernel='rbf', gamma='scale').fit(X_train_scaled, y_train)

# Predictions on both splits
train_preds_best_rbf, test_preds_best_rbf = (
    svc_best_rbf.predict(features) for features in (X_train_scaled, X_test_scaled)
)

# Error rate is the complement of accuracy
train_error_best_rbf = 1 - accuracy_score(y_train, train_preds_best_rbf)
test_error_best_rbf = 1 - accuracy_score(y_test, test_preds_best_rbf)

print(f"Training error (RBF): {train_error_best_rbf:.4f}")
print(f"Test error (RBF): {test_error_best_rbf:.4f}")


Training error (RBF): 0.1462
Test error (RBF): 0.1889


**G:**

In [63]:
# Degree-2 polynomial kernel SVC at cost C=0.01
svc_poly = SVC(C=0.01, kernel='poly', degree=2).fit(X_train_scaled, y_train)

# Total support vectors = sum of the per-class counts
n_support_poly = np.sum(svc_poly.n_support_)
print("Number of support vectors (Poly, C=0.01):", n_support_poly)


Number of support vectors (Poly, C=0.01): 618


In [64]:
# Training split: predictions and error rate for the C=0.01 polynomial model
train_preds_poly = svc_poly.predict(X_train_scaled)
train_error_poly = 1 - accuracy_score(y_true=y_train, y_pred=train_preds_poly)

# Test split: predictions and error rate
test_preds_poly = svc_poly.predict(X_test_scaled)
test_error_poly = 1 - accuracy_score(y_true=y_test, y_pred=test_preds_poly)

print(f"Training error (Poly, C=0.01): {train_error_poly:.4f}")
print(f"Test error (Poly, C=0.01): {test_error_poly:.4f}")


Training error (Poly, C=0.01): 0.3675
Test error (Poly, C=0.01): 0.4074


In [65]:
# Candidate cost values for the search, in the {parameter name: values} form
# GridSearchCV expects. The original cell passed `C_values`, a name the notebook
# never defines, so it raised NameError on a fresh kernel; the grid is now
# declared in this cell.
# NOTE(review): the grid is assumed to be the same one used for the RBF search — confirm.
param_grid_poly = {'C': [0.01, 0.1, 1, 10, 100]}

# 10-fold cross-validated search over C for the degree-2 polynomial kernel
grid_poly = GridSearchCV(SVC(kernel='poly', degree=2), param_grid=param_grid_poly, cv=10)
grid_poly.fit(X_train_scaled, y_train)

# Cost value selected by the search
best_C_poly = grid_poly.best_params_['C']
print(f"Optimal C for Polynomial kernel: {best_C_poly}")


Optimal C for Polynomial kernel: 10


In [66]:
# Train a degree-2 polynomial SVC at the cross-validated cost
svc_best_poly = SVC(C=best_C_poly, kernel='poly', degree=2).fit(X_train_scaled, y_train)

# Training split: predictions and error rate
train_preds_best_poly = svc_best_poly.predict(X_train_scaled)
train_error_best_poly = 1 - accuracy_score(y_train, train_preds_best_poly)

# Test split: predictions and error rate
test_preds_best_poly = svc_best_poly.predict(X_test_scaled)
test_error_best_poly = 1 - accuracy_score(y_test, test_preds_best_poly)

print(f"Training error (Poly, best C={best_C_poly}): {train_error_best_poly:.4f}")
print(f"Test error (Poly, best C={best_C_poly}): {test_error_best_poly:.4f}")


Training error (Poly, best C=10): 0.1963
Test error (Poly, best C=10): 0.2444


In [67]:
# Side-by-side summary of the tuned models: chosen cost and test error per kernel.
print("🔍 Final Comparison of SVM Models:\n")

# (label, tuned C, test error) for each kernel. The linear entry reads
# best_C_linear; the original printed `best_C`, which is never defined and
# raised NameError on a fresh kernel.
tuned_models = [
    ("Linear Kernel", best_C_linear, test_error_best),
    ("RBF Kernel", best_C_rbf, test_error_best_rbf),
    ("Polynomial Kernel (degree=2)", best_C_poly, test_error_best_poly),
]

for kernel_label, tuned_C, tuned_test_error in tuned_models:
    print(f"{kernel_label}:")
    print(f"  Best C: {tuned_C}")
    print(f"  Test Error Rate: {tuned_test_error:.4f}\n")


🔍 Final Comparison of SVM Models:

Linear Kernel:
  Best C: 0.01
  Test Error Rate: 0.1963

RBF Kernel:
  Best C: 1
  Test Error Rate: 0.1889

Polynomial Kernel (degree=2):
  Best C: 10
  Test Error Rate: 0.2444



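**Conclusion:** With tuned C, the RBF kernel gives the lowest test error (0.1889), narrowly ahead of the linear kernel (0.1963); the degree-2 polynomial kernel is clearly the worst (0.2444).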