In [1]:
# Import required libraries
import os

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.utils import shuffle

In [4]:
# Load dataset
# The CSV location can be overridden with the IRIS_CSV_PATH environment
# variable so the notebook can run on machines other than the author's.
# When the variable is unset, the original absolute path is used unchanged.
IRIS_CSV_PATH = os.environ.get(
    "IRIS_CSV_PATH", "/Users/jacobfrancis/dev/csc180/csv/IRIS.csv"
)
data = pd.read_csv(IRIS_CSV_PATH)

In [5]:
# Replace the species names with integer class codes.
# The fitted encoder is kept in `le` for later use.
le = LabelEncoder().fit(data['species'])
data['species'] = le.transform(data['species'])

In [6]:
# Split the frame into the four measurement columns (features, X)
# and the species column (labels, y).
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
X = data[feature_columns]
y = data['species']


In [7]:
# Apply one seeded random permutation to X and y in a single call so the
# feature rows and their labels stay aligned.
# NOTE(review): a random shuffle does not by itself guarantee class-balanced
# folds; confirm whether stratified splitting is wanted.
shuffled_X, shuffled_y = shuffle(X, y, random_state=42)
X = shuffled_X
y = shuffled_y

In [8]:
# Cross-validation splitter: 5 folds, with shuffling enabled and a fixed
# seed so the fold assignment is repeatable.
kfold = KFold(
    random_state=42,
    shuffle=True,
    n_splits=5,
)

In [9]:
# Build the three (still unfitted) classifiers that will be compared.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
svm_model = SVC(kernel='linear')  # linear kernel is the starting point
log_model = LogisticRegression(max_iter=200)

In [10]:
# Score every model on the same splitter; each result is an array holding
# one score per fold. Models are evaluated in the order listed.
log_scores, svm_scores, rf_scores = (
    cross_val_score(model, X, y, cv=kfold)
    for model in (log_model, svm_model, rf_model)
)

In [11]:
# Report the mean of the per-fold scores for each model, to four decimals.
# The label strings carry their own trailing padding so the numbers line up.
print("Average Accuracy (5-Fold CV):")
summary_rows = (
    ("Logistic Regression: ", log_scores),
    ("SVM (linear kernel): ", svm_scores),
    ("Random Forest:       ", rf_scores),
)
for label, fold_scores in summary_rows:
    print(f"{label}{fold_scores.mean():.4f}")

Average Accuracy (5-Fold CV):
Logistic Regression: 0.9600
SVM (linear kernel): 0.9867
Random Forest:       0.9533


In [12]:
# Repeat the cross-validation with each SVC kernel (all other SVC settings
# left at their defaults) and print the mean score per kernel.
# NOTE(review): the recorded sigmoid score is far below the other kernels;
# presumably related to the features being unscaled — confirm.
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
print("\nSVM Accuracy by Kernel:")
for kernel_name in kernels:
    svm = SVC(kernel=kernel_name)
    scores = cross_val_score(svm, X, y, cv=kfold)
    mean_accuracy = scores.mean()
    print(f"Kernel = {kernel_name:8s} --> Accuracy: {mean_accuracy:.4f}")


SVM Accuracy by Kernel:
Kernel = linear   --> Accuracy: 0.9867
Kernel = poly     --> Accuracy: 0.9600
Kernel = rbf      --> Accuracy: 0.9667
Kernel = sigmoid  --> Accuracy: 0.2133
