# In [5]:  (Jupyter notebook cell prompt, kept as a comment so the file parses as Python)
import sys
sys.path.append('/project2/alvinjin_1630/results')
from Trainer import Trainer
import os
import joblib
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier

# ============================================================
# 1. Load and prepare dataset
# ============================================================

# Trainer.full_dataset is indexed positionally: element 0 holds the points
# (noted in the original as (x, y) coordinates) and element 1 the labels
# (noted in the original as 0 or 1).
data = Trainer.full_dataset
points, labels = np.array(data[0]), np.array(data[1])

# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    points,
    labels,
    test_size=0.2,
    random_state=42,
)

# ============================================================
# 2. Define file paths for saved models
# ============================================================

# Locations of the serialized classifiers. Both paths are relative, so they
# resolve against the current working directory at run time.
KNN_MODEL_PATH = "knn_model.pkl"
SVM_MODEL_PATH = "svm_model.pkl"

# ============================================================
# 3. Train and save models if not already saved
# ============================================================

# Reuse the SVM saved at SVM_MODEL_PATH when it exists; otherwise fit a new
# one on the training split and persist it for later runs.
if os.path.exists(SVM_MODEL_PATH):
    print("Loading saved SVM model...")
    clf = joblib.load(SVM_MODEL_PATH)
else:
    print("Training SVM model...")
    # Features are standardized before reaching the RBF-kernel SVC; fit()
    # returns the fitted pipeline itself, so the call can be chained.
    clf = make_pipeline(
        StandardScaler(),
        svm.SVC(C=100, gamma=10, kernel='rbf'),
    ).fit(X_train, y_train)
    joblib.dump(clf, SVM_MODEL_PATH)
    print(f"SVM model saved to {SVM_MODEL_PATH}")

# Reuse the KNN classifier saved at KNN_MODEL_PATH when it exists; otherwise
# fit a 3-neighbour model on the (unscaled) training split and persist it.
if os.path.exists(KNN_MODEL_PATH):
    print("Loading saved KNN model...")
    knn = joblib.load(KNN_MODEL_PATH)
else:
    print("Training KNN model...")
    # fit() returns the fitted estimator itself, so the call can be chained.
    knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
    joblib.dump(knn, KNN_MODEL_PATH)
    print(f"KNN model saved to {KNN_MODEL_PATH}")

# ============================================================
# 4. Evaluate accuracy (optional)
# ============================================================

# SVM: predict the held-out split once, then report overall accuracy
# followed by the per-class precision/recall report.
y_pred_svm = clf.predict(X_test)
print("SVM Accuracy: {:.2f}".format(accuracy_score(y_test, y_pred_svm)))
print("SVM Classification Report:")
print(classification_report(y_test, y_pred_svm))

# KNN: score() predicts on X_test and returns the mean accuracy directly.
print("KNN Accuracy: {:.2f}".format(knn.score(X_test, y_test)))

# ============================================================
# 5. Helper functions for evaluating new data
# ============================================================

def svm_label_accuracy(true_label, points):
    """
    Return the percentage of ``points`` that the saved SVM model
    classifies as ``true_label``.

    Args:
        true_label: Label value each prediction is compared against.
        points: Samples to classify, in any form ``np.array`` accepts.
            Presumably shaped (n_samples, n_features) like the training
            data -- confirm against callers.

    Returns:
        The share of predictions equal to ``true_label``, as a percentage
        between 0 and 100.

    Raises:
        ValueError: If ``points`` contains no samples.
    """
    points = np.array(points)
    # Fail fast with a clear message before paying for the model load;
    # without this guard an empty input only fails later, inside the
    # estimator's own input validation, with an unrelated shape error.
    if points.size == 0:
        raise ValueError("points must contain at least one sample")

    # Load model (works even in read-only environments).
    # NOTE: joblib.load unpickles the file, so SVM_MODEL_PATH must only
    # ever point at a trusted, locally produced model.
    clf = joblib.load(SVM_MODEL_PATH)
    preds = clf.predict(points)
    correct = np.sum(preds == true_label)
    return (correct / len(points)) * 100


def knn_label_accuracy(true_label, points):
    """
    Return the percentage of ``points`` that the saved KNN model
    classifies as ``true_label``.

    Args:
        true_label: Label value each prediction is compared against.
        points: Samples to classify, in any form ``np.array`` accepts.
            Presumably shaped (n_samples, n_features) like the training
            data -- confirm against callers.

    Returns:
        The share of predictions equal to ``true_label``, as a percentage
        between 0 and 100.

    Raises:
        ValueError: If ``points`` contains no samples.
    """
    points = np.array(points)
    # Fail fast with a clear message before paying for the model load;
    # without this guard an empty input only fails later, inside the
    # estimator's own input validation, with an unrelated shape error.
    if points.size == 0:
        raise ValueError("points must contain at least one sample")

    # The model is read from disk on every call rather than taken from a
    # module-level variable.
    # NOTE: joblib.load unpickles the file, so KNN_MODEL_PATH must only
    # ever point at a trusted, locally produced model.
    knn = joblib.load(KNN_MODEL_PATH)
    preds = knn.predict(points)
    correct = np.sum(preds == true_label)
    return (correct / len(points)) * 100

# ============================================================
# Example usage (can be commented out in production)
# ============================================================

# if __name__ == "__main__":
#     sample_points = [[0.2, 0.3], [1.1, 0.9], [0.8, 1.2]]
#     print(f"SVM accuracy on label 1 points: {svm_label_accuracy(1, sample_points):.2f}%")
#     print(f"KNN accuracy on label 1 points: {knn_label_accuracy(1, sample_points):.2f}%")


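# Notebook cell output (appears to have been captured with the cell):
# /project2/alvinjin_1630
#
# ModuleNotFoundError: No module named 'pandas'
# NOTE(review): pandas is not imported directly in this cell, so the error
# presumably comes from an import inside Trainer -- confirm and install pandas.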