In [None]:
# Evaluate a previously trained, pickled model on the "-over" test split and
# print a per-class precision/recall/F1 report.
import joblib
import numpy as np
from sklearn.metrics import classification_report

# Held-out features and labels used for the evaluation.
test_features = np.load('models/test_features-3-over.npy')
test_labels = np.load('models/test_labels-3-over.npy')

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files from a trusted source.
# NOTE(review): the model artifact is tagged "7" while the data is tagged "3";
# confirm the two were produced by the same preprocessing run.
loaded_svm_model = joblib.load('models/model-7-over.pkl')

print("Making predictions...")
predictions = loaded_svm_model.predict(test_features)

# Compare the predicted labels against the ground truth, class by class.
report = classification_report(test_labels, predictions)
print(report)

Plot learning curves to see whether the model is overfitting.

In [None]:
# Learning-curve diagnostic: refit a fresh RBF-kernel SVM on growing subsets
# of the "-over" training split and compare the training accuracy with the
# 5-fold cross-validated accuracy at each subset size.
# Relies on `np` having been imported by an earlier cell.
from datetime import datetime

import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve
from sklearn.svm import SVC

svm_model = SVC(kernel='rbf', C=75, gamma=0.01)

training_f = np.load('models/training_features-3-over.npy')
training_l = np.load('models/training_labels-3-over.npy')

# NOTE(review): if the "-over" arrays were oversampled before this point,
# copies of the same sample can end up in both the training and validation
# folds, which would make the cross-validation score optimistic -- confirm.
print("Plotting learning curve..." + datetime.now().strftime("%H:%M:%S"))
train_sizes, train_scores, test_scores = learning_curve(
    svm_model,
    training_f,
    training_l,
    cv=5,
    scoring='accuracy',
    n_jobs=4,
    # Five subset sizes, from 1% up to 100% of each training fold.
    train_sizes=np.linspace(0.01, 1.0, 5),
    # learning_curve takes each subset as a prefix of the training fold.
    # Without shuffling, the small subsets depend on the order of the saved
    # arrays and may contain a single class, which SVC cannot be fitted on.
    shuffle=True,
    random_state=42,  # fixed seed so the curve is reproducible across runs
    verbose=1,
)

# Rows are subset sizes, columns are CV folds: aggregate over the folds.
# `test_scores` holds the scores on the cross-validation folds, not on the
# held-out test split.
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
val_scores_mean = np.mean(test_scores, axis=1)
val_scores_std = np.std(test_scores, axis=1)

fig, ax = plt.subplots()
ax.set_title("Learning Curve")
ax.set_xlabel("Training Examples")
ax.set_ylabel("Score")
ax.grid(True)

# Shaded bands show +/- one standard deviation across the CV folds.
ax.fill_between(
    train_sizes,
    train_scores_mean - train_scores_std,
    train_scores_mean + train_scores_std,
    alpha=0.1,
    color="r",
)
ax.fill_between(
    train_sizes,
    val_scores_mean - val_scores_std,
    val_scores_mean + val_scores_std,
    alpha=0.1,
    color="g",
)

ax.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training Score")
ax.plot(train_sizes, val_scores_mean, 'o-', color="g", label="Cross-Validation Score")

ax.legend(loc="best")
plt.show()



In [None]:
# Train a class-weighted polynomial-kernel SVM on the "-3" training split and
# report its per-class performance on the matching test split.
# Relies on `np`, `datetime`, `SVC` and `classification_report` having been
# imported by earlier cells.
training_features = np.load('models/training_features-3.npy')
training_labels = np.load('models/training_labels-3.npy')

# np.bincount requires non-negative integer labels; entry i is the number of
# training samples whose label is i.
training_class_distribution = np.bincount(training_labels)

# Weight every class by (size of the largest class / its own size), so the
# largest class gets weight 1.0 and rarer classes get proportionally more.
# Only label values that actually occur are included: a label index with zero
# samples would otherwise produce a divide-by-zero (inf) weight and a dict key
# for a class that is absent from the labels, which scikit-learn may reject
# depending on its version.
present_classes = np.flatnonzero(training_class_distribution)
largest_class_count = training_class_distribution.max()
class_weights = {
    int(label): float(largest_class_count / training_class_distribution[label])
    for label in present_classes
}

new_svm_model = SVC(kernel='poly', C=99, gamma='auto', class_weight=class_weights)
print("Training model..." + datetime.now().strftime("%H:%M:%S"))
new_svm_model.fit(training_features, training_labels)
# NOTE(review): this message used to read "Saving model..." although nothing
# is written to disk in this cell. If the fitted model should be persisted,
# add a joblib.dump(new_svm_model, <path>) call here.
print("Training finished..." + datetime.now().strftime("%H:%M:%S"))

# load test features and test labels
test_features = np.load('models/test_features-3.npy')
test_labels = np.load('models/test_labels-3.npy')

# make predictions
print("Making predictions...")
predictions = new_svm_model.predict(test_features)
print(classification_report(test_labels, predictions))