In [10]:
%matplotlib inline
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt

def load_data(file_path):
    """Load features and labels from a space-separated file."""
    data = np.loadtxt(file_path, delimiter='\t')
    X = data[:, :-1]  # Extract features
    y = data[:, -1].astype(int)  # Extract labels and ensure they are integers
    return X, y

def plot_decision_boundary(clf, X, y):
    """Plot the decision boundary of a linear classifier."""
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.8)
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap=plt.cm.Paired)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title('SVM Decision Boundary')
    plt.show()

def load_unlabeled_data(file_path):
    """Load unlabeled data from a space-separated file."""
    data = np.loadtxt(file_path, delimiter='\t')
    return data  # Assuming the unlabeled data does not include a label column

# Load the dataset
file_path = '/Users/jnaysha/Desktop/Naysha\'s work/Courses/Physics/Music Machine Learning/rock/results.txt'
X, y = load_data(file_path)

# Split the dataset into training and test sets (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the classifier
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)

# Predict on the test set
y_pred = clf.predict(X_test)

# Evaluate the classifier
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Optionally, plot the decision boundary (for 2D data)
#plot_decision_boundary(clf, np.vstack((X_train, X_test)), np.hstack((y_train, y_test)))

# # Load unlabeled data
# unlabeled_data_path = '/Users/nathaliehaurberg/Data/Punk/Music/PHYSIQUE - The Rhythm of Brutality/test_new.txt'
# unlabeled_data = load_unlabeled_data(unlabeled_data_path)

# # Predict on unlabeled data
# unlabeled_predictions = clf.predict(unlabeled_data)

# # Determine if unlabeled data points belong to category 1
# category_1_predictions = unlabeled_predictions == 1

# # Print indices of data points predicted to belong to category 1
# print("Indices of unlabeled data points predicted to belong to category 1:")
# print(np.where(category_1_predictions)[0])


'''
Alternative ways to initialize the classifier:
# Initialize the classifier with RBF kernel
clf = SVC(kernel='rbf')
# Initialize the classifier with a polynomial kernel of degree 3
clf = SVC(kernel='poly', degree=3)
# Initialize the classifier with RBF kernel and a specific gamma
clf = SVC(kernel='rbf', gamma='scale')  # 'scale' is default in newer versions, 'auto' uses 1/n_features
'''

Accuracy: 0.0
Confusion Matrix:
 [[0 0 0 ... 1 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Classification Report:
               precision    recall  f1-score   support

        1219       0.00      0.00      0.00       1.0
        1780       0.00      0.00      0.00       1.0
        1940       0.00      0.00      0.00       1.0
        2552       0.00      0.00      0.00       0.0
        2627       0.00      0.00      0.00       1.0
        3149       0.00      0.00      0.00       0.0
        3638       0.00      0.00      0.00       0.0
        3657       0.00      0.00      0.00       1.0
        4454       0.00      0.00      0.00       1.0
        4621       0.00      0.00      0.00       1.0
        4665       0.00      0.00      0.00       1.0
        4751       0.00      0.00      0.00       1.0
        5199       0.00      0.00      0.00       1.0
        5212       0.00      0.00      0.00       1.0
        5353

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


"\nAlternative ways to initialize the classifier:\n# Initialize the classifier with RBF kernel\nclf = SVC(kernel='rbf')\n# Initialize the classifier with a polynomial kernel of degree 3\nclf = SVC(kernel='poly', degree=3)\n# Initialize the classifier with RBF kernel and a specific gamma\nclf = SVC(kernel='rbf', gamma='scale')  # 'scale' is default in newer versions, 'auto' uses 1/n_features\n"