<a href="https://colab.research.google.com/github/ccrsypherd/FoxFarm/blob/main/Final_AugmentedData_NoRFE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
### Initialize ###

!pip install nilearn
import warnings
from nilearn.connectome import ConnectivityMeasure
# ignore the warning message from nilearn
warnings.filterwarnings("ignore", category=FutureWarning)

import torch
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from nilearn.connectome import ConnectivityMeasure
from google.colab import drive
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from matplotlib.colors import ListedColormap
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Mount Google Drive
drive.mount('/content/gdrive')

# Define the paths to the data folders
data_dir = '/content/gdrive/MyDrive/ds18/smalldense/'
class_folders = os.listdir(data_dir)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
### Load Original Data and Labels ###
# Initialize empty lists for data and labels
data = []
labels = []

# Loop over the class folders
for i, class_folder in enumerate(class_folders):
    class_path = os.path.join(data_dir, class_folder)
    file_names = os.listdir(class_path)
    
    # Loop over the files in the class folder
    for file_name in file_names:
        if file_name.endswith('.dot'):
            file_path = os.path.join(class_path, file_name)
            with open(file_path, 'r') as f:
                file_contents = np.load(file_path)
                # Append the file contents to the data list
                data.append(file_contents)
                # Append the class label to the labels list
                labels.append(i)
# Convert the data and labels to numpy arrays
data = np.array(data)
print(data.shape)
labels = np.array(labels)
print(labels)

(30, 3001, 3001)
[0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2]


In [3]:
# Define the amount of perturbation to apply to each matrix
perturbation_scale = 0.01

# Define the number of new matrices to create by perturbing each original matrix
num_augmentations = 3

# Initialize empty arrays to hold the augmented data and labels
augmented_data = np.zeros((data.shape[0] * num_augmentations, data.shape[1], data.shape[2]))
augmented_labels = np.zeros((data.shape[0] * num_augmentations,), dtype=labels.dtype)

# Loop over each matrix in the original dataset
for i in range(data.shape[0]):
    # Add random noise to create multiple new matrices
    for j in range(num_augmentations):
        perturbation = perturbation_scale * np.random.randn(data.shape[1], data.shape[2])
        new_matrix = data[i] + perturbation
        augmented_data[i*num_augmentations+j] = new_matrix
        augmented_labels[i*num_augmentations+j] = labels[i]

# Check the shape of the combined dataset
print(augmented_data.shape)  # Output: (120, 3001, 3001)

# The augmented data will now contain (30 * num_augmentations) matrices, with corresponding labels
del data
del labels

# Flatten each correlation matrix into a one-dimensional array
feature_vectors = augmented_data.reshape(augmented_data.shape[0], -1)
print(feature_vectors.shape)
del augmented_data

(90, 3001, 3001)
(90, 9006001)


In [4]:
#### Apply SVM Directly to the Flattened Matrices of 900301 ###

# Define kernel values to loop over
kernels = ['linear', 'poly', 'rbf', 'sigmoid']

# Define number of iterations
num_iterations = 5

# Initialize results array
results = np.zeros((num_iterations, 5))

# Loop over different kernel values
for kernel in kernels:
    # Loop over iterations
    for i in range(num_iterations):
        print(i)
        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(feature_vectors, augmented_labels, test_size=0.2)

        # Train SVM classifier
        svm = SVC(kernel=kernel)

        # Fit model and make predictions
        svm.fit(X_train, y_train)
        y_pred = svm.predict(X_test)

        # Calculate metrics and store in results array
        results[i][0] = accuracy_score(y_train, svm.predict(X_train))
        results[i][1] = accuracy_score(y_test, y_pred)
        results[i][2] = precision_score(y_test, y_pred, average='macro')
        results[i][3] = recall_score(y_test, y_pred, average='macro')
        results[i][4] = f1_score(y_test, y_pred, average='macro')

    # Print results for each kernel
    print(f'Kernel: {kernel}')
    print(f'{"Training Accuracy":<10} {"Testing Accuracy":<10} {"Precision":<10} {"Recall":<10} {"F1 Score":<10}')
    for i in range(num_iterations):
        print(f'{results[i][0]:<10.3f} {results[i][1]:<10.3f} {results[i][2]:<10.3f} {results[i][3]:<10.3f} {results[i][4]:<10.3f}')
    print(f'Mean:      {np.mean(results[:, 0]):<10.3f} {np.mean(results[:, 1]):<10.3f} {np.mean(results[:, 2]):<10.3f} {np.mean(results[:, 3]):<10.3f} {np.mean(results[:, 4]):<10.3f}')
    print('-------------------------------------------------------')

0
1
2
3
4
Kernel: linear
Training Accuracy Testing Accuracy Precision  Recall     F1 Score  
1.000      1.000      1.000      1.000      1.000     
1.000      1.000      1.000      1.000      1.000     
1.000      1.000      1.000      1.000      1.000     
1.000      1.000      1.000      1.000      1.000     
1.000      0.833      0.889      0.900      0.875     
Mean:      1.000      0.967      0.978      0.980      0.975     
-------------------------------------------------------
0
1
2
3
4
Kernel: poly
Training Accuracy Testing Accuracy Precision  Recall     F1 Score  
0.847      0.944      0.967      0.944      0.952     
0.792      0.833      0.900      0.822      0.837     
0.806      0.944      0.952      0.917      0.927     
0.792      0.833      0.875      0.875      0.846     
0.819      0.722      0.861      0.689      0.703     
Mean:      0.811      0.856      0.911      0.849      0.853     
-------------------------------------------------------
0
1
2
3
4
Kernel: rbf


In [None]:
######## t-SNE Data to 2D to Visualize SVM #########
num_iterations = 10
# Define colors for each class
colors = ['yellow', 'green', 'red']
class_labels = ['Control', 'Tame', 'Aggressive']

# Define perplexity and kernel values to loop over
perplexities = [5, 10, 15]
kernels = ['linear', 'poly', 'rbf', 'sigmoid']

# Loop over different perplexity and kernel values
for perplexity in perplexities:
    singleplot = 1
    for kernel in kernels:
        # Perform t-SNE
        tsne = TSNE(n_components=2, perplexity=perplexity, random_state = 1)
        X_2d = tsne.fit_transform(feature_vectors)
        
        # Print results for each kernel
        print(f'Kernel: {kernel}')
        print(f'Perplexity: {perplexity}')

        for i in range(num_iterations):
        
            # Split data into training and testing sets
            X_train, X_test, y_train, y_test = train_test_split(X_2d, augmented_labels, test_size=0.2)

            # Fit model and make predictions
            svm.fit(X_train, y_train)
            y_pred = svm.predict(X_test)

            # Calculate metrics and store in results array
            results[i][0] = accuracy_score(y_train, svm.predict(X_train))
            results[i][1] = accuracy_score(y_test, y_pred)
            
        print(f'Mean:      {np.mean(results[:, 0]):<10.3f} {np.mean(results[:, 1]):<10.3f}')    
        print('-------------------------------------------------------')

        # Plot decision boundary and scatter plot
        xx, yy = np.meshgrid(np.linspace(X_2d[:, 0].min()-1, X_2d[:, 0].max()+1, 100),
                             np.linspace(X_2d[:, 1].min()-1, X_2d[:, 1].max()+1, 100))
        Z = svm.predict(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)

        plt.figure(figsize=(8, 8))
        plt.gca().set_facecolor('honeydew') # set background color
        plt.contourf(xx, yy, Z, alpha=0.4, cmap=ListedColormap(['forestgreen','gold','lightblue','lightcoral','gray']), levels=[-1,0,1])
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        plt.title('SVM classifier on t-SNE features\nPerplexity: {}, Kernel: {}, Accuracy: {:.2f}'.format(perplexity, kernel, np.mean(results[:, 1])))

        scatter_train = plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=ListedColormap(colors), alpha=0.8, edgecolors='none')
        scatter_test = plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=ListedColormap(colors), alpha=0.8, edgecolors='k')
        
        handles_train, labels_train = scatter_train.legend_elements()
        handles_test, labels_test = scatter_test.legend_elements()
        plt.legend(handles_train + handles_test, class_labels, loc='upper left')

        # Save figure to file
        plt.savefig('ADataNoRFE_perp{}_kernel{}.png'.format(perplexity, kernel))