In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn.svm import SVC
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from scipy import stats as st
import pickle
import joblib
from datetime import datetime
from tqdm import tqdm
from sklearn.preprocessing import normalize


In [2]:
# Load the cudf.pandas IPython extension for this kernel.
# NOTE(review): pandas was already imported in the first cell, before this
# extension is loaded. The cudf.pandas docs ask for the extension to be loaded
# before any pandas import -- confirm acceleration is actually active here.
%load_ext cudf.pandas

In [3]:
# Mount Google Drive at /content/drive; every data and output path used later
# in this notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [4]:
# Copy arial.ttf from Drive into a system TrueType font directory so the
# font-registration cell below can find it.
!cp /content/drive/MyDrive/Dopamine-Astrocyte-Behavior/arial.ttf /usr/share/fonts/truetype/liberation

In [5]:
from matplotlib import font_manager
from matplotlib import rcParams

# Refresh the system font cache (the Arial file was copied in by the previous cell).
os.system('fc-cache -f -v')

# Register every font file found under the system TrueType directory with matplotlib.
font_files = font_manager.findSystemFonts(fontpaths='/usr/share/fonts/truetype/')
for ttf_path in font_files:
    font_manager.fontManager.addfont(ttf_path)
print(font_manager.findSystemFonts(fontpaths=None, fontext='ttf'))

# Figure defaults used by every plot in this notebook.
rcParams.update({
    'figure.figsize': [6, 4],
    'font.size': 6,
    'pdf.fonttype': 42,
    'font.family': 'Arial',
})

['/usr/share/fonts/truetype/humor-sans/Humor-Sans.ttf', '/usr/share/fonts/truetype/liberation/LiberationSerif-Bold.ttf', '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf', '/usr/share/fonts/truetype/liberation/LiberationSans-Italic.ttf', '/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf', '/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Bold.ttf', '/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf', '/usr/share/fonts/truetype/liberation/LiberationMono-BoldItalic.ttf', '/usr/share/fonts/truetype/liberation/LiberationSerif-Italic.ttf', '/usr/share/fonts/truetype/liberation/arial.ttf', '/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf', '/usr/share/fonts/truetype/liberation/LiberationSerif-BoldItalic.ttf', '/usr/share/fonts/truetype/liberation/LiberationSans-BoldItalic.ttf', '/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Italic.ttf', '/usr/share/fonts/truetype/liberation/LiberationMono-Italic.ttf', '/usr

In [6]:

def heatmap(y_test,y_predicted,variable,model_type, labels):
    """Plot, save and export a row-normalised confusion matrix.

    Prints the accuracy of the predictions, draws the confusion matrix as a
    seaborn heatmap, saves the figure as a PDF and the normalised matrix as a
    CSV, then shows the figure.

    Parameters
    ----------
    y_test : array-like
        True class labels.
    y_predicted : array-like
        Predicted class labels.
    variable : str
        Signal name containing at least one '-'; the first two '-'-separated
        parts are used in the title (e.g. 'Green-L-z(Ast)' -> 'Green-L').
    model_type : str
        Free-text description inserted in the title (and so in the file names).
    labels : sequence of str
        Tick labels for the heatmap and index/columns of the exported CSV.
        Its length must equal the number of classes in the confusion matrix.

    Notes
    -----
    The output directory is built from the module-level globals ``today`` and
    ``task``; both must be defined before this function is called.
    """
    conf_matrix = confusion_matrix(y_test, y_predicted)
    # L1-normalising each row turns counts into per-true-class proportions.
    conf_matrix_normalized = normalize(conf_matrix, axis=1, norm='l1')

    # Report the accuracy value (the original printed the function object).
    print(accuracy_score(y_test, y_predicted))

    out_dir = f'/content/drive/MyDrive/Dopamine-Astrocyte-Behavior/{today}-{task}'
    os.makedirs(out_dir, exist_ok=True)

    splits = variable.split('-')
    title = f'Animal {model_type} Confusion Matrix: {splits[0]}-{splits[1]}'

    # Draw on an explicit figure so repeated calls never stack on stale axes.
    fig, ax = plt.subplots()
    sns.heatmap(conf_matrix_normalized, annot=True, fmt='.2f', cmap='Blues', vmin=0, vmax=0.8,
                xticklabels=labels, yticklabels=labels, ax=ax)
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    ax.set_title(title)
    fig.savefig(f'{out_dir}/{title}.pdf')

    conf_matrix_df = pd.DataFrame(conf_matrix_normalized, index=labels, columns=labels)
    conf_matrix_df.to_csv(f'{out_dir}/conf_matrix_normalized_{title}.csv')

    plt.show()
    # Release the figure; this function is called many times per run.
    plt.close(fig)

variable_names = ['Green-L-z(Ast)', 'Green-R-z(Ast)', 'Red-L-z(DA)', 'Red-R-z(DA)']
def get_variable_data(variable, all_necessary_dfs):
    """Extract one signal from every frame, drop its last row and transpose it.

    Parameters
    ----------
    variable : int
        Position in ``variable_names`` of the column label to select.
    all_necessary_dfs : iterable of pandas.DataFrame
        Frames that each contain the selected column label (possibly several
        times, in which case all matching columns are kept).

    Returns
    -------
    list
        One entry per input frame, in the same order: the selected column(s)
        without the final row, transposed.
    """
    def _trimmed_transposed(frame):
        return frame[variable_names[variable]][:-1].T

    return list(map(_trimmed_transposed, all_necessary_dfs))


In [7]:
import warnings
warnings.filterwarnings("ignore")
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

from tensorflow.keras.layers import Dropout
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
from keras.callbacks import ReduceLROnPlateau

from tqdm import tqdm
def _stack_class_frames(per_class_frames, under_sample):
    """Label each per-class frame with its position and stack them into (X, y).

    Each frame gets a 'port' column holding its index in ``per_class_frames``.
    With ``under_sample`` truthy, every frame is randomly reduced (seed 42) to
    the row count of the smallest frame. NaNs in the features become 0.
    """
    smallest_class = int(np.min([frame.shape[0] for frame in per_class_frames]))
    labelled_frames = []
    for class_idx, frame in enumerate(per_class_frames):
        frame['port'] = class_idx
        if under_sample:
            # n= draws the same rows as frac=min/len but avoids dividing by len.
            frame = frame.sample(n=smallest_class, random_state=42)
        labelled_frames.append(frame)
    # Concatenate once instead of growing a frame inside the loop.
    combined = pd.concat(labelled_frames, axis=0)
    features = np.asarray(combined.drop(columns=['port']).fillna(0).values)
    targets = combined['port'].values
    return features, targets


def _balanced_class_weights(y_train):
    """Return {class_id: weight} using sklearn's 'balanced' heuristic.

    Keys are the class ids actually present in ``y_train``, not positions, so
    a class missing from the training split is not silently mis-weighted.
    """
    classes = np.unique(y_train)
    weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
    return {int(cls): float(weight) for cls, weight in zip(classes, weights)}


def _build_cnn(n_timesteps, n_classes):
    """Build and compile the 1D-CNN classifier for (n_timesteps, 1) inputs."""
    cnn = Sequential()
    cnn.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, 1)))
    cnn.add(MaxPooling1D(pool_size=2))
    cnn.add(Dropout(0.3))
    cnn.add(Flatten())
    cnn.add(Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)))
    cnn.add(Dropout(0.3))
    cnn.add(Dense(n_classes, activation='softmax'))
    cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return cnn


def _build_lstm(n_timesteps, n_features, n_classes):
    """Build and compile the two-layer LSTM classifier."""
    lstm = Sequential()
    lstm.add(LSTM(32, input_shape=(n_timesteps, n_features), return_sequences=True))
    lstm.add(Dropout(0.3))
    lstm.add(LSTM(32, return_sequences=False))
    lstm.add(Dropout(0.3))
    lstm.add(Dense(16, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)))
    lstm.add(Dense(n_classes, activation='softmax'))
    lstm.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return lstm


def run_me(task, under_sample, all_necessary_dfs, labels):
    """Train SVC, CNN, random-forest and LSTM classifiers for every signal.

    For each entry of ``variable_names`` the per-class frames are stacked into
    a feature matrix, four classifiers are trained, and one confusion-matrix
    heatmap per classifier is produced through ``heatmap``.

    Parameters
    ----------
    task : str
        Task description; inserted into each heatmap title.
    under_sample : bool
        If true, every class is randomly reduced to the smallest class size.
    all_necessary_dfs : list of pandas.DataFrame
        One frame per class; a frame's position in the list is its class id.
    labels : sequence of str
        Class display names forwarded to ``heatmap``.

    Returns
    -------
    pandas.DataFrame
        Test accuracy per signal (rows) and classifier (columns 'SVC', 'CNN',
        'RFC', 'LSTM'). Earlier versions returned None; callers that ignore
        the return value are unaffected.

    Notes
    -----
    Reads the module-level global ``animal_for_this_code`` for the heatmap
    titles.
    NOTE(review): both neural networks use the test split as validation data,
    and the LSTM's early stopping restores the weights that scored best on it,
    so the reported LSTM accuracy is optimistic. Confirm whether a separate
    validation split is wanted.
    NOTE(review): RandomForestClassifier and the Keras models are not seeded,
    so results vary between runs.
    """
    accuracy_table = pd.DataFrame(index=variable_names, columns=['SVC', 'CNN', 'RFC', 'LSTM'], dtype=float)

    cnn_epochs = 500
    eval_batch_size = 30

    for signal_idx, signal_name in enumerate(tqdm(variable_names)):
        # Drop Keras state left over from models built in earlier iterations.
        tf.keras.backend.clear_session()

        per_class_frames = get_variable_data(signal_idx, all_necessary_dfs)
        X, y = _stack_class_frames(per_class_frames, under_sample)

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=77)
        print("X shape:", X.shape)
        print("y shape:", y.shape)

        # --- SVC with a small hyper-parameter grid search -------------------
        param_grid = {
            'C': [0.1, 1, 10],
            'gamma': [0.001, 0.01, 0.1],
            'kernel': ['linear', 'poly', 'sigmoid'],
        }
        svc_search = GridSearchCV(SVC(), param_grid, cv=5, scoring='accuracy', verbose=0)
        svc_search.fit(X_train, y_train)

        # --- 1D CNN ---------------------------------------------------------
        # Conv1D expects (samples, timesteps, channels); add the channel axis
        # explicitly instead of relying on Keras to infer it.
        X_train_cnn = X_train[..., np.newaxis]
        X_test_cnn = X_test[..., np.newaxis]
        cnn = _build_cnn(X_train.shape[1], len(np.unique(y_train)))
        cnn.fit(X_train_cnn, y_train, epochs=cnn_epochs, batch_size=32,
                validation_data=(X_test_cnn, y_test), verbose=0,
                class_weight=_balanced_class_weights(y_train))

        svc_accuracy = svc_search.score(X_test, y_test)
        _, cnn_accuracy = cnn.evaluate(X_test_cnn, y_test, batch_size=eval_batch_size, verbose=0)

        # --- Random forest --------------------------------------------------
        rf_classifier = RandomForestClassifier()
        rf_classifier.fit(X_train, y_train)
        y_pred_rf = rf_classifier.predict(X_test)
        rfc_accuracy = accuracy_score(y_test, y_pred_rf)

        # .loc with (row, column) writes into the table itself; the chained
        # .iloc[i]['Accuracy'] form may write to a temporary copy.
        accuracy_table.loc[signal_name, 'SVC'] = svc_accuracy
        accuracy_table.loc[signal_name, 'CNN'] = cnn_accuracy
        accuracy_table.loc[signal_name, 'RFC'] = rfc_accuracy

        y_pred_svc = svc_search.predict(X_test)
        y_pred_cnn = np.argmax(cnn.predict(X_test_cnn), axis=-1)

        heatmap(y_test, y_pred_svc, signal_name, model_type=f'{animal_for_this_code} {task} SVC', labels=labels)
        heatmap(y_test, y_pred_cnn, signal_name, model_type=f'{animal_for_this_code} {task} CNN', labels=labels)
        heatmap(y_test, y_pred_rf, signal_name, model_type=f'{animal_for_this_code} {task} RFC', labels=labels)

        # --- LSTM (its own 80/20 split on label-encoded targets) ------------
        y_encoded = LabelEncoder().fit_transform(y)
        X_train_lstm, X_test_lstm, y_train_lstm, y_test_lstm = train_test_split(
            X, y_encoded, test_size=0.2, random_state=42)

        # LSTM input is (samples, timesteps, features) with a single feature.
        X_train_lstm = X_train_lstm.reshape((X_train_lstm.shape[0], X_train_lstm.shape[1], 1))
        X_test_lstm = X_test_lstm.reshape((X_test_lstm.shape[0], X_test_lstm.shape[1], 1))

        lstm = _build_lstm(X_train_lstm.shape[1], X_train_lstm.shape[2], len(np.unique(y_train_lstm)))
        early_stopping = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
        lstm.fit(X_train_lstm, y_train_lstm, epochs=300, batch_size=16,
                 validation_data=(X_test_lstm, y_test_lstm),
                 callbacks=[early_stopping],
                 class_weight=_balanced_class_weights(y_train_lstm), verbose=0)

        y_pred_lstm = np.argmax(lstm.predict(X_test_lstm), axis=1)
        accuracy_table.loc[signal_name, 'LSTM'] = accuracy_score(y_test_lstm, y_pred_lstm)

        heatmap(y_test_lstm, y_pred_lstm, signal_name, model_type=f'{animal_for_this_code} {task} LSTM', labels=labels)

    return accuracy_table

In [8]:
def run_all_for_each_animal(animal_for_this_code, task, all_necessary_dfs):
    """Run the full classification pipeline twice for one animal.

    The first pass uses all samples under the name ``task``; the second pass
    undersamples the classes and uses ``task`` with "undersample" appended.

    ``animal_for_this_code`` is accepted but not forwarded: ``run_me`` reads
    the module-level global of the same name. ``labels`` is likewise taken
    from module scope.
    """
    passes = (
        (task, False),
        (task  +"undersample", True),
    )
    for task_name, use_undersampling in passes:
        run_me(task=task_name, under_sample=use_undersampling,
               all_necessary_dfs=all_necessary_dfs, labels=labels)

In [9]:
# Event types to classify. Each string is matched as a substring of the pickle
# file names; its position in this list becomes the integer class id.
lists = ["Airpuff", "Left correct+omission", 'Left correct+rewarded', "Right correct+omission", 'Right correct+rewarded']
# Display names for the classes, in the same order as `lists`; used as heatmap
# tick labels and as the index/columns of the exported confusion-matrix CSV.
labels = ["Airpuff", "L-Omission", 'L-Reward', "R-Omission", 'R-Reward']

In [None]:
# `task` and `today` are read as globals by heatmap() to build the output folder.
task = "choice-space classification 4s all animals"
today = datetime.today().strftime('%y%m%d')
print(today, task)

data_path = "/content/drive/MyDrive/Dopamine-Astrocyte-Behavior/Pkl_files_for_SVM/240527_event_by_event"

# Animal numbers available for each animal type (insertion order = run order).
animal_numbers_by_type = {
    "NTS": [2, 3, 5, 6],
    "PNOC": [1, 2, 3, 4, 5],
}
animal_types = list(animal_numbers_by_type)

# The directory listing does not depend on the animal, so read it once.
file_paths = sorted(os.listdir(data_path))

for animal_type in animal_types:
    animal_numbers = animal_numbers_by_type[animal_type]
    for animal_number in tqdm(animal_numbers):
        # Also read as a global by run_me() for the heatmap titles.
        animal_for_this_code = animal_type + str(animal_number) + "_"
        print(animal_for_this_code)

        files_ = [file for file in file_paths if animal_for_this_code in file]

        # One frame per event type, in the order of `lists` (position = class id).
        all_necessary_dfs = []
        for list_ in lists:
            event_frames = []
            for file_ in tqdm(files_):
                if list_ in file_:
                    # NOTE(review): read_pickle can execute arbitrary code; only
                    # load files from a trusted source.
                    event_frames.append(pd.read_pickle(os.path.join(data_path, file_)))
                    print(file_)
            len_files = len(event_frames)
            print(len_files)
            # Concatenate once per event type instead of once per file.
            concat_all = pd.concat(event_frames, axis=1) if event_frames else pd.DataFrame()
            all_necessary_dfs.append(concat_all)

        run_all_for_each_animal(animal_for_this_code,task=task, all_necessary_dfs=all_necessary_dfs)

Output hidden; open in https://colab.research.google.com to view.

In [11]:
# NOTE(review): leftover sanity-check cell; it only prints 1 and can likely be removed.
print(1)

1
