In [1]:
import os
import sys
import numpy as np
import h5py
import matplotlib.pyplot as plt
import pandas as pd
import pickle
from sklearn.decomposition import PCA
import seaborn as sns
import datetime
from scipy.stats import sem
import matplotlib.cm as cm
import pathlib
import traceback
import gc
import configs

import torch
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
from torch.utils.data import Subset
from predify.utils.training import train_pcoders, eval_pcoders
from data.ValidationDataset import NoisyDataset

from models.networks_2022 import BranchedNetwork

In [2]:
netname = 'pnet'
engram_dir = '/mnt/smb/locker/abbott-locker/hcnn/'
activations_dir = f'{engram_dir}3_activations/{netname}/'

In [3]:
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(f'Device: {DEVICE}')

Device: cpu


In [4]:
bgs = ['pinkNoise', 'AudScene', 'Babble8Spkr']
snrs = [-9.0, -6.0, -3.0, 0.0, 3.0]

In [5]:
conv_idx = 3 # 1-5
t = 0
X = []
y = []
for bg in bgs:
    for snr in snrs:
        activ_dir = f'{activations_dir}{bg}_snr{int(snr)}/'
        for results_file in os.listdir(activ_dir):
            results_filepath = f'{activ_dir}{results_file}'
            results = h5py.File(results_filepath, 'r')
        activ = np.array(results[f'conv{conv_idx}_{t}_activations'])
        n_data = activ.shape[0]
        activ = list(activ.reshape((n_data, -1)))
        X.extend(activ)
        y.extend([snr]*n_data) # bg also possible

In [6]:
len(X)

8279

In [7]:
X[0].shape

(60928,)

In [8]:
from sklearn.decomposition import PCA

In [12]:
pca = PCA(n_components=1000)

In [13]:
pca.fit(X)

PCA(n_components=1000)

In [15]:
sum(pca.explained_variance_ratio_)

0.7486383568913523

In [16]:
X_pca = pca.transform(X)

In [17]:
X_pca.shape

(8279, 1000)

# Project and split data

In [19]:
train_ratio = 0.9
n_train = int(train_ratio*len(X_pca))
X_train = X_pca[:n_train]
y_train = y[:n_train]
X_valid = X_pca[n_train:]
y_valid = y[n_train:]

# Fit SVM model

In [20]:
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [21]:
clf = make_pipeline(
    StandardScaler(),
    SVR(kernel='linear', gamma='auto'))

In [22]:
clf.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svr', SVR(gamma='auto', kernel='linear'))])

In [24]:
yhat_valid = clf.predict(X_valid)

In [26]:
clf.score(X_valid, y_valid)

-6.89955857939792

# Copy of activation keys

In [6]:
results.keys()

<KeysViewHDF5 ['0_clean_logits', '0_clean_output', '0_logits', '0_output', '1_clean_logits', '1_clean_output', '1_logits', '1_output', '2_clean_logits', '2_clean_output', '2_logits', '2_output', '3_clean_logits', '3_clean_output', '3_logits', '3_output', '4_clean_logits', '4_clean_output', '4_logits', '4_output', 'clean_correct', 'conv1_0_activations', 'conv1_0_clean_activations', 'conv1_1_activations', 'conv1_1_clean_activations', 'conv1_2_activations', 'conv1_2_clean_activations', 'conv1_3_activations', 'conv1_3_clean_activations', 'conv1_4_activations', 'conv1_4_clean_activations', 'conv2_0_activations', 'conv2_0_clean_activations', 'conv2_1_activations', 'conv2_1_clean_activations', 'conv2_2_activations', 'conv2_2_clean_activations', 'conv2_3_activations', 'conv2_3_clean_activations', 'conv2_4_activations', 'conv2_4_clean_activations', 'conv3_0_activations', 'conv3_0_clean_activations', 'conv3_1_activations', 'conv3_1_clean_activations', 'conv3_2_activations', 'conv3_2_clean_activa