In [1]:
import os
import sys
import numpy as np
import pandas as pd
import pickle
import random
import time
from joblib import Parallel, delayed
sys.path.append('../')

from packages import actv_analysis, svm
import seaborn as sns
# from packages.svm import SVM_fit
# from packages.load_csv import units_for_svm

In [None]:
# Arguments forwarded unchanged to svm.gen_SVM_input for every generated set.
nidx = range(0, 10)
sidx = range(3, 10)
img_inst = 500

# Write ten training/test CSV pairs into the current working directory.
# Within each round the training set is generated and saved before the test set.
for i in range(10):
    # Output file name -> rep_per_ns_combo used to build it (dict keeps insertion order).
    reps_by_csv = {f'svm_training_set{i}.csv': 16, f'svm_test_set{i}.csv': 4}
    for csv_name, rep_per_ns_combo in reps_by_csv.items():
        df_idx = svm.gen_SVM_input(
            nidx=nidx,
            sidx=sidx,
            rep_per_ns_combo=rep_per_ns_combo,
            img_inst=img_inst,
        )
        # save to csv file
        df_idx.to_csv(csv_name, index=True)

In [None]:
import random
import time

# Parameters
#relus = range(2,6)
relus = list(range(2,6))[::-1]
epochs = np.arange(0, 91, 10)
exps = np.arange(0, 10)
num_units = 100
rate_threshold = 0.05
# Seed for drawing the unit sample. It has to be defined in this cell: reading a
# leftover `exp` from another cell raises NameError on a fresh kernel (or silently
# reuses a stale value), which breaks Restart & Run All.
sample_seed = 0
layer_numunits = {'relu1':290400, 'relu2':186624, 'relu3':64896, 'relu4':64896, 'relu5':43264}

dir_path = os.path.dirname(os.path.realpath('../'))
#path_for_units = f"{dir_path}/dataframes/SVM/units/{num_units} units sampled from distribution higher than {rate_threshold} response rate including PN2 and PN20"
set_folder = f"{dir_path}/dataframes/SVM"  # retrieve training/test sets
save_to_folder = f"{dir_path}/dataframes/SVM_predictions"

# Make sure the output directory exists before the expensive fits start, so a
# missing folder cannot discard finished results at write time.
os.makedirs('csv', exist_ok=True)

for relu in relus:
    for epoch in epochs:
        for net in range(1, 2):
            pkl_filename = f'pkl/network{net}_Relu{relu}_epoch{epoch}.pkl'
            print(f'Loading {pkl_filename}..')
            # NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
            with open(pkl_filename, 'rb') as f:
                units = pickle.load(f)
            print('Loading actv ..')
            actv_net = actv_analysis.get_actv_net(net=net, relu=relu, epoch=epoch)
            # Look the layer size up by name rather than by position in the dict.
            actv = actv_net.reshape(layer_numunits[f'relu{relu}'], 10, 10, 500)

            # select units whose activities corresponding to images will be used for svm training and testing:
            units_with_someresp = [units[i].id for i in range(len(units)) if units[i].response_rate_subset > rate_threshold]

            # random.sample raises ValueError when asked for more items than available;
            # report the shortfall and move on instead of aborting the whole sweep.
            if len(units_with_someresp) < num_units:
                print(f"Only {len(units_with_someresp)} units exceed response rate {rate_threshold} "
                      f"for net{net} relu{relu} epoch{epoch}; need {num_units}, skipping.")
                continue

            # Randomly choose "num_units" number of units from units_with_someresp without replacement.
            # A dedicated Random instance keeps the draw reproducible without touching the
            # global RNG state; the same sample is shared by every experiment below.
            sampled_units = random.Random(sample_seed).sample(units_with_someresp, num_units)

            start_time = time.time()
            y_preds = Parallel(n_jobs=-1)(delayed(svm.SVM_fit)(units=sampled_units, actv=actv, exp=exp) for exp in exps)
            end_time = time.time()
            print(f"Took {end_time - start_time} seconds to run.")

            # Parallel returns results in submission order, so pair them with exps directly.
            for exp, y_pred in zip(exps, y_preds):
                pd.Series(y_pred).to_csv(f'csv/SVM prediction of He untrained net{net} relu{relu} epoch{epoch} {num_units} units that are randomly drawn from distribution exp{exp} June2023.csv', index=True)


## Run SVM

In [None]:
relus = list(range(2,6))[::-1]
epochs = np.arange(0, 91, 10)
exps = range(10)
layer_numunits = {'relu1':290400, 'relu2':186624, 'relu3':64896, 'relu4':64896, 'relu5':43264}
# Unit-count grid, defined once so the "what is missing" scan and the fitting
# loop cannot drift apart.
num_units_grid = np.arange(200, 2001, 200)

# Create the output directory up front so finished fits are never lost to a missing folder.
os.makedirs('csv/svm_results', exist_ok=True)

for relu in relus:
    for epoch in epochs:
        for net in range(1, 3):
            pkl_filename = f'pkl/network{net}_Relu{relu}_epoch{epoch}.pkl'

            if not os.path.exists(pkl_filename):
                print(f"{pkl_filename} does not exist, skipping.")
                continue

            # Map num_units -> {exp: csv path} for every result file that is not on disk yet.
            missing_exps_dict = {}

            for outer_num_units in num_units_grid:
                # Define filenames before processing
                csv_files = {exp: f'csv/svm_results/SVM prediction of He untrained net{net} relu{relu} epoch{epoch} {outer_num_units} nonzero activity units exp{exp} June2023.csv' for exp in exps}

                # Filter out the experiments where CSV files already exist
                missing_exps = [exp for exp, filename in csv_files.items() if not os.path.exists(filename)]

                if missing_exps:
                    missing_exps_dict[outer_num_units] = {exp: csv_files[exp] for exp in missing_exps}

            # Skip the expensive pickle/activation load when nothing is left to compute.
            if not missing_exps_dict:
                print(f"All CSV files for net{net} relu{relu} epoch{epoch} already exist, skipping.")
                continue

            print(f'Loading {pkl_filename} and actv file..')
            # NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
            with open(pkl_filename, 'rb') as f:
                units = pickle.load(f)

            actv_net = actv_analysis.get_actv_net(net=net, relu=relu, epoch=epoch)
            # Look the layer size up by name rather than by position in the dict.
            actv = actv_net.reshape(layer_numunits[f'relu{relu}'], 10, 10, 500)

            # select units whose activities corresponding to images will be used for svm training and testing:
            # NOTE(review): `is not True` is an identity test; if no_response_subset is stored as a
            # numpy bool rather than a Python bool this keeps every unit -- confirm the attribute type.
            units_nonzero = [units[i].id for i in range(len(units)) if units[i].no_response_subset is not True]

            for outer_num_units in num_units_grid:
                if outer_num_units not in missing_exps_dict:
                    print(f"All CSV files for net{net} relu{relu} epoch{epoch} num_units {outer_num_units} already exist, skipping.")
                    continue

                # random.sample raises ValueError when the population is too small;
                # report it and continue with the remaining conditions.
                if len(units_nonzero) < outer_num_units:
                    print(f"Only {len(units_nonzero)} nonzero units for net{net} relu{relu} epoch{epoch}; need {outer_num_units}, skipping.")
                    continue

                # Seed the draw from the condition itself so that a resumed run, which only
                # fits the experiments still missing, uses the same unit sample as the earlier
                # pass of this code. Files written before this seeding was introduced came from
                # an unseeded draw and will not share this sample.
                sample_rng = random.Random(f'net{net}_relu{relu}_epoch{epoch}_units{outer_num_units}')
                units_sample = sample_rng.sample(units_nonzero, int(outer_num_units))

                start_time = time.time()

                # Parallelize only the missing experiments
                pending = missing_exps_dict[outer_num_units]
                y_preds = Parallel(n_jobs=-1)(delayed(svm.SVM_fit_with_seed)(exp, units_sample, actv) for exp in pending.keys())

                # Save results to CSV (Parallel preserves submission order, so zip lines up with the keys).
                for exp, y_pred in zip(pending.keys(), y_preds):
                    pd.Series(y_pred).to_csv(pending[exp], index=True)

                end_time = time.time()
                print(f"Took {end_time - start_time} seconds to run for num_units {outer_num_units}.")


## Run SVM with specific types of units

In [21]:
relus = np.arange(4,5)
epochs = np.arange(90, 91, 10)
exps = range(10)
layer_numunits = {'relu1':290400, 'relu2':186624, 'relu3':64896, 'relu4':64896, 'relu5':43264}
outer_num_units = 200

# Create the output directory up front so finished fits are never lost to a missing folder.
os.makedirs('csv/svm_results', exist_ok=True)

for relu in relus:
    for epoch in epochs:
        for net in range(1, 3):
            pkl_filename = f'pkl/network{net}_Relu{relu}_epoch{epoch}.pkl'

            if not os.path.exists(pkl_filename):
                print(f"{pkl_filename} does not exist, skipping.")
                continue

            print(f'Loading {pkl_filename} and actv file..')
            # NOTE: pickle.load can execute arbitrary code; only load files produced by this project.
            with open(pkl_filename, 'rb') as f:
                units = pickle.load(f)

            actv_net = actv_analysis.get_actv_net(net=net, relu=relu, epoch=epoch)
            # Look the layer size up by name rather than by position in the dict.
            actv = actv_net.reshape(layer_numunits[f'relu{relu}'], 10, 10, 500)

            # Select the two unit populations from each unit's stored Spearman correlations:
            # LNSS = sn > 0.9 and ss < -0.9, LNLS = sn > 0.9 and ss > 0.9.
            # NOTE(review): the selected values are positions in `units`, not `units[i].id`;
            # this assumes position and id coincide -- confirm.
            sn = [units[i].spearmanr_number for i in range(len(units))]
            ss = [units[i].spearmanr_size for i in range(len(units))]
            df_spearmanr = pd.DataFrame({'sn':sn, 'ss':ss})
            LNSS = df_spearmanr[(df_spearmanr['sn']>0.9) &( df_spearmanr['ss']<-0.9)].index
            LNLS = df_spearmanr[(df_spearmanr['sn']>0.9) &( df_spearmanr['ss']>0.9)].index

            # Sweep the LNSS share in integer percent steps (0, 10, ..., 100). Integer
            # arithmetic avoids truncating float-stepped proportions such as
            # 0.30000000000000004 when deriving counts and file names.
            for LNSS_pct in range(0, 101, 10):
                LNLS_pct = 100 - LNSS_pct
                n_LNSS = outer_num_units * LNSS_pct // 100
                # The remainder goes to LNLS so the two counts always add up to outer_num_units.
                n_LNLS = outer_num_units - n_LNSS

                # random.sample raises ValueError when a population is too small;
                # report it and continue with the remaining mixtures.
                if n_LNSS > len(LNSS) or n_LNLS > len(LNLS):
                    print(f"Not enough units for net{net} relu{relu} epoch{epoch}: "
                          f"need {n_LNSS} LNSS / {n_LNLS} LNLS, have {len(LNSS)} / {len(LNLS)}, skipping.")
                    continue

                # Seed the draw from the condition so re-running the cell reproduces the same units.
                sample_rng = random.Random(f'net{net}_relu{relu}_epoch{epoch}_LNSS{LNSS_pct}')
                LNSS_sample = sample_rng.sample(list(LNSS), n_LNSS)
                LNLS_sample = sample_rng.sample(list(LNLS), n_LNLS)
                # union1d returns a sorted array; astype(int) restores an integer dtype
                # when one of the two samples is empty.
                units_sample = np.union1d(LNSS_sample, LNLS_sample).astype(int)

                start_time = time.time()

                y_preds = Parallel(n_jobs=-1)(delayed(svm.SVM_fit_with_seed)(exp, units_sample, actv) for exp in exps)

                # Save results to CSV
                for exp, y_pred in zip(exps, y_preds):
                    csv_file = f'csv/svm_results/SVM_prediction_of_He_untrained_net{net}_relu{relu}_epoch{epoch}_{LNSS_pct}_percent_LNSS_and_{LNLS_pct}_percent_LNLS_units_exp{exp}_July2023.csv'
                    df = pd.DataFrame({'y_pred': y_pred})
                    df.to_csv(csv_file, index=False)

                end_time = time.time()
                print(f"It took {end_time - start_time} seconds to run for num_units {outer_num_units}.")


Loading pkl/network1_Relu4_epoch90.pkl and actv file..
--- 100.83805799484253 seconds ---
It took 52.76469302177429 seconds to run for num_units 200.
It took 32.72128200531006 seconds to run for num_units 200.
It took 32.43398571014404 seconds to run for num_units 200.
It took 32.12636113166809 seconds to run for num_units 200.
It took 31.03190779685974 seconds to run for num_units 200.
It took 30.641894817352295 seconds to run for num_units 200.
It took 32.14512276649475 seconds to run for num_units 200.
It took 35.02132797241211 seconds to run for num_units 200.
It took 34.48999810218811 seconds to run for num_units 200.
It took 42.40022897720337 seconds to run for num_units 200.
It took 58.83403491973877 seconds to run for num_units 200.
Loading pkl/network2_Relu4_epoch90.pkl and actv file..
--- 102.01696109771729 seconds ---
It took 52.91724896430969 seconds to run for num_units 200.
It took 37.628700256347656 seconds to run for num_units 200.
It took 39.217652797698975 seconds to 

In [2]:
# Load the pickled unit records for one network / layer / epoch for interactive inspection.
net, relu, epoch = 1, 4, 90
pkl_filename = f'pkl/network{net}_Relu{relu}_epoch{epoch}.pkl'
with open(pkl_filename, 'rb') as pkl_file:
    units = pickle.load(pkl_file)

In [4]:
# Gather each unit's two Spearman attributes, in the positional order of `units`.
unit_positions = range(len(units))
sn = [units[pos].spearmanr_number for pos in unit_positions]
ss = [units[pos].spearmanr_size for pos in unit_positions]

In [16]:
# Scratch check: sorted, de-duplicated union of two float grids (steps of 0.1 and 0.2).
tenth_steps = np.arange(0, 1.1, 0.1)
fifth_steps = np.arange(0, 2.1, 0.2)
np.union1d(tenth_steps, fifth_steps)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. , 1.2, 1.4,
       1.6, 1.8, 2. ])

In [7]:
# One row per unit with its two Spearman values. The dict must be passed as the
# data argument: pd.DataFrame has no `spearmanr` keyword and raises TypeError.
df_spearmanr = pd.DataFrame({'sn': sn, 'ss': ss})

In [11]:
# Row labels of units with sn above 0.9 and ss below -0.9.
is_lnss = (df_spearmanr['sn'] > 0.9) & (df_spearmanr['ss'] < -0.9)
LNSS = df_spearmanr[is_lnss].index