In [1]:
import numpy as np
import pandas as pd
import time
import sys
import os
import h5py
from joblib import Parallel, delayed
sys.path.append('../')
from packages import actv_analysis, svm, load_csv, stats, objects, test_codes
import pickle
import re
import glob
import random
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Lookup table of cumulative-area values; the first CSV column is used as the
# row index (presumably the "number" axis, judging by the file name - confirm).
cularea_csv_path = '../number_size_cumulativearea.csv'
df_cularea = pd.read_csv(cularea_csv_path, index_col=0)

In [5]:
def _min_max_scale(values):
    """Rescale each column of a 2-D array to [0, 1]; constant columns map to 0."""
    lowest = values.min(axis=0)
    span = values.max(axis=0) - lowest
    # Avoid 0/0 for constant columns: dividing (x - min) == 0 by 1 yields 0.
    span = np.where(span == 0, 1.0, span)
    return (values - lowest) / span


def regression_model(self, avg_actv_nxs, var='both', df_cularea=None):
    """Regress one unit's activations on log number and/or log cumulative area.

    The predictors are built from the cumulative-area table: the table index is
    log10-transformed and repeated once per column to form the "number"
    predictor, and the table values are log10-transformed and flattened
    row-major to form the "area" predictor. Each predictor is min-max scaled
    to [0, 1] and an ordinary least-squares fit with intercept is performed.

    Parameters
    ----------
    self : object
        Object exposing an ``id`` attribute used to index ``avg_actv_nxs``.
        The fitted coefficients and R-squared are stored on it:
        ``coeff_number``/``r_sqrd_number`` for ``var='number'``,
        ``coeff_area``/``r_sqrd_area`` for ``var='area'``,
        ``coeff1`` (number), ``coeff2`` (area) and ``r_sqrd`` for ``var='both'``.
    avg_actv_nxs : array-like
        Activations indexed by unit on the first axis. ``avg_actv_nxs[self.id]``
        must flatten to as many values as the table has cells, in the same
        row-major order as the table.
    var : {'number', 'area', 'both'}
        Which predictor(s) to include in the design matrix.
    df_cularea : pandas.DataFrame, optional
        Pre-loaded cumulative-area table. When omitted it is read from
        '../number_size_cumulativearea.csv'; pass it explicitly when fitting
        many units to avoid re-reading the file on every call.

    Returns
    -------
    float
        R-squared of the fit (NaN when the response has zero variance).

    Raises
    ------
    ValueError
        If ``var`` is not a supported choice, or the response length does not
        match the number of table cells.
    """
    # Validate before touching the file system.
    if var not in ('number', 'area', 'both'):
        raise ValueError("Invalid variable selected. Choose 'number', 'area', or 'both'.")

    if df_cularea is None:
        df_cularea = pd.read_csv('../number_size_cumulativearea.csv', index_col=0)

    # One "number" value per table row, repeated across that row's columns so
    # it lines up with the row-major flattening of the table values below.
    log_numbers = np.log10(np.asarray(df_cularea.index.values, dtype=float))
    log_numbers = np.repeat(log_numbers, df_cularea.shape[1]).reshape(-1, 1)
    log_cularea = np.log10(np.asarray(df_cularea.values, dtype=float)).reshape(-1, 1)

    # Scale each predictor independently to [0, 1] after the log transform.
    log_numbers_scaled = _min_max_scale(log_numbers)
    log_cularea_scaled = _min_max_scale(log_cularea)

    if var == 'number':
        predictors = log_numbers_scaled
    elif var == 'area':
        predictors = log_cularea_scaled
    else:  # 'both': column 0 is number, column 1 is area
        predictors = np.hstack([log_numbers_scaled, log_cularea_scaled])

    # Prepend the intercept column, so params[0] is the constant term.
    design = np.hstack([np.ones((predictors.shape[0], 1)), predictors])

    # Response: this unit's activations, flattened to match the design rows.
    y = np.asarray(avg_actv_nxs[self.id], dtype=float).reshape(-1)
    if y.shape[0] != design.shape[0]:
        raise ValueError(
            f"Activation vector has {y.shape[0]} values but the cumulative-area "
            f"table has {design.shape[0]} cells."
        )

    # Ordinary least squares (minimum-norm solution if the design is rank deficient).
    params, _, _, _ = np.linalg.lstsq(design, y, rcond=None)

    # Centered R-squared, appropriate because the model includes an intercept.
    residuals = y - design @ params
    ss_res = float(residuals @ residuals)
    centered = y - y.mean()
    ss_tot = float(centered @ centered)
    r_squared = 1.0 - ss_res / ss_tot if ss_tot > 0 else float('nan')

    # Store the results under attribute names that depend on the chosen model.
    if var == 'number':
        self.coeff_number = params[1]
        self.r_sqrd_number = r_squared
    elif var == 'area':
        self.coeff_area = params[1]
        self.r_sqrd_area = r_squared
    else:
        self.coeff1 = params[1]  # Coefficient for 'number'
        self.coeff2 = params[2]  # Coefficient for 'area'
        self.r_sqrd = r_squared

    return r_squared


In [None]:
from joblib import Parallel, delayed

def process_net_epoch(net, epoch, relu=4):
    """Fit the 'number' regression for every unit of one network/epoch.

    Loads the pickled unit objects for the given network, ReLU layer and
    epoch, computes the min-max normalised average activations, calls each
    unit's ``regression_model(..., var='number')`` and writes the updated
    units back to the same pickle file.

    Parameters
    ----------
    net : int
        Network identifier used in the pickle file name and activation lookup.
    epoch : int
        Training epoch used in the pickle file name and activation lookup.
    relu : int, optional
        ReLU layer index. The same value is used for both the pickle file name
        and the activation lookup so the two can never disagree.

    Returns
    -------
    None
    """
    print(f"Processing for net: {net}, epoch: {epoch}")

    pkl_filename = f'../../pkl/4to20/network{net}_Relu{relu}_epoch{epoch}_4to20.pkl'

    # NOTE: unpickling executes arbitrary code; only load pickles produced by
    # this project.
    with open(pkl_filename, 'rb') as f:
        units = pickle.load(f)

    # Average the activations over the third axis (presumably repeated
    # stimulus instances - TODO confirm against get_actv_net).
    actv_net = actv_analysis.get_actv_net(net=net, relu=relu, epoch=epoch)
    avg_actv_net = np.mean(actv_net, axis=2)
    n_units = actv_net.shape[0]

    # The reshape doubles as a check that each unit has exactly 10 x 10
    # averaged values (presumably number x size - TODO confirm).
    avg_actv_net_nxs = avg_actv_net.reshape(n_units, 10, 10)

    # MinMaxScaler needs a 2-D (samples x features) array, so scale the
    # flattened view and restore the 3-D shape afterwards.
    # NOTE(review): the scaler rescales each column, i.e. each of the 100
    # conditions across units, not each unit across its conditions - confirm
    # that this is the intended normalisation.
    flattened_avg_actv_nxs = avg_actv_net_nxs.reshape(n_units, -1)
    avg_actv_nxs_normalized = (
        MinMaxScaler()
        .fit_transform(flattened_avg_actv_nxs)
        .reshape(avg_actv_net_nxs.shape)
    )

    # Each call stores its results as attributes on the unit object.
    for i in range(n_units):
        units[i].regression_model(avg_actv_nxs_normalized, var='number')

    # Write to a sibling temp file first and swap it in, so an interrupted
    # worker cannot leave a truncated pickle in place of the original.
    tmp_filename = pkl_filename + '.tmp'
    with open(tmp_filename, 'wb') as f:
        pickle.dump(units, f)
    os.replace(tmp_filename, pkl_filename)

# Sweep configuration
relu = 4
net_range = range(2, 11)        # networks 2 through 10 (inclusive)
epoch_range = range(0, 91, 90)  # step of 90, so only epochs 0 and 90

# Fan every (net, epoch) combination out over all available cores
tasks = (
    delayed(process_net_epoch)(net, epoch)
    for net in net_range
    for epoch in epoch_range
)
Parallel(n_jobs=-1)(tasks)