# Create all models (S, D, P, C) from Null Model

Starting from our previous null model (connectome-derived ESN), we will **subtract** topological features to determine their impact on performance and variance. Specifically, we will create models corresponding to each subtracted feature: models S, D, P, and C.

In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import random

from IPython.display import display

In [None]:
# Read the null-model reservoir matrix from disk and show it.
with open('Model_Files/null.npy', 'rb') as null_file:
    model_null = np.load(null_file)

display(model_null)

### Create model S

In [None]:
# fraction of an array's entries that are nonzero
def get_sparsity(array):
    """Return the share of entries in ``array`` that are nonzero.

    Despite the name, the result is a density: 0.0 means every entry is
    zero and 1.0 means no entry is zero.

    Args:
        array: a NumPy array (anything exposing ``.size``).

    Returns:
        ``count_nonzero(array) / array.size`` as a float.
    """
    nonzero_entries = np.count_nonzero(array)
    total_entries = array.size
    return nonzero_entries / total_entries

# function to make the null reservoir denser by filling some of its zeros
def decrease_sparsity(dataframe, fill_probability=0.19, num_boot=50,
                      samples_per_boot=2000):
    """Fill a random subset of zero entries with bootstrapped nonzero values.

    A pool of replacement values is built by resampling the rows of
    ``dataframe`` with replacement ``num_boot`` times and, each time, drawing
    up to ``samples_per_boot`` of the resampled nonzero entries without
    replacement. Every zero entry of the matrix is then replaced, with
    probability ``fill_probability``, by a value drawn from that pool.

    The input is not modified; the filled matrix is a fresh copy.

    Args:
        dataframe: pandas DataFrame holding the weight matrix.
        fill_probability: chance that any given zero entry is replaced.
            The default 0.19 matches the original ``rand() > 0.81`` test.
        num_boot: number of bootstrap rounds used to build the pool.
        samples_per_boot: maximum number of nonzero values kept per round.

    Returns:
        ``(filled, pool)``: the densified matrix as a NumPy array and the
        1-D array of bootstrapped nonzero values it was filled from.

    Raises:
        ValueError: if no nonzero value could be collected (for example an
            all-zero or empty ``dataframe``).
    """
    pool_chunks = []

    # bootstrap to get population array values
    for _ in range(num_boot):
        resampled = dataframe.sample(len(dataframe), replace=True).to_numpy()
        nonzero_vals = resampled[resampled != 0]

        # a resample may hold fewer nonzero values than requested
        take = min(samples_per_boot, nonzero_vals.size)
        if take:
            pool_chunks.append(
                np.random.choice(nonzero_vals, size=take, replace=False)
            )

    if not pool_chunks:
        raise ValueError("no nonzero values were collected to fill zeros with")

    # The pool holds only observed nonzero values: starting it from a zero
    # would let a "replacement" leave an entry at zero.
    values_to_add = np.concatenate(pool_chunks, axis=None)

    # Work on a copy so the caller's data is never modified in place.
    df_arr = dataframe.to_numpy(copy=True)

    # choose which zero entries to replace, over the full (rows, cols) shape
    fill_mask = (df_arr == 0) & (
        np.random.rand(*df_arr.shape) < fill_probability
    )
    df_arr[fill_mask] = np.random.choice(
        values_to_add, size=int(fill_mask.sum())
    )
    return df_arr, values_to_add


# nonzero fraction of the null model before densifying
display(get_sparsity(model_null))

# densify starting from the null model; the bootstrapped value pool is kept
# because the Model C cells draw edge weights from it
null_frame = pd.DataFrame(model_null)
new_arr, vals_connectome_bootstrapped = decrease_sparsity(null_frame)

# nonzero fraction after densifying
display(get_sparsity(new_arr))

# write the densified reservoir to disk as Model S
with open('S_null_sparse.npy', 'wb') as out_file:
    np.save(out_file, new_arr)

### Create model D

In [None]:
# Model D: keep the null model's connection pattern but redraw every weight
# from a discrete uniform distribution.

# start from the null model
with open('Model_Files/null.npy', 'rb') as f:
    modelb = np.load(f)

b_mask = modelb != 0  # boolean mask of the existing (nonzero) connections
display(b_mask)

c = np.count_nonzero(b_mask)  # number of weights to redraw
display(c)

# Draw one integer per existing connection, uniformly from 1..9999.
# The lower bound is 1 rather than 0: a drawn 0 would erase the connection
# and change the zero pattern that this model is meant to keep fixed.
vals = np.random.randint(1, 10000, c)
display(vals.size)

# write the redrawn weights into the nonzero positions only
modelb[b_mask] = vals
display(modelb)

# Rescale by the matrix norm (np.linalg.norm with default arguments, i.e. the
# Frobenius norm for a 2-D array) so the saved weights are small fractions.
result = np.linalg.norm(modelb)
display(result)
new_b = modelb / result

display(new_b)
# save the rescaled reservoir as Model D
with open('D_null_uniform.npy', 'wb') as f:
    np.save(f, new_b)

### Create model P

In [None]:
# Model P: shuffle the nonzero weights within every row, then within every
# column, and repeat that pair of passes three times. Only nonzero entries
# are moved, and only onto nonzero positions, so the zero pattern is kept.
with open('Model_Files/null.npy', 'rb') as null_file:
    modelb2 = np.load(null_file)

n_rows = len(modelb2)
n_cols = modelb2.shape[1]

for _ in range(3):
    print("permute across row...")

    # shuffle each row's nonzero entries among themselves
    for row_idx in range(n_rows):
        row = modelb2[row_idx]  # a view: writing to it updates modelb2
        occupied = np.flatnonzero(row)
        row[occupied] = np.random.permutation(row[occupied])

    print("then across column...")

    # shuffle each column's nonzero entries among themselves
    for col_idx in range(n_cols):
        column = modelb2[:, col_idx]  # also a view into modelb2
        occupied = np.flatnonzero(column)
        column[occupied] = np.random.permutation(column[occupied])

display(modelb2)

# write the permuted reservoir to disk as Model P
with open('P_null_permute.npy', 'wb') as out_file:
    np.save(out_file, modelb2)

### Create model C

In [None]:
# Model C: raise the average clustering coefficient (C) of the null reservoir
# to 0.5, the value quoted here for an Erdos-Renyi graph.
# NOTE(review): the original notes give the null C as 0.43 in one place and
# as ~0.27001990732141884 in another -- confirm which value is current.
with open('Model_Files/null.npy', 'rb') as null_file:
    null = np.load(null_file)

# Optional check of the null reservoir's C (uncomment to run):
#g_modelc = nx.from_numpy_array(null)
#display(nx.average_clustering(g_modelc))

# turn the null reservoir matrix into a networkx graph
g_newc = nx.from_numpy_array(null)

In [None]:
# Rank every node of the graph by its clustering coefficient, highest first.
clustering_per_node = nx.clustering(g_newc)
sorted_clustering = sorted(
    clustering_per_node.items(),
    key=lambda node_and_cc: node_and_cc[1],
    reverse=True,
)
# sorted_clustering is a list of (node, coefficient) pairs; display it, a
# slice such as [500:700], or its len() to inspect the ranking.

# Plan: pick a band of mid-to-high clustering nodes from this ranking, then
# add weighted edges between them, taking the weights from the bootstrapped
# value pool that was built while creating Model S.

In [None]:
# peek at the (node, coefficient) pair at position 3993 of the ranking
entry_3993 = sorted_clustering[3993]
display(entry_3993)

In [None]:
# Node ids at positions 2800..3299 of the clustering ranking; these are the
# nodes that will receive extra edges.
# (an earlier trial with positions 2000 to 2600 was noted as good)
list_highcc = [sorted_clustering[rank][0] for rank in range(2800, 3300)]

In [None]:
# Add 50000 weighted edges between randomly chosen nodes of list_highcc.
# Each weight is drawn from the bootstrapped pool returned when Model S was
# built (vals_connectome_bootstrapped).
# NOTE(review): the two endpoints are drawn independently, so the same node
# can be picked for both ends; the pool may also contain a 0 weight -- confirm
# whether either case is intended.
for _ in range(50000):
    source = np.random.choice(list_highcc)
    target = np.random.choice(list_highcc)
    weight = np.random.choice(vals_connectome_bootstrapped)
    g_newc.add_edge(source, target, weight=weight)

In [None]:
# Is C now ~0.5? If so, done. If not, re-run the edge-adding cell above and
# then this one: the value only changes after the graph gains more edges.
current_avg_clustering = nx.average_clustering(g_newc)
display(current_avg_clustering)

In [None]:
# Once the clustering coefficient matches the random-graph target (0.5),
# convert the graph back to a matrix and write it to disk as Model C.
modelc = nx.to_numpy_array(g_newc)
with open('C_null_clustering.npy', 'wb') as out_file:
    np.save(out_file, modelc)
# clustering coefficient recorded for the saved model: 0.5044721209428997

In [None]:
# Another idea: measure the clustering coefficient of the random graph we made, then match that value.