In [1]:
import pandas as pd
import numpy as np
import math
import random
import pickle
from itertools import combinations
import itertools

In [2]:
# Load the CSV with its first column as the index, then keep only the first
# 30000 rows and drop the first of the remaining columns.
data = (
    pd.read_csv('Five_Species_Coupled_Food_Chain_Model_res_13.csv', index_col=0)
    .iloc[:30000, 1:]
)

Generating valid lags for each combination of (target, E, Tp, exclusion_radius):  

targets = ['R', 'C1', 'C2', 'P1', 'P2']  
Es = [3, 5, 7, 9, 11] (note: the embedding-generation cell in this notebook currently loops over E in [4, 5, 6, 7])  
Tps = [-3, -2, -1, 0, 1, 2, 3]  
exclusion_radii = [0, 1, 2]

Tp = 0 is the case of filling a missing value in the target variable when other variables have observations.  
(Tp, exclusion_radius) = (0,0), (0,1), (0,2)  

Tp != 0 is the case of predicting into the center of a gap in all variables.  
(Tp, exclusion_radius) = (1,0), (-1,0), (2,1), (-2,1), (3,2), (-3,2)

if Tp = 0:  
- remove [-exclusion_radius, exclusion_radius] lags of target variable from valid_lags

if Tp != 0:
- remove the lags sgn(Tp) * k, for k = 1, ..., 2 * exclusion_radius + 1, of all variables from valid_lags
- remove 0 lag of target variable from valid_lags

In [3]:
# Parameters

# List of variables in the dynamical system
system_variables = ['R', 'C1', 'C2', 'P1', 'P2']

# Number of forwards and backwards lags generated for every variable
num_lags = 1

# Number of random models to subsample
sample = 10_000

In [4]:
def get_lag_data(data, system_variables, num_lags, tau=1):
    ''' Get a dataframe with all the possible valid lags of the variables.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding one column per entry of `system_variables`.
    system_variables : list of str
        Names of the columns of `data` to lag.
    num_lags : int
        Number of backward and forward lags to generate. Lag 0 is always
        included, so `num_lags = 0` yields only the `(t-0)` columns.
    tau : int, optional
        Spacing between successive lags, in rows (default 1).

    Returns
    -------
    pandas.DataFrame
        Columns are ordered as all backward lags `var(t-0)`, `var(t-tau)`, ...
        (lag-major, variable-minor) followed by all forward lags
        `var(t+tau)`, ... . Rows shifted in from outside the data are NaN.
    '''

    lagged_columns = []

    # Backward lags, including the unshifted (t-0) copy of every variable.
    for lag in range(num_lags + 1):
        offset = lag * tau
        for var in system_variables:
            lagged_columns.append(data[var].shift(offset).rename(f'{var}(t-{offset})'))

    # Forward lags; lag 0 is already covered by the backward block above.
    for lag in range(1, num_lags + 1):
        offset = lag * tau
        for var in system_variables:
            lagged_columns.append(data[var].shift(-offset).rename(f'{var}(t+{offset})'))

    # A single concat over all series: unlike concatenating the forward lags on
    # their own, this does not fail when num_lags == 0 (empty forward list).
    return pd.concat(lagged_columns, axis=1)

# Build the dataframe of lagged variables from the loaded data, using the
# variable list and lag count set in the parameters cell.
lag_data = get_lag_data(data, system_variables, num_lags)
#lag_data.to_csv('block.csv')

# Get Exhaustive List of Embeddings

In [12]:
def _lag_label(var, lag):
    ''' Return the lag_data column name for variable `var` at integer offset `lag`. '''
    if lag < 0:
        return f'{var}(t{lag})'      # negative lag already carries its '-' sign
    if lag == 0:
        return f'{var}(t-0)'         # the unshifted column is labelled (t-0)
    return f'{var}(t+{lag})'


def _get_valid_lags(all_lags, target, Tp, exclusion_radius, variables):
    ''' Return the entries of `all_lags` (original order kept) that may be used
    to embed `target` for the given (Tp, exclusion_radius) pair.

    - The (t-0) lag of the target variable is always removed.
    - Tp == 0: the target's lags in [-exclusion_radius, exclusion_radius] are removed.
    - Tp != 0: lags sgn(Tp) * k, k = 1 .. 2 * exclusion_radius + 1, of every
      variable in `variables` are removed.

    Labels that do not occur in `all_lags` are simply ignored.
    '''
    excluded = {_lag_label(target, 0)}

    if Tp == 0:
        excluded.update(_lag_label(target, r)
                        for r in range(-exclusion_radius, exclusion_radius + 1))
    else:
        sgn_Tp = int(math.copysign(1, Tp))
        excluded.update(_lag_label(var, sgn_Tp * k)
                        for k in range(1, 2 * exclusion_radius + 1 + 1)
                        for var in variables)

    # One pass over all_lags with set membership instead of rebuilding the list
    # once per removed label.
    return [lag for lag in all_lags if lag not in excluded]


# NOTE: despite its name, this dictionary holds the exhaustive list of
# embeddings for every key, not a random subsample. The name is kept because
# the (commented-out) pickling code below refers to it.
random_embeddings = {}
all_lags = lag_data.columns.tolist()

# (Tp, exclusion_radius) pairs to enumerate
tp_radius_pairs = [(0, 0), (0, 1),                                    # Tp = 0
                   (1, 0), (-1, 0), (2, 1), (-2, 1), (3, 2), (-3, 2)]  # Tp != 0

for target in system_variables:
    print(target)
    for E in [4, 5, 6, 7]:
        for Tp, exclusion_radius in tp_radius_pairs:
            valid_lags = _get_valid_lags(all_lags, target, Tp, exclusion_radius, system_variables)

            # Enumerate every E-sized combination of the valid lags
            all_embeddings = [list(el) for el in itertools.combinations(valid_lags, E)]
            random_embeddings['{0}'.format((target, E, Tp, exclusion_radius))] = all_embeddings

            print(f'E = {E}, Tp = {Tp}, gap_radius = {exclusion_radius}, # embeddings: {len(all_embeddings)}')
            
# Save the dictionary to a file
# with open('all_embeddings_1_lags.pkl', 'wb') as file:
#     pickle.dump(random_embeddings, file)

R
E = 4, Tp = 0, gap_radius = 0, # embeddings: 46376
E = 4, Tp = 0, gap_radius = 1, # embeddings: 35960
E = 4, Tp = 0, gap_radius = 2, # embeddings: 27405
E = 4, Tp = 0, gap_radius = 3, # embeddings: 20475
E = 4, Tp = 1, gap_radius = 0, # embeddings: 23751
E = 4, Tp = -1, gap_radius = 0, # embeddings: 23751
E = 4, Tp = 2, gap_radius = 1, # embeddings: 3876
E = 4, Tp = -2, gap_radius = 1, # embeddings: 3876
E = 4, Tp = 3, gap_radius = 2, # embeddings: 3876
E = 4, Tp = -3, gap_radius = 2, # embeddings: 3876
E = 5, Tp = 0, gap_radius = 0, # embeddings: 278256
E = 5, Tp = 0, gap_radius = 1, # embeddings: 201376
E = 5, Tp = 0, gap_radius = 2, # embeddings: 142506
E = 5, Tp = 0, gap_radius = 3, # embeddings: 98280
E = 5, Tp = 1, gap_radius = 0, # embeddings: 118755
E = 5, Tp = -1, gap_radius = 0, # embeddings: 118755
E = 5, Tp = 2, gap_radius = 1, # embeddings: 11628
E = 5, Tp = -2, gap_radius = 1, # embeddings: 11628
E = 5, Tp = 3, gap_radius = 2, # embeddings: 11628
E = 5, Tp = -3, gap_rad

E = 7, Tp = -3, gap_radius = 2, # embeddings: 50388
P2
E = 4, Tp = 0, gap_radius = 0, # embeddings: 46376
E = 4, Tp = 0, gap_radius = 1, # embeddings: 35960
E = 4, Tp = 0, gap_radius = 2, # embeddings: 27405
E = 4, Tp = 0, gap_radius = 3, # embeddings: 20475
E = 4, Tp = 1, gap_radius = 0, # embeddings: 23751
E = 4, Tp = -1, gap_radius = 0, # embeddings: 23751
E = 4, Tp = 2, gap_radius = 1, # embeddings: 3876
E = 4, Tp = -2, gap_radius = 1, # embeddings: 3876
E = 4, Tp = 3, gap_radius = 2, # embeddings: 3876
E = 4, Tp = -3, gap_radius = 2, # embeddings: 3876
E = 5, Tp = 0, gap_radius = 0, # embeddings: 278256
E = 5, Tp = 0, gap_radius = 1, # embeddings: 201376
E = 5, Tp = 0, gap_radius = 2, # embeddings: 142506
E = 5, Tp = 0, gap_radius = 3, # embeddings: 98280
E = 5, Tp = 1, gap_radius = 0, # embeddings: 118755
E = 5, Tp = -1, gap_radius = 0, # embeddings: 118755
E = 5, Tp = 2, gap_radius = 1, # embeddings: 11628
E = 5, Tp = -2, gap_radius = 1, # embeddings: 11628
E = 5, Tp = 3, gap_ra

In [None]:
# libraries = {}

# for libsize in [25, 50, 100, 200, 300]:
#     start = 0
#     end = 30000
#     ranges = []

#     for i in range(start, end + 1, libsize):
#         if len(ranges) >= 100:  # Check if the length is 100
#             break
#         ranges.append([i, min(i + libsize - 1, end)])

#     libraries['{0}'.format(libsize)] = ranges
    
# # Save the dictionary to a file
# with open('libraries.pkl', 'wb') as file:
#     pickle.dump(libraries, file)