In [6]:
import pandas as pd
import numpy as np
from dataclasses import dataclass


#define data classes:
@dataclass
class Run:
    id: int
    decay: float
    touchTransferFraction: float
    counts: list[int]
    occupancies: list[int]
    cdffs: list[int]
    anyCps: list[int]

@dataclass
class Sample:
        run: int
        startDay: int
        decay: float
        touchTransferFractions: float
        counts: list[int]
        occupancies: list[int]
        cdiffs: list[int]
        anyCps: list[int]
        


In [7]:
# read the simulated data into a dataframe.  Indexed on the run and sorted by tick.

data = pd.read_csv('data/sim_data.csv', index_col=['run']).sort_index().sort_values(by="tick")
data.head()

Unnamed: 0_level_0,count,tick,decayRate,surfaceTransferFraction,CDIFF,occupancy,anyCP
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
16,23.0,90.0,0.590993,0.939448,0.0,15,2.0
8,0.0,90.0,0.789146,0.665336,0.0,15,5.0
6,2.0,90.0,0.728805,0.799395,0.0,16,3.0
19,5.0,90.0,0.781537,0.443818,2.0,12,2.0
2,0.0,90.0,0.539657,0.258963,0.0,20,4.0


In [39]:
# How many runs do we have and what are their lengths?
for i in data.index.unique():
    print(i, ": ", len(data.loc[i]))
    
runs = []

#create individual run objects for each run in the data.  Each run object will have a list of counts, occupancies, cdffs, and anyCps
for i in data.index.unique():
    print(i)
    print(data.loc[i].decayRate.iloc[1])
    run = Run(id=i, decay=data.loc[i].decayRate.iloc[0], touchTransferFraction=data.loc[i].surfaceTransferFraction.iloc[0], counts=data.loc[i].count, occupancies=data.loc[i].occupancy, cdffs=data.loc[i].CDIFF, anyCps=data.loc[i].anyCP)
    runs.append(run)

# print(runs)

16 :  276
8 :  276
6 :  276
19 :  276
2 :  276
13 :  276
18 :  276
12 :  276
20 :  276
14 :  276
4 :  276
3 :  276
10 :  276
15 :  276
7 :  276
11 :  276
1 :  276
5 :  276
9 :  276
21 :  276
17 :  276
16
0.5909927396569401
8
0.7891460747923702
6
0.7288053056690842
19
0.7815373248886317
2
0.539657253306359
13
0.0808097256813198
18
0.6123686786741018
12
0.7731850945856422
20
0.1230096088256686
14
0.0879190876148641
4
0.5205552245024592
3
0.3215222228318453
10
0.8720119358040392
15
0.9614504396449776
7
0.5769566630478948
11
0.4283604077063501
1
0.7347467318177223
5
0.3270599981769919
9
0.105979919899255
21
0.1865084439050406
17
0.0564352194778621
[Run(id=16, decay=np.float64(0.5909927396569401), touchTransferFraction=np.float64(0.9394482094794512), counts=<bound method DataFrame.count of      count   tick  decayRate  surfaceTransferFraction  CDIFF  occupancy  anyCP
run                                                                           
16    23.0   90.0   0.590993                 0

In [26]:
#define a function to split the sequences into lists of n steps (we have 56 days of observed data)
#we will split the simulated data into sequences n long (90-136, 91-137, 92-138, etc.)

def split_sequences(run, number_of_steps):
    start = int(run.tick.min())
    end = int(run.tick.max())-int(number_of_steps)
    print(start, "-", end)
    print(run)
    run_samples = []
    for i in np.arange(start, end):
        sample = Sample(run=run.index[0], startDay=i, decay=run.decayRate, touchTransferFractions=run.surfaceTransferFraction, counts=run.count[i:i+number_of_steps], occupancies=run.occupancy[i:i+number_of_steps], cdiffs=run.CDIFF[i:i+number_of_steps], anyCps=run.anyCp[i:i+number_of_steps])
        run_samples.append(sample)
    return run_samples

In [27]:
samples = split_sequences(data.loc[1]  , 46)
print(samples)
#run = 15
#ticks = data.loc[run]
#print(ticks)

90 - 319
     count   tick  decayRate  surfaceTransferFraction  CDIFF  occupancy  anyCP
run                                                                           
1      2.0   90.0   0.734747                  0.06384    1.0         20    3.0
1      2.0   91.0   0.734747                  0.06384    1.0         19    3.0
1      3.0   92.0   0.734747                  0.06384    2.0         20    3.0
1      4.0   93.0   0.734747                  0.06384    2.0         15    1.0
1      4.0   94.0   0.734747                  0.06384    2.0         14    1.0
..     ...    ...        ...                      ...    ...        ...    ...
1      0.0  361.0   0.734747                  0.06384    0.0         20    5.0
1      0.0  362.0   0.734747                  0.06384    0.0         18    5.0
1      0.0  363.0   0.734747                  0.06384    0.0         18    3.0
1      0.0  364.0   0.734747                  0.06384    0.0         18    3.0
1      0.0  365.0   0.734747               

TypeError: 'method' object is not subscriptable