# Analysing mock datasets

In [1]:
# packages

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

import sys
sys.path.append("../src")

from analysis import *
from inference import *

In [2]:
import re

def natural_sort(l): 
    convert = lambda text: int(text) if text.isdigit() else text.lower() 
    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] 
    return sorted(l, key=alphanum_key)

In [3]:
datapath = "../experiment_outputs/test2/datasets/"
datafiles = os.listdir(datapath)
metadatafiles = [f"metadata{i.split('dataset')[1].split('csv')[0]}txt"\
                for i in datafiles]

datafiles = natural_sort(datafiles)
metadatafiles = natural_sort(metadatafiles)

In [4]:
df = pd.read_csv(datapath+datafiles[0], index_col=0)
metatext = open(f"{datapath}../metadata/"+metadatafiles[0], "r").read().split("\n")
df

Unnamed: 0,dataset,init_cond_idx,t_samp_dist_idx,measurement_noise,time,dt,sp1,sp2
0,0.0,0.0,0.0,0.0,0.000000,3.333333,0.432183,0.441384
1,0.0,0.0,0.0,0.0,3.333333,3.333333,0.424884,0.453830
2,0.0,0.0,0.0,0.0,6.666667,3.333333,0.510088,0.552791
3,0.0,0.0,0.0,0.0,10.000000,3.333333,0.489305,0.507222
4,0.0,0.0,0.0,0.0,13.333333,3.333333,0.381677,0.517997
...,...,...,...,...,...,...,...,...
295,19.0,4.0,1.0,0.1,23.684211,1.578947,0.410460,0.487839
296,19.0,4.0,1.0,0.1,25.263158,1.578947,0.419151,0.541748
297,19.0,4.0,1.0,0.1,26.842105,1.578947,0.526454,0.526271
298,19.0,4.0,1.0,0.1,28.421053,1.578947,0.367020,0.567186


In [5]:
def get_meta(metatext):
    meta = {}
    for n, line in enumerate(metatext):
        if "initial conditions" in line:
            init_cond_ln_idx = n

        elif "sampling timepoints" in line:
            t_samp_ln_idx = n

        elif "parameters" in line:
            meta["parameters"] = np.array([np.float64(j) for j in line.split(": ")[1].split(",")])
            params_ln_idx = n
        
        elif "measurement noise" in line:
            meta["meas_noise"] = np.array([np.float64(j) for j in line.split(": ")[1].split(",")])
            meas_noise_ln_idx = n
            break

    meta["init_cond"] = np.array([[np.float64(i) for i in metatext[j].split(",")] \
                                  for j in range(init_cond_ln_idx+1, t_samp_ln_idx)])

    meta["t_samp"] = [[np.float64(i) for i in metatext[j].split(",")] \
                                  for j in range(t_samp_ln_idx+1, params_ln_idx)]

    for i in range(meas_noise_ln_idx+1, len(metatext)-1):
        key, val = metatext[i].split(": ")
        meta[key] = np.float64(val)

    key, val = metatext[-1].split(": ")
    meta[key] = bool(val)

    return meta

In [6]:
get_meta(metatext)

ValueError: could not convert string to float: '0.0,0.1'

In [None]:
df

Unnamed: 0,dataset,init_cond_idx,t_samp_dist_idx,measurement_noise,time,dt,sp1,sp2
0,0.0,0.0,0.0,0.0,0.000000,3.333333,0.432183,0.441384
1,0.0,0.0,0.0,0.0,3.333333,3.333333,0.424884,0.453830
2,0.0,0.0,0.0,0.0,6.666667,3.333333,0.510088,0.552791
3,0.0,0.0,0.0,0.0,10.000000,3.333333,0.489305,0.507222
4,0.0,0.0,0.0,0.0,13.333333,3.333333,0.381677,0.517997
...,...,...,...,...,...,...,...,...
295,19.0,4.0,1.0,0.1,23.684211,1.578947,0.410460,0.487839
296,19.0,4.0,1.0,0.1,25.263158,1.578947,0.419151,0.541748
297,19.0,4.0,1.0,0.1,26.842105,1.578947,0.526454,0.526271
298,19.0,4.0,1.0,0.1,28.421053,1.578947,0.367020,0.567186


In [None]:
get_meta(metatext)

ValueError: could not convert string to float: ''

In [None]:
datasets = [df[df.dataset==i] for i in df.dataset.unique()]