In [3]:
import csv
import glob
import json
import os
from datetime import datetime, date, time, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.optimize
import scipy.stats
import seaborn as sns
from scipy.io import loadmat
sns.set()

# Importing CSVs 

In [5]:
# Load every per-subject coarse pattern CSV and tag each frame with its subject ID.
patt_data = glob.glob('coarse/patt_dfs_updated/*****_coarse.csv')

patt_dfs = []
for file in patt_data:
    # Take the ID from the file name (everything before '_coarse.csv') instead of
    # fixed character offsets, which break whenever the directory name changes.
    subj_id = os.path.basename(file)[:-len('_coarse.csv')]
    df = pd.read_csv(file, low_memory = False)
    df['ID'] = subj_id
    patt_dfs.append(df)

In [6]:
# Read each hourly statistics CSV into its own DataFrame, keeping glob's order.
stats_data = glob.glob('stats_dfs/hour_updated/*****.csv')

stats_dfs = [pd.read_csv(csv_path, low_memory=False) for csv_path in stats_data]

In [7]:
# Load every per-subject coarse outcome CSV and tag each frame with its subject ID.
outcome_data = glob.glob('coarse/outcome_dfs_updated/*****_coarse.csv')

outcome_dfs = []
for file in outcome_data:
    # Take the ID from the file name (everything before '_coarse.csv') instead of
    # fixed character offsets, which break whenever the directory name changes.
    subj_id = os.path.basename(file)[:-len('_coarse.csv')]
    df = pd.read_csv(file, low_memory = False)
    df['ID'] = subj_id
    outcome_dfs.append(df)

In [9]:
# Load every per-subject time-divided outcome CSV and tag each frame with its subject ID.
time_outcome_data = glob.glob('coarse/time_outcome_updated/*****_coarse.csv')

time_div_dfs = []
for file in time_outcome_data:
    # Take the ID from the file name (everything before '_coarse.csv') instead of
    # fixed character offsets, which break whenever the directory name changes.
    subj_id = os.path.basename(file)[:-len('_coarse.csv')]
    df = pd.read_csv(file, low_memory = False)
    df['ID'] = subj_id
    time_div_dfs.append(df)

In [10]:
# Load every per-subject phase-divided outcome CSV and tag each frame with its subject ID.
phase_outcome_data = glob.glob('coarse/phase_outcome_updated/*****_coarse.csv')

phase_div_dfs = []
# Iterate the phase files globbed above; looping over time_outcome_data here
# would fill phase_div_dfs with the time-divided frames instead.
for file in phase_outcome_data:
    # Take the ID from the file name (everything before '_coarse.csv') instead of
    # fixed character offsets, which break whenever the directory name changes.
    subj_id = os.path.basename(file)[:-len('_coarse.csv')]
    df = pd.read_csv(file, low_memory = False)
    df['ID'] = subj_id
    phase_div_dfs.append(df)

In [64]:
# Read each daily statistics CSV into its own DataFrame, keeping glob's order.
day_stats_data = glob.glob('stats_dfs/day/*****.csv')

day_stats_dfs = [pd.read_csv(csv_path, low_memory=False) for csv_path in day_stats_data]

# Time division function

In [21]:
def time_div_recovery(timepattdf, statsdf, bg_type):
    '''Summarise recovery time after each high/low-bg pattern, split by time block.

    Only patterns with ``length > 1`` whose ``incl_high`` (bg_type='high') or
    ``incl_low`` (bg_type='low') flag is True are analysed. For every such
    pattern and every ``time{1..4}_runs`` column, the JSON-encoded list of
    coordinate pairs is decoded; for each pair, both coordinates are shifted
    by the pattern length and used as row positions into ``statsdf``.

    From each position the relevant statistics column ('TAR' for 'high',
    'TBR' for 'low') is scanned forward while its value is positive. The
    recovery time is the number of positive rows plus one (the first
    non-positive row). If the data ends before a non-positive row is seen,
    the count is the number of positive rows only and ``edge`` is set.

    Parameters
    ----------
    timepattdf : DataFrame with columns 'pattern', 'length', 'repeats',
        'incl_high'/'incl_low' and 'time1_runs' .. 'time4_runs' (JSON strings).
    statsdf : DataFrame with 'TAR' and/or 'TBR' columns, addressed by position.
    bg_type : 'high' or 'low'.

    Returns
    -------
    DataFrame with one row per (pattern, time block) and columns 'pattern',
    'length', 'repeats', 'ind_runs', 'time_block', 'min_recovery',
    'max_recovery', 'avg_recovery', 'edge'. Blocks without any measured run
    report 0 for the three recovery statistics. The frame is empty (but has
    these columns) when no pattern qualifies.

    Raises
    ------
    ValueError if ``bg_type`` is neither 'high' nor 'low'.
    '''
    flag_cols = {'high': ('incl_high', 'TAR'), 'low': ('incl_low', 'TBR')}
    if bg_type not in flag_cols:
        raise ValueError("bg_type must be 'high' or 'low', got {!r}".format(bg_type))
    incl_col, val_flag = flag_cols[bg_type]

    selected = timepattdf[(timepattdf['length'] > 1) & (timepattdf[incl_col] == True)]

    recovery_cols = ['pattern', 'length', 'repeats', 'ind_runs', 'time_block', 'min_recovery',
                     'max_recovery', 'avg_recovery', 'edge']
    t_runs = ['time1_runs', 'time2_runs', 'time3_runs', 'time4_runs']
    times = ['time1', 'time2', 'time3', 'time4']

    # Pull the scanned column out once; positional array access is far cheaper
    # than building a row Series with .iloc on every step.
    values = statsdf[val_flag].to_numpy()
    last_idx = len(values) - 1

    recovery_data = []
    for _, row in selected.iterrows():
        pattern = row['pattern']
        length = row['length']
        repeats = row['repeats']
        offset = int(length)

        for run_col, time_block in zip(t_runs, times):
            # Reset per block so one block's statistics never leak into the next.
            rec_times = []
            edge = False

            coord_list = json.loads(row[run_col])
            for coords in coord_list:
                for start in (int(coords[0]) + offset, int(coords[1]) + offset):
                    if start > last_idx:
                        # Pattern ends at/after the last row: nothing to measure,
                        # and the second coordinate of this pair is skipped too.
                        edge = True
                        break
                    count = 0
                    recovered = True
                    # NaN compares False here, so a missing value ends the run
                    # instead of spinning forever.
                    while values[start + count] > 0:
                        count += 1
                        if start + count > last_idx:
                            edge = True
                            recovered = False
                            break
                    if recovered:
                        count += 1  # include the first non-positive row
                    rec_times.append(count)

            if rec_times:
                min_rec = min(rec_times)
                max_rec = max(rec_times)
                avg_rec = sum(rec_times) / len(rec_times)
            else:
                min_rec, max_rec, avg_rec = 0, 0, 0
            recovery_data.append([pattern, length, repeats, coord_list, time_block,
                                  min_rec, max_rec, avg_rec, edge])

    # Passing columns= keeps the schema even when there are no rows.
    return pd.DataFrame(recovery_data, columns=recovery_cols)

In [18]:
# Use the first loaded stats frame as a single-subject smoke test and read
# that subject's time-divided outcome file.
stats_df = stats_dfs[0].copy()
test_id = str(stats_df.iloc[0]['id'])  # the copy holds the same values as stats_dfs[0]

time_outcome_file = ''.join(['coarse/time_outcome_updated/', test_id, '_coarse.csv'])
time_div_df = pd.read_csv(time_outcome_file, low_memory=False)

# Phase division outcome analysis

# Phase division recovery analysis

In [13]:
def phase_div_recovery(phasepattdf, statsdf, bg_type):
    '''Summarise recovery time after each high/low-bg pattern, split by phase.

    Only patterns with ``length > 1`` whose ``incl_high`` (bg_type='high') or
    ``incl_low`` (bg_type='low') flag is True are analysed. For every such
    pattern and every ``phase{1..4}_runs`` column, the JSON-encoded list of
    coordinate pairs is decoded; for each pair, both coordinates are shifted
    by the pattern length and used as row positions into ``statsdf``.

    From each position the relevant statistics column ('TAR' for 'high',
    'TBR' for 'low') is scanned forward while its value is positive. The
    recovery time is the number of positive rows plus one (the first
    non-positive row). If the data ends before a non-positive row is seen,
    the count is the number of positive rows only and ``edge`` is set.

    Parameters
    ----------
    phasepattdf : DataFrame with columns 'pattern', 'length', 'repeats',
        'incl_high'/'incl_low' and 'phase1_runs' .. 'phase4_runs' (JSON strings).
    statsdf : DataFrame with 'TAR' and/or 'TBR' columns, addressed by position.
    bg_type : 'high' or 'low'.

    Returns
    -------
    DataFrame with one row per (pattern, phase) and columns 'pattern',
    'length', 'repeats', 'ind_runs', 'phase', 'min_recovery', 'max_recovery',
    'avg_recovery', 'edge'. Phases without any measured run report 0 for the
    three recovery statistics. The frame is empty (but has these columns)
    when no pattern qualifies.

    Raises
    ------
    ValueError if ``bg_type`` is neither 'high' nor 'low'.
    '''
    flag_cols = {'high': ('incl_high', 'TAR'), 'low': ('incl_low', 'TBR')}
    if bg_type not in flag_cols:
        raise ValueError("bg_type must be 'high' or 'low', got {!r}".format(bg_type))
    incl_col, val_flag = flag_cols[bg_type]

    selected = phasepattdf[(phasepattdf['length'] > 1) & (phasepattdf[incl_col] == True)]

    recovery_cols = ['pattern', 'length', 'repeats', 'ind_runs', 'phase', 'min_recovery',
                     'max_recovery', 'avg_recovery', 'edge']
    p_runs = ['phase1_runs', 'phase2_runs', 'phase3_runs', 'phase4_runs']
    phases = ['phase1', 'phase2', 'phase3', 'phase4']

    # Pull the scanned column out once; positional array access is far cheaper
    # than building a row Series with .iloc on every step.
    values = statsdf[val_flag].to_numpy()
    last_idx = len(values) - 1

    recovery_data = []
    for _, row in selected.iterrows():
        pattern = row['pattern']
        length = row['length']
        repeats = row['repeats']
        offset = int(length)

        for run_col, phase in zip(p_runs, phases):
            # Reset per phase so one phase's statistics never leak into the next.
            rec_times = []
            edge = False

            coord_list = json.loads(row[run_col])
            for coords in coord_list:
                for start in (int(coords[0]) + offset, int(coords[1]) + offset):
                    if start > last_idx:
                        # Pattern ends at/after the last row: nothing to measure,
                        # and the second coordinate of this pair is skipped too.
                        edge = True
                        break
                    count = 0
                    recovered = True
                    # NaN compares False here, so a missing value ends the run
                    # instead of spinning forever.
                    while values[start + count] > 0:
                        count += 1
                        if start + count > last_idx:
                            edge = True
                            recovered = False
                            break
                    if recovered:
                        count += 1  # include the first non-positive row
                    rec_times.append(count)

            if rec_times:
                min_rec = min(rec_times)
                max_rec = max(rec_times)
                avg_rec = sum(rec_times) / len(rec_times)
            else:
                min_rec, max_rec, avg_rec = 0, 0, 0
            recovery_data.append([pattern, length, repeats, coord_list, phase,
                                  min_rec, max_rec, avg_rec, edge])

    # Passing columns= keeps the schema even when there are no rows.
    return pd.DataFrame(recovery_data, columns=recovery_cols)

In [12]:
# Use the first loaded stats frame as a single-subject smoke test and read
# that subject's phase-divided outcome file.
# NOTE(review): this reads from 'coarse/phase_outcome_dfs/', while the bulk
# loader earlier globs 'coarse/phase_outcome_updated/' - confirm which is current.
stats_df = stats_dfs[0].copy()
test_id = str(stats_df.iloc[0]['id'])  # the copy holds the same values as stats_dfs[0]

phase_outcome_file = ''.join(['coarse/phase_outcome_dfs/', test_id, '_coarse.csv'])
phase_div_df = pd.read_csv(phase_outcome_file, low_memory=False)

In [None]:
# For every subject: run the phase-divided recovery analysis on high-bg patterns,
# save the per-subject result, and collect [pattern, phase, avg_recovery] for
# each (pattern, phase) row that actually has runs.
phase_recs_data = []
phase_cols = ['phase1', 'phase2', 'phase3', 'phase4']

for stats_df in stats_dfs:
    subj_id = str(stats_df.iloc[0]['id'])
    # Build the path from this subject's own ID; using test_id here would load
    # the same phase file for every subject.
    phase_outcome_file = 'coarse/phase_outcome_dfs/' + subj_id + '_coarse.csv'
    phase_div_df = pd.read_csv(phase_outcome_file, low_memory = False)

    rec_df = phase_div_recovery(phase_div_df, stats_df, 'high')
    file_name = 'coarse/phase_rec_dfs/' + subj_id + '.csv'
    rec_df.to_csv(file_name, index = False)

    # Keep only rows whose run list is non-empty, i.e. phases where the pattern occurred.
    df = rec_df[rec_df['ind_runs'].map(len) > 0]
    phase_recs_data.extend(
        [pattern, phase, avg_rec]
        for pattern, phase, avg_rec in zip(df['pattern'], df['phase'], df['avg_recovery'])
    )