# Grouping variables 
Builds a DataFrame holding, for each mouse, the % count, total count, % duration, and total duration of vigilance states and their transitions.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from ggplot import *
import seaborn as sns
import statsmodels.api as sm
import glob
import os

from sklearn import linear_model
from lifelines import KaplanMeierFitter


# Make the data folder the working directory: load_data() reads and the final
# to_csv() call writes bare file names, which resolve relative to this folder.
# NOTE(review): this is a machine-specific absolute path; the notebook will not
# run elsewhere until it is pointed at the local copy of the data.
os.chdir('/Users/bmr225/Documents/PythonCodes/Survival_Data')

In [None]:
def load_data(phenotype, phase, condition):
    """Read the grouped CSV for one phenotype / phase / condition combination.

    The file name is ``<phenotype>_<phase>_<condition>_grouped.csv`` and is
    resolved relative to the current working directory.

    Returns the file contents as a pandas DataFrame.
    """
    csv_name = "_".join([phenotype, phase, condition]) + "_grouped.csv"
    return pd.read_csv(csv_name)

In [None]:
def _per_mouse_table(labelled_series):
    """Combine per-mouse Series into one frame, aligned on their index.

    ``labelled_series`` is a list of ``(column_label, series)`` pairs. The
    series are joined on their index labels rather than on row position, so a
    mouse that is absent from one series gets NaN in that column instead of
    silently receiving another mouse's value. The index is then turned back
    into an ordinary leading column.
    """
    columns = [series.rename(label) for label, series in labelled_series]
    return pd.concat(columns, axis=1).reset_index()


def dur_metrics(phenotype, phase, condition, phase_seconds=43200):
    """Build the per-mouse table of transition counts and durations.

    Loads the grouped CSV for the given phenotype / phase / condition via
    ``load_data`` and summarises the ``Duration`` column with
    ``grouping_data_mouse_count`` and ``grouping_data_mouse_sum``.
    NOTE(review): those two helpers are not defined in the visible part of the
    notebook; they must already exist in the kernel when this function runs.

    Parameters
    ----------
    phenotype, phase, condition : str
        Select the input file and are copied into the ``Phenotype``, ``Phase``
        and ``Condition`` columns of the result.
    phase_seconds : number, default 43200
        Denominator for the ``% ..._dur`` columns and the total from which
        ``Dur_Other`` is derived. 43200 is presumably the length of a 12 h
        phase in seconds -- confirm the unit of ``Duration``.

    Returns
    -------
    pandas.DataFrame
        One row per index entry of the grouped series (presumably one per
        mouse), with these columns after the leading index column(s):
        ``N-W_count, N-R_count, W-N_count, N-W_sum, N-R_sum, WfN_count,
        WfR_count, WfN_sum, WfR_sum, RfN_count, RfN_sum, Trans_sum, Dur_Other,
        % N-W_dur, % N-R_dur, % RfN_dur, % WfR_dur, % WfN_dur, Trans_count,
        Phenotype, Condition, Phase``.
        The ``% ..._dur`` columns hold fractions of ``phase_seconds`` (they are
        not multiplied by 100).
    """
    df = load_data(phenotype, phase, condition)

    # Each grouping is computed once and reused for every transition read from it.
    count_to = grouping_data_mouse_count(df, 'To', 'Duration')
    sum_to = grouping_data_mouse_sum(df, 'To', 'Duration')
    count_from = grouping_data_mouse_count(df, 'From', 'Duration')
    sum_from = grouping_data_mouse_sum(df, 'From', 'Duration')

    df_per_mouse = _per_mouse_table([
        # NREM episodes by what follows them, plus Wake -> NREM (grouped by 'To')
        ('N-W_count', count_to['NREM']['N-W']),
        ('N-R_count', count_to['NREM']['N-R']),
        ('W-N_count', count_to['Wake']['W-N']),
        ('N-W_sum', sum_to['NREM']['N-W']),
        ('N-R_sum', sum_to['NREM']['N-R']),
        # Wake episodes by what preceded them (grouped by 'From')
        ('WfN_count', count_from['Wake']['WfN']),
        ('WfR_count', count_from['Wake']['WfR']),
        ('WfN_sum', sum_from['Wake']['WfN']),
        ('WfR_sum', sum_from['Wake']['WfR']),
        # REM episodes entered from NREM (grouped by 'From')
        ('RfN_count', count_from['REM']['RfN']),
        ('RfN_sum', sum_from['REM']['RfN']),
    ])

    # Derived duration features.
    df_per_mouse['Trans_sum'] = (
        df_per_mouse['N-W_sum']
        + df_per_mouse['N-R_sum']
        + df_per_mouse['RfN_sum']
        + df_per_mouse['WfR_sum']
        + df_per_mouse['WfN_sum']
    )
    df_per_mouse['Dur_Other'] = phase_seconds - df_per_mouse['Trans_sum']
    for transition in ('N-W', 'N-R', 'RfN', 'WfR', 'WfN'):
        df_per_mouse['% ' + transition + '_dur'] = (
            df_per_mouse[transition + '_sum'] / phase_seconds
        )

    # W-N is left out of the total on purpose: those episodes are already
    # counted in WfN and WfR.
    df_per_mouse['Trans_count'] = (
        df_per_mouse['N-W_count']
        + df_per_mouse['N-R_count']
        + df_per_mouse['RfN_count']
        + df_per_mouse['WfR_count']
        + df_per_mouse['WfN_count']
    )

    # Tag every row with the group it came from (scalar broadcast).
    df_per_mouse['Phenotype'] = phenotype
    df_per_mouse['Condition'] = condition
    df_per_mouse['Phase'] = phase

    return df_per_mouse

In [None]:
# Build one table covering every phenotype / phase / condition combination,
# add the per-transition share of the transition count, and save it as CSV.

phenotypes = ['Susceptible', 'Resilient', 'Control']
phases = ['Light', 'Dark']
conditions = ['pre', 'post']

# Collect the per-group frames first and concatenate once: DataFrame.append
# was removed in pandas 2.0 and re-copies the accumulated frame on every call.
group_frames = [
    dur_metrics(phenotype, phase, condition)
    for phenotype in phenotypes
    for phase in phases
    for condition in conditions
]
# ignore_index gives the combined table a clean 0..n-1 index; the CSV is
# unaffected because it is written with index=False.
df_res = pd.concat(group_frames, ignore_index=True)

# Share of each transition type in Trans_count, stored as a fraction (not
# multiplied by 100). Trans_count does not include W-N, so '% W-N_count' is a
# ratio against the other transitions rather than a share of a total.
for transition in ['N-W', 'N-R', 'W-N', 'RfN', 'WfR', 'WfN']:
    df_res['% ' + transition + '_count'] = (
        df_res[transition + '_count'] / df_res['Trans_count']
    )

df_res.to_csv('count+Dur_stateTransitions_0424.csv', index=False)