## Functionality that needs to be added

- Create function to run all flies and arrange in exportable format
- Refactor
- Remove magic numbers
- Adapt into a Python script that can be run from the terminal - takes an input .csv and exports an Excel file

### Import Libraries that will be needed

In [99]:
import pandas as pd
import numpy as np
import datetime

### Import data from .csv

In [100]:
# Load the raw recording; the path is relative to the current working directory
df_raw = pd.read_csv("Bethany_Raw.csv")
# Parse the Time strings as HH:MM:SS and keep only the time-of-day part, so the column
# holds datetime.time objects that can be compared in get_data_range and is_night
df_raw.Time = pd.to_datetime(df_raw.Time, format='%H:%M:%S').dt.time #Convert Time column to datetime.time format

### Trim to 24 hour range

In [101]:
def get_data_range(L, lower_limit=None, upper_limit=None, num_rows=288):
    """
    Find the start and end indices for a 24 hour range starting between a given time range

    L           - a list of datetime.time objects, in recording order
    lower_limit - datetime.time, earliest acceptable start time (default 10:00:00)
    upper_limit - datetime.time, first time that is too late to start (default 10:05:00)
    num_rows    - number of rows in the window (default 288, the number of rows
                  until 24 hours later)

    The window starts at the first entry with lower_limit <= time < upper_limit.
    If no entry falls in that range the window starts at index 0.
    Returns a tuple: (start_index, end_index), with end_index exclusive
    """
    if lower_limit is None:
        lower_limit = datetime.time(10, 0, 0)
    if upper_limit is None:
        upper_limit = datetime.time(10, 5, 0)
    # First matching position, falling back to 0 when nothing matches
    start_index = next(
        (i for i, t in enumerate(L) if lower_limit <= t < upper_limit), 0)
    return (start_index, start_index + num_rows)


In [102]:
# Trim raw data to 24 hour window
trim_index = get_data_range(list(df_raw.Time))  # (start, end) row positions, end exclusive
df = df_raw[trim_index[0]:trim_index[1]]
df = df.reset_index(drop=True)  # renumber rows from 0 so row labels match positions in the trimmed frame
df_trimmed_copy = df.copy() # to be used in final output to excel 

### Make a column of bools to indicate day vs night

In [103]:
def is_night(x):
    """
    Take datetime.time object and return True if between 10pm and 10am, False otherwise

    Night wraps around midnight, so a time is night when it is at or after
    10pm, or strictly before 10am.
    """
    night_start = datetime.time(22, 0, 0)
    night_end = datetime.time(10, 0, 0)
    # No upper bound is needed on the evening side: every time of day at or after
    # 22:00 is night. (A "< 23:59:59" bound would wrongly exclude the last second.)
    return x >= night_start or x < night_end

In [104]:
# Create Boolean Series indicating Night=True Day=False
# In most cases this should be just split into top/bottom half of df, 
# but implemented as a function in case it is not
# NOTE(review): assigning through attribute access (df.isNight = ...) appears to store the
# Series as a plain attribute on df rather than adding a column. Later code reads it back
# as df.isNight and indexes the sleeping columns by position starting at 16, so confirm
# the positional offsets before turning this into a real column.
df.isNight = df.Time.apply(is_night)

### Generate bout indices

In [105]:
def get_bout_indices(L):
    """
    Takes a list L and returns a list of tuples of the start/end indices in which sleeping bouts occurred.
    A sleeping bout is a maximal run of consecutive entries equal to 0.
    The end index is exclusive, so L[start:end] is exactly the run of zeros.
    I.e. if two sleeping bouts occurred, the first covering L[5:20] and the second L[30:40],
    this function will return [(5,20), (30,40)]
    Returns an empty list when L is empty or contains no zeros.
    """
    indices = []
    start_index = None  # start of the bout currently open; None while the fly is awake

    for i, value in enumerate(L):
        if value == 0:
            if start_index is None:
                start_index = i
        elif start_index is not None:
            # First non-zero entry after a run of zeros closes the bout (i is exclusive)
            indices.append((start_index, i))
            start_index = None

    # A bout still open at the end of the data runs through the last element
    if start_index is not None:
        indices.append((start_index, len(L)))
    return indices
        

In [106]:
def create_sleeping_columns(df):
    """
    Adds one boolean column per fly to df (in place), named fly1sleeping ... fly5sleeping.
    A row is True when that fly's activity value is exactly 0 for that window.
    The five activity columns are read by position, starting at column 11.
    Columns used for filters in data analysis. Returns None.
    """
    first_activity_col = 11
    fly_count = 5
    for fly_number in range(1, fly_count + 1):
        activity = df.iloc[:, first_activity_col + fly_number - 1]
        column_name = 'fly' + str(fly_number) + 'sleeping'
        df.loc[:, column_name] = activity.apply(lambda count: count == 0)
    

In [107]:
# Adds the fly1sleeping ... fly5sleeping boolean columns to df in place
create_sleeping_columns(df)

### Get day bouts and night bouts

In [108]:
# Following code is used to generate two new dataframes: all_day_bouts_df and all_night_bouts_df
# These will be exported to two different sheets in excel. They will contain metabolic rate data 
# occurring during individual sleeping bouts, with one bout per column. Different flies will be separated
# with an empty column. 

colnames = df.columns[11:16] # Ordered fly genotype name strings (headers of columns 11-15)
def get_day_night_bouts(df, resp_colnum, activity_colnum):
    """
    Splits one fly's mr readings into separate lists, one per sleeping bout, grouped by day/night.
    Takes the dataframe (which must carry the isNight boolean Series), the column number
    for resp data, and the column number for the data used to find bouts (runs of 0).
    A bout counts as night when at least half of its rows are night rows.
    Returns a tuple of two lists of lists: resp data for daytime sleep bouts, resp data for nighttime sleep bouts.
    A side with no bouts is returned as ['none'].
    """
    resp_values = list(df.iloc[:, resp_colnum])
    activity_values = list(df.iloc[:, activity_colnum])
    day_bouts = []
    night_bouts = []
    for start, stop in get_bout_indices(activity_values):
        mostly_night = df.isNight[start:stop].mean() >= 0.5
        target = night_bouts if mostly_night else day_bouts
        target.append(resp_values[start:stop])
    # Placeholder keeps a visible entry for flies with no bouts in that period
    return (day_bouts or ['none']), (night_bouts or ['none'])

def get_all_bouts(df, colnames):
    """
    Returns a tuple of two dictionaries - individual bouts for each fly for 1) day sleep, 2) night sleep
    Dictionaries format - key: genotype; value: list of lists containing mr for individual sleeping bouts

    df       - trimmed dataframe; for the k-th fly (counting from 0) the mr data is read
               from column 4+k and the activity data from column 11+k
    colnames - ordered fly genotype names, one per fly; used as the dictionary keys
    """
    first_mr_col = 4
    first_act_col = 11
    all_day_bouts = {x: [] for x in colnames}
    all_night_bouts = {x: [] for x in colnames}
    # Walk the flies in the order given by colnames (the argument is honoured rather
    # than being recomputed from df), pairing each name with its column offsets
    for offset, name in enumerate(colnames):
        day_bouts, night_bouts = get_day_night_bouts(df, first_mr_col + offset, first_act_col + offset)
        all_day_bouts[name] += day_bouts
        all_night_bouts[name] += night_bouts
    return all_day_bouts, all_night_bouts

In [109]:
# Per-fly bout lists keyed by genotype name, one dictionary for day and one for night
day_bouts_dict, night_bouts_dict = get_all_bouts(df, colnames)

In [110]:
def get_all_bouts_list(day_bouts_dict, night_bouts_dict, colnames):
    """
    Flattens the per-fly bout dictionaries into two ordered lists of bouts (day, night).
    Flies are taken in the order given by colnames, and an empty list is placed after
    each fly's bouts so that different flies are separated.
    Returns a tuple: (all_bouts_day_list, all_bouts_night_list)
    """
    all_bouts_day_list = []
    all_bouts_night_list = []
    for fly in colnames:
        all_bouts_day_list.extend(day_bouts_dict[fly])
        all_bouts_day_list.append([])  # separator between flies
        all_bouts_night_list.extend(night_bouts_dict[fly])
        all_bouts_night_list.append([])
    return all_bouts_day_list, all_bouts_night_list

In [111]:
# Flatten the dictionaries into ordered lists of bouts, with an empty list after each fly
all_bouts_day_list, all_bouts_night_list = get_all_bouts_list(day_bouts_dict, night_bouts_dict, colnames)

In [112]:
# Create dataframes for day/night bouts
# Each bout is built as a row and then transposed, so every bout ends up in its own column
all_day_bouts_df = pd.DataFrame(all_bouts_day_list).transpose()
all_night_bouts_df = pd.DataFrame(all_bouts_night_list).transpose()

In [113]:
all_day_bouts_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,1.039541,1.180896,1.284699,1.227334,1.18559,1.130278,0.986619,,1.098401,,...,1.366439,1.471703,1.284412,,1.592927,1.589793,1.502461,1.616515,1.838532,
1,1.427665,1.383591,1.285172,1.295008,1.095631,1.101771,1.009748,,,,...,1.33013,1.332372,1.294859,,1.610929,,,1.53017,1.89097,
2,,1.294287,,1.206533,1.199461,1.044584,0.936784,,,,...,1.410944,1.315407,1.380678,,1.390133,,,1.537473,,
3,,1.249192,,,,1.026278,0.938204,,,,...,1.311169,1.319408,1.33691,,,,,1.466006,,
4,,1.326397,,,,8.365838,0.951994,,,,...,1.582302,1.280412,1.348364,,,,,,,
5,,1.331773,,,,0.910308,0.950158,,,,...,1.514232,1.296456,1.371576,,,,,,,
6,,1.148687,,,,1.019505,0.945871,,,,...,1.406155,1.219918,1.390585,,,,,,,
7,,1.122283,,,,1.070171,0.94516,,,,...,1.374179,1.174965,,,,,,,,
8,,1.248453,,,,,0.924083,,,,...,1.469105,1.379992,,,,,,,,
9,,1.113087,,,,,1.021218,,,,...,,1.277233,,,,,,,,


### Get sleep profiles

In [114]:
# The following code is used to create the sleep_profile dataframe, which will be exported as its own sheet
# in the excel output. 
def get_sleep_profile(df, resp_colnum, activity_colnum, bins_per_hour=12, hours=24):
    """
    Returns a list of tuples with sum of metabolic rate, the percentage sleep, and total beam breaks 
    for each hour in the 24 hour window, for one individual fly

    df              - trimmed dataframe, one row per time bin
    resp_colnum     - column number of the fly's metabolic rate data
    activity_colnum - column number of the fly's activity (beam break) data
    bins_per_hour   - number of rows that make up one hour (default 12)
    hours           - number of hourly windows to summarise (default 24)

    Each tuple is (resp sum, percent of bins with zero activity, activity sum).
    The percentage always divides by bins_per_hour, even if a window is short.
    """
    fly_activity = df.iloc[:, activity_colnum]
    fly_resp = df.iloc[:, resp_colnum]
    hourly_resp_sleep = []

    for hour in range(hours):
        # Rows belonging to this hour, selected by position
        window = slice(hour * bins_per_hour, (hour + 1) * bins_per_hour)
        activity = fly_activity.iloc[window]
        resp = fly_resp.iloc[window]
        # A bin with zero activity counts as a sleep block
        num_sleep_blocks = sum(1 for count in activity if count == 0)
        sleep_avg = num_sleep_blocks / bins_per_hour * 100
        hourly_resp_sleep.append((resp.sum(), sleep_avg, activity.sum()))

    return hourly_resp_sleep

def make_sleep_profile_dict(df):
    """
    Builds the dictionary behind the sleep profile table.
    'Time' holds the time of the last row of each of the 24 hourly windows. For each of
    the five flies (named after activity columns 11-15) three lists are added:
    '<fly> MR Sum', '<fly> Avg Sleep' and '<fly> Beam Breaks', one value per hour.
    """
    first_mr_col = 4
    first_act_col = 11
    # Position of each metric inside the tuples produced by get_sleep_profile
    suffixes = (' MR Sum', ' Avg Sleep', ' Beam Breaks')

    profile = {'Time': [df.Time[hour * 12 - 1] for hour in range(1, 25)]}

    for fly in range(5):
        act_col = first_act_col + fly
        hourly = get_sleep_profile(df, first_mr_col + fly, act_col)
        fly_name = df.columns[act_col]
        for position, suffix in enumerate(suffixes):
            profile[fly_name + suffix] = [entry[position] for entry in hourly]

    return profile

def make_sleep_profile_colnames(df):
    """
    Returns the ordered column names for the sleep profile dataframe: 'Time' first,
    then ' MR Sum', ' Avg Sleep' and ' Beam Breaks' for each of columns 11-15 of df.
    """
    suffixes = (' MR Sum', ' Avg Sleep', ' Beam Breaks')
    fly_names = [df.columns[position] for position in range(11, 16)]
    return ['Time'] + [name + suffix for name in fly_names for suffix in suffixes]

In [115]:
sleep_profile_dict = make_sleep_profile_dict(df)
# Pass the column list explicitly so the order is Time, then the three metrics for each fly
sleep_profile_df = pd.DataFrame(sleep_profile_dict, columns=make_sleep_profile_colnames(df))

In [116]:
# The following code is used to analyze the data processed above, and generate three dataframes, 
# which will become three sheets in the final excel output. 

#Total Sleep 
def get_sleep_minutes_df(df, colnames):
    """
    Builds a table with one row per fly giving minutes asleep in total, during day and during night.
    The k-th name in colnames (counting from 0) is paired with column 16+k of df, and rows
    are split into day/night using df.isNight.
    Returns a dataframe with columns Fly, Total Sleep Min, Total Day Sleep Min, Total Night Sleep Min.
    """
    minutes_per_bin = 5  # data are in 5 min bins
    first_sleep_col = 16
    rows = []
    for offset, fly in enumerate(colnames):
        asleep = df.iloc[:, first_sleep_col + offset]
        day_rows = df.isNight == False
        night_rows = df.isNight == True
        rows.append([
            fly,
            asleep.sum() * minutes_per_bin,
            asleep[day_rows].sum() * minutes_per_bin,
            asleep[night_rows].sum() * minutes_per_bin,
        ])
    return pd.DataFrame(rows, columns=['Fly', 'Total Sleep Min',
                                       'Total Day Sleep Min', 'Total Night Sleep Min'])

In [117]:
# One row per fly: total, day and night minutes of sleep
sleep_minutes_df = get_sleep_minutes_df(df, colnames)

In [118]:
# Wake Sleep MR
def get_wake_sleep_mr_df(df, colnames):
    """
    Builds a table with one row per fly of mean metabolic rate under six filters:
    awake, asleep, day, night, asleep during day, asleep during night.
    The k-th name in colnames (counting from 0) is paired with column 4+k (values averaged)
    and column 16+k (boolean sleep filter); day/night comes from df.isNight.
    """
    first_mr_col = 4
    first_sleep_col = 16
    rows = []
    for offset, fly in enumerate(colnames):
        mr = df.iloc[:, first_mr_col + offset]
        sleeping = df.iloc[:, first_sleep_col + offset]
        awake = sleeping == False
        asleep = sleeping == True
        day = df.isNight == False
        night = df.isNight == True
        rows.append([
            fly,
            mr[awake].mean(),
            mr[asleep].mean(),
            mr[day].mean(),
            mr[night].mean(),
            mr[asleep & day].mean(),
            mr[asleep & night].mean(),
        ])
    return pd.DataFrame(rows, columns=['Fly', 'Mean Wake MR', 'Mean Sleep MR', 'Mean Day MR',
                                       'Mean Night MR', 'Mean Day Sleep MR', 'Mean Night Sleep MR'])

In [119]:
# One row per fly: mean MR while awake, asleep, during day, during night, and asleep in each period
wake_sleep_mr_df = get_wake_sleep_mr_df(df, colnames)

In [120]:
wake_sleep_mr_df

Unnamed: 0,Fly,Mean Wake MR,Mean Sleep MR,Mean Day MR,Mean Night MR,Mean Day Sleep MR,Mean Night Sleep MR
0,w1118 BL 6326,1.201862,1.170727,1.235784,1.13248,1.26581,1.1187
1,w1118 BL 6326.1,1.631928,1.023106,1.646857,1.071596,1.098401,1.022518
2,eaat2 e04636,1.380552,0.890217,1.413147,0.908699,1.461901,0.88575
3,eaat2 e04636.1,1.453698,1.159332,1.36058,1.140018,1.242371,1.108024
4,eaat2 e04636.2,1.683387,1.53855,1.607447,1.655728,1.596901,1.531573


In [121]:
# Mean Hourly Sleep MR
def get_mean_hourly_sleep_mr_df(df, sleep_profile_df, colnames):
    """
    Builds a table with one row per fly of the mean hourly MR sum: overall, day and night.
    Uses every third column of sleep_profile_df starting at column 1 (the ' MR Sum' columns).
    The first half of the rows is treated as day and the second half as night.
    The df argument is not used.
    """
    mr_hourly = sleep_profile_df.iloc[:, 1::3]
    row_count = len(mr_hourly)
    night = pd.Series([position > row_count / 2 - 1 for position in range(row_count)])
    night_rows = night == True
    day_rows = night == False
    rows = []
    for offset, fly in enumerate(colnames):
        hourly = mr_hourly.iloc[:, offset]
        rows.append([fly, hourly.mean(), hourly[day_rows].mean(), hourly[night_rows].mean()])
    return pd.DataFrame(rows, columns=['Fly', 'Mean Hourly MR Total', 'Mean Hourly MR Day',
                                       'Mean Hourly MR Night'])

In [122]:
# One row per fly: mean of the hourly MR sums overall, for the day half and for the night half
mean_hourly_sleep_mr_df = get_mean_hourly_sleep_mr_df(df, sleep_profile_df, colnames)

In [123]:
mean_hourly_sleep_mr_df

Unnamed: 0,Fly,Mean Hourly MR Total,Mean Hourly MR Day,Mean Hourly MR Night
0,w1118 BL 6326,14.209586,14.829413,13.58976
1,w1118 BL 6326.1,16.310717,19.762282,12.859151
2,eaat2 e04636,13.931072,16.957759,10.904384
3,eaat2 e04636.1,15.00359,16.326964,13.680216
4,eaat2 e04636.2,19.579051,19.28937,19.868732


## Export to .xlsx

In [130]:
# Convert datetime.time columns back to str for correct formatting in Excel
sleep_profile_df.Time = sleep_profile_df.Time.astype(str)
df_trimmed_copy.Time = df_trimmed_copy.Time.astype(str)

def excel_out(df, name, **kwargs):
    """
    Wrapper function - Takes a dataframe, and a desired sheet name (string).
    Sends to new sheet in excel output
    Uses the module-level `writer`, which must be an open pd.ExcelWriter when this is called.
    Any extra keyword arguments are passed through to DataFrame.to_excel.
    """
    df.to_excel(writer, sheet_name = name, index = False, **kwargs)

# The with-block saves and closes the workbook on exit (also if a sheet fails to write),
# replacing the explicit writer.save() call, which newer pandas versions no longer provide
with pd.ExcelWriter('test_out.xlsx', engine='xlsxwriter') as writer:
    excel_out(df_trimmed_copy, 'Trimmed Analysis')
    excel_out(all_day_bouts_df, 'All Day Bouts', header = False)
    excel_out(all_night_bouts_df, 'All Night Bouts', header = False)
    excel_out(sleep_profile_df, 'Sleep Profile')
    excel_out(sleep_minutes_df, 'Min. of Sleep')
    excel_out(wake_sleep_mr_df, 'Wake Sleep MR')
    excel_out(mean_hourly_sleep_mr_df, 'Mean Hourly Sleep')

In [44]:
# Scratch cell: converts every entry of the Time column to str, one element at a time
# NOTE(review): appears redundant with the astype(str) conversion done before the Excel export - confirm before removing
sleep_profile_df.Time = sleep_profile_df.Time.apply(lambda x: str(x))

In [46]:
sleep_profile_df.Time.astype(str)

0     10:56:04
1     11:56:04
2     12:56:04
3     13:56:04
4     14:56:04
5     15:56:04
6     16:56:04
7     17:56:04
8     18:56:04
9     19:56:05
10    20:56:05
11    21:56:05
12    22:56:05
13    23:56:05
14    00:56:05
15    01:56:05
16    02:56:05
17    03:56:05
18    04:56:05
19    05:56:05
20    06:56:05
21    07:56:06
22    08:56:06
23    09:56:06
Name: Time, dtype: object