## Functionality that needs to be added

- ~~Trim Raw Input~~
- ~~Night vs Day bouts (criteria - choose where over 50% of bout is spent, in case of tie choose night)~~
- ~~Create Sleep Profile~~
- ~~Create summary stats~~
- Create function to run all flies and arrange in exportable format
- Add sum of beam breaks within one hour to sleep profile 
- ~~Write to separate sheets in Excel~~
- Refactor
- Make test cases and create Exceptions
- Adapt to python script that can be run in terminal - takes input .csv and exports excel file

### Import Libraries that will be needed

In [1]:
import pandas as pd
import numpy as np
import datetime

### Import data from .csv

In [2]:
df_raw = pd.read_csv("Bethany_Raw.csv")

### Trim to 24 hour range

In [3]:
lower_limit = datetime.time(10,0,0)
upper_limit = datetime.time(10,5,0)
def get_data_range(L, lower_limit, upper_limit, num_rows=288):
    """
    Find the start and end indices of a fixed-length window of rows.

    The window starts at the first entry of L that falls inside the half-open
    interval [lower_limit, upper_limit).

    L - a list of datetime.time objects
    lower_limit, upper_limit - datetime.time objects specifying the range in
        which to choose the start time (lower bound inclusive, upper exclusive)
    num_rows - length of the window in rows; the default of 288 matches the
        24 hour window this file works with (24 blocks of 12 rows)

    Returns a tuple: (start_index, end_index), where end_index is
    start_index + num_rows and is NOT clamped to len(L).
    If no entry of L lies inside the range, the window starts at index 0.
    """
    start_index = 0  # fallback when nothing in L matches the range
    for i, t in enumerate(L):
        if lower_limit <= t < upper_limit:
            start_index = i
            break  # only the first match is used
    return (start_index, start_index + num_rows)


In [4]:
# Trim raw data to 24 hour window
# Parse the Time column from 'HH:MM:SS' text and keep only the time-of-day part
df_raw.Time = pd.to_datetime(df_raw.Time, format='%H:%M:%S').dt.time
# trim_index is a (start_index, end_index) tuple of row positions
trim_index = get_data_range(list(df_raw.Time), lower_limit, upper_limit)
df = df_raw[trim_index[0]:trim_index[1]]
# Renumber rows from 0 so later positional/label lookups (e.g. df.Time[11]) line up
df = df.reset_index(drop=True)
trimmed_copy_df = df.copy() # snapshot taken before any columns are added; used in final output to excel

### Make a column of bools to indicate day vs night

In [5]:
night_start = datetime.time(22,0,0)
night_end = datetime.time(10,0,0)
def isNight(x):
    """
    Take datetime.time object and return True if between 10pm and 10am, False otherwise.

    The night interval wraps around midnight, so a time is "night" when it is
    at or after night_start (22:00:00) OR strictly before night_end (10:00:00).
    No upper bound is needed on the first condition: every datetime.time at or
    after 22:00:00 belongs to the night, including 23:59:59 and any fractional
    second after it.
    """
    return x >= night_start or x < night_end

In [6]:
# Create Boolean column indicating Night=True Day=False
# use df.isNight = df.Time.apply(isNight) if day/night not split between bottom/top
# The flags are purely positional: rows in the first half of df get False (day),
# rows in the second half get True (night); the Time values are not consulted.
# NOTE(review): assigning a NEW name with dot notation looks like it stores a plain
# attribute on the DataFrame rather than adding a column (see pandas indexing docs).
# Later code addresses the '...sleeping' columns by position (i+16), which appears to
# rely on isNight NOT occupying a column slot - confirm before switching to df['isNight'].
df.isNight = pd.Series([x > len(df)/2-1 for x in range(len(df))]) 

### Generate bout indices

In [7]:
def get_bout_indices(L):
    """
    Takes a list L and returns a list of tuples of the start/end indices in which sleeping bouts occurred.

    A sleeping bout is a maximal run of consecutive entries equal to 0.
    Each tuple is (start, end) with start inclusive and end exclusive, so
    L[start:end] is exactly the run of zeros.

    I.e. if two sleeping bouts occurred, the first covering indices 5 up to (not including) 20,
    and the second covering indices 30 up to (not including) 40,
    this function will return [(5,20), (30,40)]

    A bout that is still open when the list ends is closed at len(L).
    An empty list, or a list with no zeros, yields [].
    """
    indices = []
    start_index = None  # None means "not currently inside a bout"

    for i, value in enumerate(L):
        if value == 0:
            if start_index is None:
                start_index = i
        elif start_index is not None:
            # First non-zero entry after a run of zeros: the bout ends just
            # before it, so i itself is the exclusive end.
            indices.append((start_index, i))
            start_index = None

    # The list ended while still inside a bout: the last element is part of it.
    if start_index is not None:
        indices.append((start_index, len(L)))
    return indices
        

In [8]:
def create_sleeping_columns(df):
    """
    Add one boolean column per fly to df, in place.

    For fly number n (1 to 5) the source values are read from column position
    10 + n (positions 11 to 15) and the result is stored under the name
    'fly<n>sleeping'. An entry is True where the source value equals 0,
    which marks the fly as asleep for that window.
    Columns are used for filters in data analysis. Returns None.
    """
    for fly_number, position in enumerate(range(11, 16), start=1):
        asleep_flags = df.iloc[:, position].apply(lambda value: value == 0)
        df.loc[:, 'fly{}sleeping'.format(fly_number)] = asleep_flags
    

In [9]:
create_sleeping_columns(df)

### Get day bouts and night bouts

In [10]:
# Redundancy/messy here - refactor
colnames = df.columns[11:16] # Ordered list of fly genotypes
def get_day_night_bouts(df, resp_colnum, sleep_colnum):
    """
    Collect the mr values of a single fly for each independent sleeping bout, split into day and night.

    df - the dataframe; its isNight flags are read to classify each bout
    resp_colnum - column number of the resp data
    sleep_colnum - column number of the data in which a 0 marks a sleeping window

    A bout counts as a night bout when at least half of its rows are flagged
    as night (a tie goes to night); otherwise it is a day bout.

    Returns a tuple of two lists of lists: resp data for daytime sleep bouts,
    resp data for nighttime sleep bouts. A side with no bouts is returned as
    the placeholder list ['none'].
    """
    day_side, night_side = [], []
    for first, past_last in get_bout_indices(list(df.iloc[:, sleep_colnum])):
        mostly_night = df.isNight[first:past_last].mean() >= 0.5
        destination = night_side if mostly_night else day_side
        destination.append(list(df.iloc[:, resp_colnum])[first:past_last])
    # An empty side is replaced by a placeholder so it still shows up downstream
    return (day_side or ['none']), (night_side or ['none'])

def get_all_bouts(df, colnames):
    """
    Returns a tuple of two dictionaries - individual bouts for each fly for 1) day sleep, 2) night sleep
    Dictionaries format - key: genotype; value: list of lists containing mr for individual sleeping bouts

    Five flies are processed: fly k (0-4) uses resp column 4 + k and sleep
    column 11 + k, and its genotype is the header of that sleep column.
    Note: the colnames argument is not used; the keys are always taken from
    df.columns[11:16].
    """
    genotypes = df.columns[11:16]
    all_day_bouts = dict((name, []) for name in genotypes)
    all_night_bouts = dict((name, []) for name in genotypes)
    for offset in range(5):
        day_part, night_part = get_day_night_bouts(df, 4 + offset, 11 + offset)
        genotype = df.columns[11 + offset]
        all_day_bouts[genotype] += day_part
        all_night_bouts[genotype] += night_part
    return all_day_bouts, all_night_bouts

In [11]:
day_bouts_dict, night_bouts_dict = get_all_bouts(df, colnames)

In [12]:
def get_all_bouts_list(day_bouts_dict, night_bouts_dict, colnames):
    """
    Flatten the per-fly bout dictionaries into two flat lists (day, night).

    Flies are visited in the order given by colnames. After each fly's bouts
    an empty list is inserted as a separator, so that different flies can be
    told apart once the lists are laid out as columns.
    The input dictionaries are not modified.
    Returns a tuple: (all day bouts list, all night bouts list)
    """
    flat_day = []
    flat_night = []
    for fly in colnames:
        # the trailing [] is the separator between flies
        flat_day.extend(day_bouts_dict[fly] + [[]])
        flat_night.extend(night_bouts_dict[fly] + [[]])
    return flat_day, flat_night

In [13]:
all_bouts_day_list, all_bouts_night_list = get_all_bouts_list(day_bouts_dict, night_bouts_dict, colnames)

In [14]:
# Create dataframes for day/night bouts
all_day_bouts_df = pd.DataFrame(all_bouts_day_list).transpose()
all_night_bouts_df = pd.DataFrame(all_bouts_night_list).transpose()

In [15]:
all_day_bouts_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,1.039541,1.180896,1.284699,1.227334,1.18559,1.130278,0.986619,,1.098401,,...,1.366439,1.471703,1.284412,,1.592927,1.589793,1.502461,1.616515,1.838532,
1,1.427665,1.383591,1.285172,1.295008,1.095631,1.101771,1.009748,,,,...,1.33013,1.332372,1.294859,,1.610929,,,1.53017,1.89097,
2,,1.294287,,1.206533,1.199461,1.044584,0.936784,,,,...,1.410944,1.315407,1.380678,,1.390133,,,1.537473,,
3,,1.249192,,,,1.026278,0.938204,,,,...,1.311169,1.319408,1.33691,,,,,1.466006,,
4,,1.326397,,,,8.365838,0.951994,,,,...,1.582302,1.280412,1.348364,,,,,,,
5,,1.331773,,,,0.910308,0.950158,,,,...,1.514232,1.296456,1.371576,,,,,,,
6,,1.148687,,,,1.019505,0.945871,,,,...,1.406155,1.219918,1.390585,,,,,,,
7,,1.122283,,,,1.070171,0.94516,,,,...,1.374179,1.174965,,,,,,,,
8,,1.248453,,,,,0.924083,,,,...,1.469105,1.379992,,,,,,,,
9,,1.113087,,,,,1.021218,,,,...,,1.277233,,,,,,,,


### Get sleep profiles

In [16]:
def get_sleep_profile(df, resp_colnum, sleep_colnum):
    """
    Build an hourly profile for one fly over the 24 hour window.

    The rows are processed as 24 consecutive blocks of 12 rows each.
    Returns a list of 24 tuples, one per block:
    (sum of the resp column, percentage of rows whose sleep-column value is 0,
    sum of the sleep column - i.e. total beam breaks)
    """
    activity = df.iloc[:, sleep_colnum]
    metabolic = df.iloc[:, resp_colnum]
    profile = []
    for hour in range(24):
        block = slice(hour * 12, hour * 12 + 12)
        hour_activity = activity[block]
        hour_resp = metabolic[block]
        # a row with a value of 0 counts as a sleeping row
        asleep_rows = sum(1 for breaks in hour_activity if breaks == 0)
        percent_asleep = asleep_rows / 12 * 100
        profile.append((hour_resp.sum(), percent_asleep, hour_activity.sum()))
    return profile

def make_sleep_profile_dict(df):
    """
    Returns a dictionary containing a time index and the sleep profiles for each fly in the dataframe.

    'Time' holds the df.Time value at the last row of each 12-row block
    (labels 11, 23, ..., 287). For each of the five flies (resp columns 4-8,
    sleep columns 11-15) three lists are added, keyed by the sleep column's
    header plus ' MR Sum', ' Avg Sleep' and ' Beam Breaks'.
    """
    sleep_profile_dict = {'Time': [df.Time[12 * hour - 1] for hour in range(1, 25)]}
    # (key suffix, position of that value inside each hourly tuple)
    fields = ((' MR Sum', 0), (' Avg Sleep', 1), (' Beam Breaks', 2))
    for offset in range(5):
        hourly = get_sleep_profile(df, 4 + offset, 11 + offset)
        genotype = df.columns[11 + offset]  # name of genotype is header of sleep column
        for suffix, position in fields:
            sleep_profile_dict[genotype + suffix] = [entry[position] for entry in hourly]
    return sleep_profile_dict

def make_sleep_profile_colnames(df):
    """
    Create the ordered list of column names for the dataframe built from the sleep_profile dictionary.

    The list starts with 'Time', followed, for each of df's columns at
    positions 11-15, by that column's header with the suffixes ' MR Sum',
    ' Avg Sleep' and ' Beam Breaks' (in that order).
    """
    suffixes = (' MR Sum', ' Avg Sleep', ' Beam Breaks')
    ordered = ['Time']
    for position in range(11, 16):
        ordered.extend(df.columns[position] + suffix for suffix in suffixes)
    return ordered

In [17]:
sleep_profile_dict = make_sleep_profile_dict(df)
sleep_profile_df = pd.DataFrame(sleep_profile_dict, columns=make_sleep_profile_colnames(df))

In [18]:
#Total Sleep 
def get_sleep_minutes_df(df, colnames):
    """
    Summarise minutes of sleep per fly: total, during the day, and during the night.

    The fly at position k of colnames is read from df's column at position
    k + 16. Each True entry there is counted as 5 minutes; day/night is
    decided by df.isNight.
    Returns a DataFrame with one row per fly.
    """
    rows = []
    for position, fly in enumerate(colnames):
        asleep = df.iloc[:, position + 16]
        day_rows = asleep[df.isNight == False]
        night_rows = asleep[df.isNight == True]
        # each counted row contributes 5 minutes
        rows.append([fly, asleep.sum() * 5, day_rows.sum() * 5, night_rows.sum() * 5])
    header = ['Fly', 'Total Sleep Min', 'Total Day Sleep Min', 'Total Night Sleep Min']
    return pd.DataFrame(rows, columns=header)

In [19]:
sleep_minutes_df = get_sleep_minutes_df(df, colnames)

In [20]:
# Wake Sleep MR
def get_wake_sleep_mr_df(df, colnames):
    """
    Summarise the mean MR per fly under six conditions.

    The fly at position k of colnames uses df's column at position k + 4 for
    the MR values and the column at position k + 16 as its asleep flag;
    day/night is decided by df.isNight.
    Returns a DataFrame with one row per fly holding the mean MR while awake,
    while asleep, during the day, during the night, while asleep during the
    day, and while asleep during the night.
    """
    rows = []
    for position, fly in enumerate(colnames):
        mr = df.iloc[:, position + 4]
        asleep = df.iloc[:, position + 16] == True
        awake = df.iloc[:, position + 16] == False
        daytime = df.isNight == False
        nighttime = df.isNight == True
        rows.append([
            fly,
            mr[awake].mean(),
            mr[asleep].mean(),
            mr[daytime].mean(),
            mr[nighttime].mean(),
            mr[asleep & daytime].mean(),
            mr[asleep & nighttime].mean(),
        ])
    header = ['Fly', 'Mean Wake MR', 'Mean Sleep MR', 'Mean Day MR',
              'Mean Night MR', 'Mean Day Sleep MR', 'Mean Night Sleep MR']
    return pd.DataFrame(rows, columns=header)

In [21]:
wake_sleep_mr_df = get_wake_sleep_mr_df(df, colnames)

In [28]:
# Mean Hourly Sleep MR
def get_mean_hourly_sleep_mr_df(df, sleep_profile_df, colnames):
    """
    Summarise the mean of the hourly MR values per fly: overall, day, and night.

    df - not used by this function
    sleep_profile_df - every third column starting at position 1 is taken as
        one fly's hourly values (the ' MR Sum' columns when the frame uses the
        Time / MR Sum / Avg Sleep / Beam Breaks layout)
    colnames - fly names, in the same order as those columns

    Day/night is positional: the first half of the rows is day, the second
    half is night.
    Returns a DataFrame with one row per fly.
    """
    mr_hourly = sleep_profile_df.iloc[:, 1::3]
    n_rows = len(mr_hourly)
    night_flags = pd.Series([row > n_rows / 2 - 1 for row in range(n_rows)])
    rows = []
    for position, fly in enumerate(colnames):
        hourly = mr_hourly.iloc[:, position]
        rows.append([
            fly,
            hourly.mean(),
            hourly[night_flags == False].mean(),
            hourly[night_flags == True].mean(),
        ])
    header = ['Fly', 'Mean Hourly MR Total', 'Mean Hourly MR Day',
              'Mean Hourly MR Night']
    return pd.DataFrame(rows, columns=header)

In [29]:
mean_hourly_sleep_mr_df = get_mean_hourly_sleep_mr_df(df, sleep_profile_df, colnames)

### To output to csv

In [24]:
all_day_bouts_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,1.039541,1.180896,1.284699,1.227334,1.18559,1.130278,0.986619,,1.098401,,...,1.366439,1.471703,1.284412,,1.592927,1.589793,1.502461,1.616515,1.838532,
1,1.427665,1.383591,1.285172,1.295008,1.095631,1.101771,1.009748,,,,...,1.33013,1.332372,1.294859,,1.610929,,,1.53017,1.89097,
2,,1.294287,,1.206533,1.199461,1.044584,0.936784,,,,...,1.410944,1.315407,1.380678,,1.390133,,,1.537473,,
3,,1.249192,,,,1.026278,0.938204,,,,...,1.311169,1.319408,1.33691,,,,,1.466006,,
4,,1.326397,,,,8.365838,0.951994,,,,...,1.582302,1.280412,1.348364,,,,,,,
5,,1.331773,,,,0.910308,0.950158,,,,...,1.514232,1.296456,1.371576,,,,,,,
6,,1.148687,,,,1.019505,0.945871,,,,...,1.406155,1.219918,1.390585,,,,,,,
7,,1.122283,,,,1.070171,0.94516,,,,...,1.374179,1.174965,,,,,,,,
8,,1.248453,,,,,0.924083,,,,...,1.469105,1.379992,,,,,,,,
9,,1.113087,,,,,1.021218,,,,...,,1.277233,,,,,,,,


In [25]:
all_night_bouts_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.857891,0.865051,0.810704,0.883466,0.911859,0.860566,1.245866,1.348179,1.281017,1.326572,...,1.566721,1.839078,1.657124,1.479444,1.480622,1.539550,1.455601,2.443564,1.393889,
1,0.907196,0.894454,0.840261,0.844573,,,1.313506,1.358715,1.289906,,...,,1.662897,2.378875,1.492227,1.419233,1.525282,1.401262,1.656526,1.425242,
2,,0.937008,0.842165,,,,1.260168,,,,...,,1.610073,1.748800,1.491720,1.476893,1.508404,1.392193,2.103182,1.435786,
3,,0.911547,,,,,1.257457,,,,...,,1.620077,2.105464,,1.491480,1.479742,,,1.346021,
4,,0.906826,,,,,1.252070,,,,...,,1.583582,1.724582,,1.442932,,,,1.393439,
5,,0.945446,,,,,,,,,...,,1.551587,1.613379,,1.436914,,,,1.339884,
6,,0.910494,,,,,,,,,...,,1.579409,1.544616,,1.427826,,,,1.299192,
7,,0.865610,,,,,,,,,...,,1.548956,,,1.514809,,,,1.324043,
8,,0.871702,,,,,,,,,...,,1.530473,,,1.511329,,,,1.284124,
9,,0.850490,,,,,,,,,...,,1.527500,,,1.473366,,,,1.273281,


In [26]:
sleep_profile_df

Unnamed: 0,Time,w1118 BL 6326 MR Sum,w1118 BL 6326 Avg Sleep,w1118 BL 6326 Beam Breaks,w1118 BL 6326.1 MR Sum,w1118 BL 6326.1 Avg Sleep,w1118 BL 6326.1 Beam Breaks,eaat2 e04636 MR Sum,eaat2 e04636 Avg Sleep,eaat2 e04636 Beam Breaks,eaat2 e04636.1 MR Sum,eaat2 e04636.1 Avg Sleep,eaat2 e04636.1 Beam Breaks,eaat2 e04636.2 MR Sum,eaat2 e04636.2 Avg Sleep,eaat2 e04636.2 Beam Breaks
0,10:56:04,13.666723,0.0,936,18.943555,8.333333,431,16.985112,0.0,521,18.626518,8.333333,89,20.879742,0.0,438.0
1,11:56:04,14.268239,0.0,1011,20.308204,0.0,541,16.451909,0.0,397,14.862725,83.333333,3,18.561291,0.0,199.0
2,12:56:04,14.507468,0.0,1128,20.116906,0.0,507,16.343367,0.0,310,16.466497,41.666667,50,18.079725,0.0,144.0
3,13:56:04,15.105396,0.0,1111,19.625666,0.0,417,16.287813,0.0,248,17.933557,16.666667,152,18.222576,0.0,122.0
4,14:56:04,14.410743,0.0,864,19.774922,0.0,447,16.533504,0.0,235,14.094162,100.0,0,17.426245,0.0,109.0
5,15:56:04,15.773512,75.0,68,18.800395,0.0,423,15.486906,0.0,266,15.227216,41.666667,56,17.478851,0.0,119.0
6,16:56:04,14.073482,100.0,0,18.979456,0.0,361,14.596119,0.0,259,16.543512,16.666667,74,18.212323,25.0,77.0
7,17:56:04,15.142213,66.666667,18,19.021843,0.0,245,17.382679,8.333333,200,18.15317,58.333333,25,19.557847,16.666667,64.0
8,18:56:04,15.210787,41.666667,41,19.139298,0.0,310,17.182623,0.0,206,15.557176,100.0,0,20.330804,33.333333,114.0
9,19:56:05,20.560867,83.333333,12,20.72283,0.0,494,18.735982,0.0,247,13.71704,100.0,0,18.886279,0.0,80.0


In [27]:
sleep_minutes_df

Unnamed: 0,Fly,Total Sleep Min,Total Day Sleep Min,Total Night Sleep Min
0,w1118 BL 6326,820,290,530
1,w1118 BL 6326.1,645,5,640
2,eaat2 e04636,645,5,640
3,eaat2 e04636.1,995,380,615
4,eaat2 e04636.2,515,55,460


In [28]:
wake_sleep_mr_df

Unnamed: 0,Fly,Mean Wake MR,Mean Sleep MR,Mean Day MR,Mean Night MR,Mean Day Sleep MR,Mean Night Sleep MR
0,w1118 BL 6326,1.201862,1.170727,1.235784,1.13248,1.26581,1.1187
1,w1118 BL 6326.1,1.631928,1.023106,1.646857,1.071596,1.098401,1.022518
2,eaat2 e04636,1.380552,0.890217,1.413147,0.908699,1.461901,0.88575
3,eaat2 e04636.1,1.453698,1.159332,1.36058,1.140018,1.242371,1.108024
4,eaat2 e04636.2,1.683387,1.53855,1.607447,1.655728,1.596901,1.531573


In [30]:
mean_hourly_sleep_mr_df

Unnamed: 0,Fly,Mean Hourly MR Total,Mean Hourly MR Day,Mean Hourly MR Night
0,w1118 BL 6326,14.209586,14.829413,13.58976
1,w1118 BL 6326.1,16.310717,19.762282,12.859151
2,eaat2 e04636,13.931072,16.957759,10.904384
3,eaat2 e04636.1,15.00359,16.326964,13.680216
4,eaat2 e04636.2,19.579051,19.28937,19.868732


## Export to .xlsx

In [31]:
# Convert datetime.time columns back to str for correct formatting in Excel
sleep_profile_df.Time = sleep_profile_df.Time.apply(str)
trimmed_copy_df.Time = trimmed_copy_df.Time.apply(str)

# Write every result table to its own sheet of one workbook.
# ExcelWriter is used as a context manager so the file is saved and closed when
# the block exits, even if one of the writes fails; this also avoids
# writer.save(), which newer pandas releases no longer provide.
with pd.ExcelWriter('test_out.xlsx', engine='xlsxwriter') as writer:
    trimmed_copy_df.to_excel(writer, sheet_name='Trimmed Analysis', index=False)
    # The bout tables have no meaningful column labels, so the header row is omitted
    all_day_bouts_df.to_excel(writer, sheet_name='All Day Bouts', index=False, header=False)
    all_night_bouts_df.to_excel(writer, sheet_name='All Night Bouts', index=False, header=False)
    sleep_profile_df.to_excel(writer, sheet_name='Sleep Profile', index=False)
    sleep_minutes_df.to_excel(writer, sheet_name='Min. of Sleep', index=False)
    wake_sleep_mr_df.to_excel(writer, sheet_name='Wake Sleep MR', index=False)
    mean_hourly_sleep_mr_df.to_excel(writer, sheet_name='Mean Hourly Sleep', index=False)