## Functionality that needs to be added

- ~~Trim Raw Input~~
- ~~Night vs Day bouts (critera - choose where over 50% of bout is spent, in case of tie choose night)~~
- ~~Create Sleep Profile~~
- Create summary stats
- Create function to run all flies and arrange in exportable format
- Write to seperate sheets in Excel
- Adapt to python script that can be run in terminal - takes input .csv and exports excel file

### Import Libraries that will be needed

In [43]:
import pandas as pd
import numpy as np
import datetime

### Import data from .csv

In [44]:
df_raw = pd.read_csv("Bethany_Raw.csv")

### Trim to 24 hour range

In [45]:
lower_limit = datetime.time(10,0,0)
upper_limit = datetime.time(10,5,0)
def get_data_range(L, lower_limit, upper_limit):
    """
    Find the start and end indices for a 24 hour range starting between a given time range
    Takes arguments L - a list of datetime.time objects, lower_limit and upper_limit - datetime.time objects 
    to specificy the range in which to choose the start time
    Returns a tuple: (start_index, end_index)
    """
    start_index = 0
    for i in range(len(L)):
        if L[i] >= lower_limit and L[i] < upper_limit:
            start_index = i
            break
    return (start_index, start_index + 288)


In [46]:
# Trim raw data to 24 hour window
df_raw.Time = pd.to_datetime(df_raw.Time, format='%H:%M:%S').dt.time
trim_index = get_data_range(list(df_raw.Time), lower_limit, upper_limit)
df = df_raw[trim_index[0]:trim_index[1]]
df = df.reset_index(drop=True)

### Make a column of bools to indicate day vs night

In [47]:
def isNight(x):
    """
    Take datetime.time object and return True if between 10pm and 10am, False otherwise 
    """
    night_start = datetime.time(22,0,0)
    night_end = datetime.time(10,0,0)
    if (x >= night_start and x < datetime.time(23, 59, 59)) or x < night_end:
        return True
    else:
        return False

In [48]:
# Create Boolean column indicating Night=True Day=False
# df.loc[:,'isNight'] = df.Time.apply(isNight) # If day/night are not split between top and bottom of window use this
df.isNight = pd.Series([x > len(df)/2-1 for x in range(len(df))]) # Faster / easier version accomplishes the same as above, IF day/night is split top/bottom


### Generate bout indices

In [49]:
# Script for collecting bouts
# Will need to create a list of bout indices to access resp data during bouts
def get_bout_indices(L):
    """
    Takes a list L and returns a list of tuples of the start/end indices in which sleeping bouts occurred.
    I.e. if two sleeping bouts occured, the first from index 5 to 20, and the second from index 30 to 40,
    this function will return [(5,20), (30,40)]
    """
    indices = []
    start_index = 1
    end_index = 1
    in_bout = False
    
    for i in range(len(L)):
        if L[i] == 0 and in_bout == False: 
            start_index = i
            in_bout = True
        if (L[i] != 0 or i == len(L)-1) and in_bout == True:
            end_index = i
            in_bout = False
            if i == len(L)-1:
                indices.append((start_index, end_index+1))
            else:
                indices.append((start_index, end_index))
    return indices 
        

# Better idea!!! Make a new column for each fly in the df to indicate True if sleep bout, False if wake.. This will make everything easier, and I already have the indices

In [50]:
ind = get_bout_indices(df.iloc[:,11])

In [51]:
def get_sleep_filter(indices):
    filter_list = []
    for t in indices:
        for i in range(t[0], t[1]):
            filter_list.append(i)
    return filter_list

In [52]:
x = get_sleep_filter(ind)

In [53]:
def create_sleeping_columns(df):
    for i in range(5):
        df.loc[:,'fly' + str(i+1) + 'sleeping'] = df.iloc[:,i+11].apply(lambda x: x == 0)
    

In [54]:
create_sleeping_columns(df)

In [42]:
df

Unnamed: 0,Time,Filename,mean_Deg_C_1_1,uL_BL,uL_Channel_2,uL_Channel_3,uL_Channel_4,uL_Channel_5,uL_Channel_6,Unnamed: 9,...,w1118 BL 6326,w1118 BL 6326.1,eaat2 e04636,eaat2 e04636.1,eaat2 e04636.2,fly1sleeping,fly2sleeping,fly3sleeping,fly4sleeping,fly5sleeping
0,10:01:04,RepRec~04-28-2017_0269.exp,23.73108,0.222160,1.023476,1.098401,1.016938,0.914110,1.418441,,...,80,0,6,0,17.0,False,True,False,True,False
1,10:06:04,RepRec~04-28-2017_0270.exp,23.70829,0.217601,1.139487,1.551482,1.302247,1.157874,1.758295,,...,66,17,43,2,54.0,False,False,False,False,False
2,10:11:04,RepRec~04-28-2017_0271.exp,23.71971,0.221645,1.110626,1.560068,1.453209,1.465149,1.724382,,...,87,39,47,9,49.0,False,False,False,False,False
3,10:16:04,RepRec~04-28-2017_0272.exp,23.74637,0.221363,1.139690,1.650532,1.632397,1.577601,1.700607,,...,90,35,57,13,46.0,False,False,False,False,False
4,10:21:04,RepRec~04-28-2017_0273.exp,23.77053,0.231287,1.134832,1.570578,1.494593,1.557409,1.746360,,...,82,28,53,7,41.0,False,False,False,False,False
5,10:26:04,RepRec~04-28-2017_0274.exp,23.78069,0.242223,1.126824,1.628088,1.493716,1.690734,1.766903,,...,67,45,36,3,40.0,False,False,False,False,False
6,10:31:04,RepRec~04-28-2017_0275.exp,23.81213,0.224381,1.153827,1.623043,1.497300,1.701242,1.713951,,...,72,38,61,9,37.0,False,False,False,False,False
7,10:36:04,RepRec~04-28-2017_0276.exp,23.88280,0.219736,1.134260,1.645526,1.410356,1.779325,1.672455,,...,84,49,36,7,26.0,False,False,False,False,False
8,10:41:04,RepRec~04-28-2017_0277.exp,23.96792,0.225769,1.145640,1.682994,1.419454,1.646974,1.673891,,...,81,43,49,16,37.0,False,False,False,False,False
9,10:46:04,RepRec~04-28-2017_0278.exp,24.03759,0.241932,1.185937,1.620285,1.408852,1.584299,1.971076,,...,85,39,51,14,39.0,False,False,False,False,False


### Get day bouts and night bouts

In [8]:
# Need to put this into a function, and make a loop to generate day/night bouts for each fly of 5 in the df
colnames = df.columns[11:16]
def get_day_night_bouts(df, resp_colnum, sleep_colnum):
    """
    Takes a dataframe, the colnumn number for resp data, column number for sleep data
    Returns a tuple of two lists of lists: resp data for daytime sleep bouts, resp data for nighttime sleep bouts
    """
    bout_indices = get_bout_indices(list(df.iloc[:,sleep_colnum]))
    resp_bouts_day = []
    resp_bouts_night = []
    for i in bout_indices:
        if df.isNight[i[0]:i[1]].mean() >= 0.5:
            resp_bouts_night.append(list(df.iloc[:,resp_colnum])[i[0]:i[1]])
        else:
            resp_bouts_day.append(list(df.iloc[:,resp_colnum])[i[0]:i[1]])
    if not resp_bouts_day:
        resp_bouts_day = ['none']
    if not resp_bouts_night:
        resp_bouts_night = ['none']
    return resp_bouts_day, resp_bouts_night

def get_all_bouts(df, colnames):
    """
    """
    resp_colnum = 4
    sleep_colnum = 11
    colnames = df.columns[11:16]
    all_day_bouts = {x: [] for x in colnames}
    all_night_bouts = {x: [] for x in colnames}
    for i in range(5):
        day_bouts, night_bouts = get_day_night_bouts(df, resp_colnum, sleep_colnum)
        all_day_bouts[df.columns[i+11]] += day_bouts 
        all_night_bouts[df.columns[i+11]] += night_bouts 
        resp_colnum+=1
        sleep_colnum+=1
    return all_day_bouts, all_night_bouts

In [9]:
day_bouts_dict, night_bouts_dict = get_all_bouts(df, colnames)

In [10]:
def get_all_bouts_list(day_bouts_dict, night_bouts_dict, colnames):
    all_bouts_day_list = []
    all_bouts_night_list = []
    for i in colnames:
        all_bouts_day_list += day_bouts_dict[i] + [[]]
        all_bouts_night_list += night_bouts_dict[i] + [[]]
    return all_bouts_day_list, all_bouts_night_list

In [11]:
all_bouts_day_list, all_bouts_night_list = get_all_bouts_list(day_bouts_dict, night_bouts_dict, colnames)

In [13]:
# Create dataframes for day/night bouts
all_day_bouts_df = pd.DataFrame(all_bouts_day_list).transpose()
all_night_bouts_df = pd.DataFrame(all_bouts_night_list).transpose()

In [14]:
all_day_bouts_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,1.039541,1.180896,1.284699,1.227334,1.18559,1.130278,0.986619,,1.098401,,...,1.366439,1.471703,1.284412,,1.592927,1.589793,1.502461,1.616515,1.838532,
1,1.427665,1.383591,1.285172,1.295008,1.095631,1.101771,1.009748,,,,...,1.33013,1.332372,1.294859,,1.610929,,,1.53017,1.89097,
2,,1.294287,,1.206533,1.199461,1.044584,0.936784,,,,...,1.410944,1.315407,1.380678,,1.390133,,,1.537473,,
3,,1.249192,,,,1.026278,0.938204,,,,...,1.311169,1.319408,1.33691,,,,,1.466006,,
4,,1.326397,,,,8.365838,0.951994,,,,...,1.582302,1.280412,1.348364,,,,,,,
5,,1.331773,,,,0.910308,0.950158,,,,...,1.514232,1.296456,1.371576,,,,,,,
6,,1.148687,,,,1.019505,0.945871,,,,...,1.406155,1.219918,1.390585,,,,,,,
7,,1.122283,,,,1.070171,0.94516,,,,...,1.374179,1.174965,,,,,,,,
8,,1.248453,,,,,0.924083,,,,...,1.469105,1.379992,,,,,,,,
9,,1.113087,,,,,1.021218,,,,...,,1.277233,,,,,,,,


### Get sleep profiles

In [15]:
def get_sleep_profile(df, resp_colnum, sleep_colnum):
    """
    Returns a list of tuples with sum of metabolic rate and the percentage sleep for each hour in the 24 hour window
    """
    fly1_sleep = df.iloc[:,sleep_colnum]
    fly1_resp = df.iloc[:,resp_colnum]
    hourly_resp_sleep = []
    for i in range(24):
        sleep = fly1_sleep[i*12:i*12+12]
        resp = fly1_resp[i*12:i*12+12]
        num_sleep_blocks = 0
        for i in sleep:
            if i == 0:
                num_sleep_blocks += 1
        sleep_avg = num_sleep_blocks / 12 * 100
        hourly_resp_sleep.append((resp.sum(), sleep_avg))
    return hourly_resp_sleep

def make_sleep_profile_dict(df):
    """
    Returns a dictionary containing a time index and the sleep profiles (both metabolic rate sum and average sleep)
    for each fly in the dataframe
    """
    resp_colnum = 4
    sleep_colnum = 11
    sleep_profile_dict = {}
    sleep_profile_dict['Time'] = [df.Time[x*12-1] for x in range(1, 25)]
    for i in range(5):
        sleep_profile = get_sleep_profile(df, resp_colnum, sleep_colnum)
        sleep_profile_dict[df.columns[sleep_colnum] + ' MR Sum'] = [x[0] for x in sleep_profile] #name of genotype is header of sleep column
        sleep_profile_dict[df.columns[sleep_colnum] + ' Avg Sleep'] = [x[1] for x in sleep_profile]
        resp_colnum+=1
        sleep_colnum+=1
    return sleep_profile_dict

def make_colnames(df):
    """
    Create list of ordered column names for dataframe to be created from sleep_profile dictionary
    """
    colnames = ['Time']
    for i in range(11,16):
        colnames.append(df.columns[i] + ' MR Sum')
        colnames.append(df.columns[i] + ' Avg Sleep')
    return colnames

In [19]:
#Creat test df for sleep profile
sleep_profile_dict = make_sleep_profile_dict(df)
sleep_profile_df = pd.DataFrame(test_dict, columns=make_colnames(df))

In [20]:
sleep_profile_df

Unnamed: 0,Time,w1118 BL 6326 MR Sum,w1118 BL 6326 Avg Sleep,w1118 BL 6326.1 MR Sum,w1118 BL 6326.1 Avg Sleep,eaat2 e04636 MR Sum,eaat2 e04636 Avg Sleep,eaat2 e04636.1 MR Sum,eaat2 e04636.1 Avg Sleep,eaat2 e04636.2 MR Sum,eaat2 e04636.2 Avg Sleep
0,10:56:04,13.666723,0.0,18.943555,8.333333,16.985112,0.0,18.626518,8.333333,20.879742,0.0
1,11:56:04,14.268239,0.0,20.308204,0.0,16.451909,0.0,14.862725,83.333333,18.561291,0.0
2,12:56:04,14.507468,0.0,20.116906,0.0,16.343367,0.0,16.466497,41.666667,18.079725,0.0
3,13:56:04,15.105396,0.0,19.625666,0.0,16.287813,0.0,17.933557,16.666667,18.222576,0.0
4,14:56:04,14.410743,0.0,19.774922,0.0,16.533504,0.0,14.094162,100.0,17.426245,0.0
5,15:56:04,15.773512,75.0,18.800395,0.0,15.486906,0.0,15.227216,41.666667,17.478851,0.0
6,16:56:04,14.073482,100.0,18.979456,0.0,14.596119,0.0,16.543512,16.666667,18.212323,25.0
7,17:56:04,15.142213,66.666667,19.021843,0.0,17.382679,8.333333,18.15317,58.333333,19.557847,16.666667
8,18:56:04,15.210787,41.666667,19.139298,0.0,17.182623,0.0,15.557176,100.0,20.330804,33.333333
9,19:56:05,20.560867,83.333333,20.72283,0.0,18.735982,0.0,13.71704,100.0,18.886279,0.0


### Summary Stats
- Minutes of Sleep
    - Total sleep
    - total day sleep,
    - total night sleep
- Wake/Sleep MR
    - Mean Wake MR,
    - Mean Sleep MR,
    - Mean Day MR,
    - Mean Night MR,
    - Mean Day Sleep MR,
    - Mean Night Sleep MR
- Mean hourly slep
    - Mean hourly MR all
    - Mean hourly Day
    - Mean hourly MR night

In [18]:
#Total Sleep 
def get_sleep_minutes_df(df, colnames)
    total_sleep_list = [['Fly',],['Total Sleep Min'],['Total Day Sleep Min'],['Total Night Sleep Min']]
    for fly_i in range(len(colnames)):
        for stat_i in range(4)
            total_sleep_list[stat_i].append(colnames[fly_i])
            total_sleep_list[stat_i].
        

SyntaxError: invalid syntax (<ipython-input-18-0961d1e834b1>, line 2)

## Export to .csv

In [None]:
# Now we save this list of lists as a csv
# resp_bouts_df = pd.DataFrame(resp_bouts).transpose()
# resp_bouts_df.to_csv('test.csv', index=False, header=False)