In [2]:
import itertools

# Our numerical workhorses
import numpy as np
import pandas as pd
import scipy.integrate

# Import Altair for high level plotting
import altair as alt
import altair_catplot as altcat

# Import Bokeh modules for interactive plotting
import bokeh.io
import bokeh.plotting

# Set up Bokeh for inline viewing
bokeh.io.output_notebook()

# Pevent bulky altair plots
alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [5]:
df = pd.read_csv('../data/130315_1A_aanat2.csv', comment='#')

# Load in the genotype file, call it df_gt for genotype DataFrame
df_gt = pd.read_csv('../data/130315_1A_genotypes.txt',
                    delimiter='\t',
                    comment='#',
                    header=[0, 1])

# Reset the columns to be the second level of indexing
df_gt.columns = df_gt.columns.get_level_values(1)

# Check out the new columns
df_gt.columns

df_gt.columns = ['wt', 'het', 'mut']

# Tidy the DataFrame
df_gt = pd.melt(df_gt, var_name='genotype', value_name='location')

# Drop all rows that have a NaN in them
df_gt = df_gt.dropna()

df_gt = df_gt.reset_index(drop=True)

df_gt.loc[:,'location'] = df_gt.loc[:, 'location'].astype(int)

df = pd.merge(df, df_gt)

df['time'] = pd.to_datetime(df['time'])

df['light'] = (  (df['time'].dt.time >= pd.to_datetime('9:00:00').time())
               & (df['time'].dt.time < pd.to_datetime('23:00:00').time()))

In [6]:
df.head()

Unnamed: 0,location,activity,time,zeit,zeit_ind,day,genotype,light
0,1,0.6,2013-03-15 18:31:09,-14.480833,-869,4,het,True
1,1,1.9,2013-03-15 18:32:09,-14.464167,-868,4,het,True
2,1,1.9,2013-03-15 18:33:09,-14.4475,-867,4,het,True
3,1,13.4,2013-03-15 18:34:09,-14.430833,-866,4,het,True
4,1,15.4,2013-03-15 18:35:09,-14.414167,-865,4,het,True


In [12]:
df.shape[0]
# we dropped fish?

391499

In [7]:
dataframes = []
for i in range(1, 97):
    dataframes.append(df[df['location'] == i])


In [21]:
for i in range(96):
    print (dataframes[i].shape[0])

5363
5363
5363
5363
5363
5363
0
5363
0
5363
5363
5363
5363
5363
5363
0
0
5363
5363
5363
5363
5363
5363
5363
0
0
5363
5363
5363
5363
0
0
5363
0
5363
5363
0
5363
5363
5363
0
0
5363
5363
0
5363
5363
0
5363
0
5363
5363
5363
5363
5363
5363
5363
5363
5363
5363
5363
5363
0
5363
5363
5363
5363
5363
5363
0
5363
5363
5363
5363
5363
5363
5363
5363
5363
5363
5363
0
0
0
0
5363
5363
5363
5363
5363
0
5363
5363
0
5363
5363


In [10]:
# Get the activity level of all the fish together!
for i, d in enumerate(dataframes):
    data = []
    inactive_minutes_count = 0
    counter = 0
    print ('made it to ' + str(i))
    for index, row in d.iterrows():
        if row['activity'] < 1: # our fishy is still inactive
            inactive_minutes_count += 1
            
        else: # we are in a period of activity
            if inactive_minutes_count == 0:
                data.append(0)
            else: # we are coming from a period of inactivity
                data += [inactive_minutes_count] + [0] * (inactive_minutes_count - 1) + [0]
                inactive_minutes_count = 0
        
        if counter == d.shape[0] - 1: # we are at the end and are still inactive, manually add
            print ('yay')
            data += [inactive_minutes_count] + [0] * (inactive_minutes_count - 1)
            
        counter += 1
    print ('total iterations: ' + str(counter))
    
    print ('length of calculated data array: ' + str(len(data)))
    df_1 = df[df["location"] == i + 1]
    print ('length of dataframe: ' + str(df_1.shape[0]))

    df_1["InactivityBegun" + str(i)] = np.array(data[:df_1.shape[0]])
    

made it to 0
yay
total iterations: 5363
length of calculated data array: 5363
length of dataframe: 5363
made it to 1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


yay
total iterations: 5363
length of calculated data array: 5364
length of dataframe: 5363
made it to 2
yay
total iterations: 5363
length of calculated data array: 5363
length of dataframe: 5363
made it to 3
yay
total iterations: 5363
length of calculated data array: 5364
length of dataframe: 5363
made it to 4
yay
total iterations: 5363
length of calculated data array: 5364
length of dataframe: 5363
made it to 5
yay
total iterations: 5363
length of calculated data array: 5364
length of dataframe: 5363
made it to 6
total iterations: 0
length of calculated data array: 0
length of dataframe: 0
made it to 7
yay
total iterations: 5363
length of calculated data array: 5364
length of dataframe: 5363
made it to 8
total iterations: 0
length of calculated data array: 0
length of dataframe: 0
made it to 9
yay
total iterations: 5363
length of calculated data array: 5363
length of dataframe: 5363
made it to 10
yay
total iterations: 5363
length of calculated data array: 5363
length of dataframe: 536