Start by downloading CSV file from EpiCollect site:
http://epicollectserver.appspot.com/project.html?name=crb_yigo_barrel_traps


In [32]:
import pandas as pd
from datetime import timedelta
import pymysql
import getpass

In [33]:
df = pd.DataFrame.from_csv('listEntriesCSV.csv',index_col=False,parse_dates=True)
df.drop(['key','latitude','longitude','altitude','deviceId','entryId','lastEdited',
         'timeUploaded','projectName','photo','Unnamed: 15'],inplace=True,axis=1)
# Convert dateCreated from string to datatime
df['dateCreated'] = pd.to_datetime(df['dateCreated'])
# Convert from GMT to ChST by adding 10 hours
df['dateCreated'] = df['dateCreated'] + timedelta(hours=10)
# Replace all NaNs with 0
df.fillna(0, inplace=True)
# Create endDate column and drop dateCreated
df['endDate'] = df['dateCreated'].apply(lambda x: x.strftime('%Y-%m-%d'))
df.sort(['endDate','barrelID'],inplace=True)
# Reorder columns
df = df[['endDate','barrelID','males','females','note']]
df.rename(columns={'note': 'notes'}, inplace=True)
df

Unnamed: 0,endDate,barrelID,males,females,notes
474,2014-10-15,1,0,0,0
129,2014-10-15,2,0,1,0
597,2014-10-15,3,0,0,0
134,2014-10-15,4,0,0,0
488,2014-10-15,5,1,1,0
246,2014-10-15,6,0,0,0
124,2014-10-15,7,1,0,0
307,2014-10-15,8,0,2,0
312,2014-10-15,9,0,2,0
645,2014-10-15,10,0,0,0


## Get last trapping period from project database

In [34]:
conn = pymysql.connect(host='mysql.guaminsects.net',user='readonlyguest',passwd='readonlypassword',db='oryctes')
sql = """
SELECT * 
FROM YigoBarrelObs
"""
df_obs = pd.io.sql.read_sql(sql, conn)
conn.close()

## Keep only the last trapping session data

In [35]:
maxEndDate = df_obs['endDate'].max()
df_obs = df_obs[df_obs['endDate'] == maxEndDate]
df_obs = df_obs.sort('barrelID')
df_obs

Unnamed: 0,id,barrelID,trapType,lure,uvled,funnel,substrate,startDate,endDate,males,females,notes
1690,3096,1,P,y,y,y,y,2015-04-24,2015-04-30,0,0,
1669,3075,2,P,y,y,y,y,2015-04-24,2015-04-30,0,0,
1686,3092,3,P,y,y,y,y,2015-04-24,2015-04-30,3,4,
1691,3097,4,P,y,y,y,n,2015-04-24,2015-04-30,0,0,
1695,3101,5,P,y,y,y,y,2015-04-24,2015-04-30,3,2,
1678,3084,6,P,y,y,y,y,2015-04-24,2015-04-30,0,0,
1681,3087,7,P,y,y,y,n,2015-04-24,2015-04-30,1,1,
1693,3099,8,P,y,y,y,n,2015-04-24,2015-04-30,2,1,
1674,3080,9,P,y,y,y,y,2015-04-24,2015-04-30,0,0,
1676,3082,10,P,y,y,y,y,2015-04-24,2015-04-30,1,0,


## In the dataframe of EpiCollect data, delete all observations prior to the last endDate in the project database. We want to append only new data.

In [36]:
strMaxEndDate = maxEndDate.strftime('%Y-%m-%d')
df_new = df[df['endDate'] > strMaxEndDate]
df_new

Unnamed: 0,endDate,barrelID,males,females,notes
464,2015-05-08,1,0,0,0
454,2015-05-08,2,1,1,0
662,2015-05-08,3,1,2,0
660,2015-05-08,4,0,0,0
524,2015-05-08,5,2,2,0
602,2015-05-08,6,0,0,0
685,2015-05-08,7,0,0,0
433,2015-05-08,8,0,0,0
6,2015-05-08,9,1,2,0
859,2015-05-08,10,0,0,0


In [37]:
df_new['trapType'] = 'P'
df_new['lure'] = 'y'
df_new['uvled'] = 'y'
df_new['funnel'] = 'y'
df_new['substrate'] = 'x'
df_new['startDate'] = '2000-01-01'
df_new

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from IPython.kernel.zmq import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats

Unnamed: 0,endDate,barrelID,males,females,notes,trapType,lure,uvled,funnel,substrate,startDate
464,2015-05-08,1,0,0,0,P,y,y,y,x,2000-01-01
454,2015-05-08,2,1,1,0,P,y,y,y,x,2000-01-01
662,2015-05-08,3,1,2,0,P,y,y,y,x,2000-01-01
660,2015-05-08,4,0,0,0,P,y,y,y,x,2000-01-01
524,2015-05-08,5,2,2,0,P,y,y,y,x,2000-01-01
602,2015-05-08,6,0,0,0,P,y,y,y,x,2000-01-01
685,2015-05-08,7,0,0,0,P,y,y,y,x,2000-01-01
433,2015-05-08,8,0,0,0,P,y,y,y,x,2000-01-01
6,2015-05-08,9,1,2,0,P,y,y,y,x,2000-01-01
859,2015-05-08,10,0,0,0,P,y,y,y,x,2000-01-01


## Update substrate column and startDate column

In [38]:
def getSubstrate(barrelID):
    substrate = df_obs['substrate'][df_obs['barrelID']==barrelID].values[0]
    return substrate

In [39]:
def getStartDate(endDate):
    if endDate == '2015-05-08':
        startDate = strMaxEndDate 
    else:
        startDate =  df_new['endDate'][df_new['endDate'] < endDate].max() 
    return startDate

In [40]:
for index, row in df_new.iterrows():
    substrate = getSubstrate(row['barrelID'])
    df_new['substrate'][index] = substrate
    startDate = getStartDate(row['endDate'])
    df_new['startDate'][index] = startDate

A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [41]:
# Reorder columns
df_new = df_new[['startDate', 'endDate','barrelID','trapType','lure','uvled','funnel','substrate','males','females','notes']]
# Sort rows
df_new.sort(['endDate','barrelID'],inplace=True)
df_new.reset_index(drop=True, inplace=True)
df_new

Unnamed: 0,startDate,endDate,barrelID,trapType,lure,uvled,funnel,substrate,males,females,notes
0,2015-04-30,2015-05-08,1,P,y,y,y,y,0,0,0
1,2015-04-30,2015-05-08,2,P,y,y,y,y,1,1,0
2,2015-04-30,2015-05-08,3,P,y,y,y,y,1,2,0
3,2015-04-30,2015-05-08,4,P,y,y,y,n,0,0,0
4,2015-04-30,2015-05-08,5,P,y,y,y,y,2,2,0
5,2015-04-30,2015-05-08,6,P,y,y,y,y,0,0,0
6,2015-04-30,2015-05-08,7,P,y,y,y,n,0,0,0
7,2015-04-30,2015-05-08,8,P,y,y,y,n,0,0,0
8,2015-04-30,2015-05-08,9,P,y,y,y,y,1,2,0
9,2015-04-30,2015-05-08,10,P,y,y,y,y,0,0,0


## We are now ready to append the new data to the project database

In [42]:
password = getpass.getpass()
conn = pymysql.connect(host='mysql.guaminsects.net',user='aubreymoore',passwd=password,db='oryctes')
df_new.to_sql(name='YigoBarrelObs',con=conn,flavor='mysql',if_exists='append',index=False)
conn.close()

········
