In [31]:
import pandas as pd

# Load the August by-hand measurements CSV, then list its column names.
august_df = pd.read_csv(filepath_or_buffer='s4_august_by_hand_30plus_v2.csv')
august_df.columns

Index(['cultivar_id', 'cultivar', 'season', 'range', 'column',
       'absorbance_850', 'roll', 'PhiNO', 'PhiNPQ', 'absorbance_530',
       'absorbance_605', 'absorbance_730', 'absorbance_880', 'absorbance_940',
       'Fs', 'NPQt', 'qL', 'qP', 'RFd', 'SPAD_530', 'SPAD_605', 'SPAD_730',
       'leaf_thickness', 'ambient_humidity', 'leaf_angle_clamp_position',
       'pitch', 'proximal_air_temperature', 'FvP/FmP',
       'leaf_temperature_differential', 'Phi2', 'relative_chlorophyll',
       'FmPrime', 'FoPrime', 'LEF', 'SPAD_420', 'SPAD_650', 'SPAD_850',
       'SPAD_880', 'light_intensity_PAR', 'leaf_temperature', 'date'],
      dtype='object')

In [32]:
# Peek at the first two rows to sanity-check the load.
august_df.head(n=2)

Unnamed: 0,cultivar_id,cultivar,season,range,column,absorbance_850,roll,PhiNO,PhiNPQ,absorbance_530,...,FmPrime,FoPrime,LEF,SPAD_420,SPAD_650,SPAD_850,SPAD_880,light_intensity_PAR,leaf_temperature,date
0,6000000964,PI570373,4,21,2,0.443,-2.6,0.054,0.722,0.99,...,2986.15,2233.0,157.363,135.13,34.8,-3.18,1.36,1560.0,315.48,2017-08-28 12:00:00
1,6000000851,PI511355,4,26,15,0.449,-39.74,0.098,0.673,1.27,...,4181.791,2583.0,158.183,188.25,49.6,-3.74,1.34,1530.0,310.14,2017-08-28 11:14:00


In [33]:
# Convert the 'date' column to datetime64 so that date arithmetic
# (subtracting a start date) works in the following cells.
parsed_dates = pd.to_datetime(august_df['date'])
august_df['date'] = parsed_dates

In [34]:
from datetime import datetime

# Parse the reference "start date" string and echo it to confirm the format string is right.
season_start = datetime.strptime('2017-05-01 12:00:00', '%Y-%m-%d %H:%M:%S')
print(season_start)

2017-05-01 12:00:00


In [35]:
# Subtract a fixed "start date" from every sample timestamp.  The result is a
# timedelta column ('day_delta'); it is turned into fractional days in a later cell.
season_start = datetime.strptime('2017-05-01 12:00:00', '%Y-%m-%d %H:%M:%S')
august_df['day_delta'] = august_df['date'].sub(season_start)

In [36]:
# Preview the timedelta offsets: each value shows a whole-day part and a time-of-day part.
august_df['day_delta'].head(n=5)

0   119 days 00:00:00
1   118 days 23:14:00
2   118 days 23:15:00
3   118 days 23:59:00
4   119 days 00:10:00
Name: day_delta, dtype: timedelta64[ns]

In [38]:
# Pull out one timedelta (index label 1) and show its two components:
# whole days (printed) and the leftover seconds within the day (cell output).
sample_delta = august_df['day_delta'][1]
print(sample_delta.days)
sample_delta.seconds

118


83640

The subtraction yields a column of pandas Timedelta values.  Each one can be inspected with xxxx.days (the whole days) and xxxx.seconds (the leftover seconds within the last day, 0 to 86399).  So let's make day_offset into a floating-point value by adding the whole days and the fractional day together (seconds / 86400, since there are 86400 seconds per day).  We will build a list of the fractional day offsets in row order, add it as a new 'day_offset' column, and then drop the 'day_delta' column.  Then this dataset will have a day_offset field compatible with the main Season 4 dataset.

In [46]:
# Convert each timedelta into a floating-point number of days:
# whole days plus the leftover seconds expressed as a fraction of a day
# (86400 seconds per day).  This uses the vectorized `.dt` accessor instead of
# a Python-level iterrows() loop; the arithmetic per element is unchanged, and
# the result is kept as a plain list in row order.
day_delta = august_df['day_delta']
day_offset = (day_delta.dt.days + day_delta.dt.seconds / 86400.0).tolist()
day_offset[0:10]

[119.0,
 118.96805555555555,
 118.96875,
 118.99930555555555,
 119.00694444444444,
 119.00833333333334,
 119.01180555555555,
 119.01388888888889,
 119.02083333333333,
 119.02152777777778]

In [47]:
# Attach the fractional-day offsets as a new trailing 'day_offset' column.
august_df = august_df.assign(day_offset=day_offset)

In [48]:
# Check that 'day_delta' and 'day_offset' agree on the first few rows.
august_df.head(n=5)

Unnamed: 0,cultivar_id,cultivar,season,range,column,absorbance_850,roll,PhiNO,PhiNPQ,absorbance_530,...,LEF,SPAD_420,SPAD_650,SPAD_850,SPAD_880,light_intensity_PAR,leaf_temperature,date,day_delta,day_offset
0,6000000964,PI570373,4,21,2,0.443,-2.6,0.054,0.722,0.99,...,157.363,135.13,34.8,-3.18,1.36,1560.0,315.48,2017-08-28 12:00:00,119 days 00:00:00,119.0
1,6000000851,PI511355,4,26,15,0.449,-39.74,0.098,0.673,1.27,...,158.183,188.25,49.6,-3.74,1.34,1530.0,310.14,2017-08-28 11:14:00,118 days 23:14:00,118.968056
2,6000000851,PI511355,4,26,15,0.456,-17.32,0.172,0.525,1.224,...,177.017,171.18,46.18,-3.09,1.89,1301.0,307.44,2017-08-28 11:15:00,118 days 23:15:00,118.96875
3,6000000964,PI570373,4,21,2,0.421,-0.9,0.015,0.908,0.997,...,53.598,134.03,33.01,-3.62,1.06,1561.0,318.92,2017-08-28 11:59:00,118 days 23:59:00,118.999306
4,6000000964,PI570373,4,45,3,0.535,11.29,0.033,0.832,1.185,...,94.147,137.76,34.04,-3.79,1.2,1547.0,320.24,2017-08-28 12:10:00,119 days 00:10:00,119.006944


In [52]:
# The intermediate timedelta column is no longer needed once 'day_offset' exists.
august_df = august_df.drop('day_delta', axis='columns')

In [54]:
# Save the augmented table; the row index is not written to the file.
august_df.to_csv(path_or_buf="s4_by_hand_30plus_day_offset.csv", index=False)