## Season 4 Site-specific Growing Degree Day Values
* gdd to seedling emergence
* gdd to flowering
* gdd to flag leaf emergence

In [1]:
import datetime
import numpy as np
import pandas as pd

In [6]:
# Load the raw Season 4 trait export. low_memory=False turns off chunked parsing,
# which can otherwise yield mixed-type columns.
df_0 = pd.read_csv(
    'data/raw/mac_season_4.csv',
    low_memory=False,
)
print(df_0.shape)
# df_0.head()

(372363, 39)


In [7]:
# df_0.columns

In [8]:
# Columns of the raw export that are removed before any further processing.
cols_to_drop = [
    'Unnamed: 0', 'checked', 'result_type', 'id',
    'citation_id', 'site_id', 'treatment_id', 'city',
    'scientificname', 'commonname', 'genus', 'species_id',
    'cultivar_id', 'author', 'citation_year', 'treatment',
    'time', 'raw_date', 'month', 'year',
    'dateloc', 'trait_description', 'units', 'n',
    'statname', 'stat', 'notes', 'access_level',
    'entity', 'method_name', 'view_url', 'edit_url',
]

In [9]:
# Remove the unneeded columns; df_0 itself is left unchanged.
df_1 = df_0.drop(columns=cols_to_drop)
print(df_1.shape)
# df_1.head()

(372363, 7)


In [10]:
# df_1.trait.unique()

#### Convert dates from string to datetime

In [11]:
# Some raw date strings end with a time-zone label, e.g.
# '2017 Jul 20 (America/Phoenix)'. Strip exactly that trailing label so the
# remaining text can be parsed as a date; any other string passes through unchanged.
# Matching the full suffix (instead of slicing a fixed 18 characters whenever
# 'Phoenix' appears anywhere) avoids truncating strings of a different shape.
tz_suffix = ' (America/Phoenix)'

new_dates = [
    d[:-len(tz_suffix)] if d.endswith(tz_suffix) else d
    for d in df_1.date.values
]

# Sanity check: one cleaned date string per input row.
print(df_1.shape[0])
print(len(new_dates))

372363
372363


In [12]:
# Parse the list of date strings built above into pandas datetimes.
iso_format_dates = pd.to_datetime(new_dates)

In [13]:
# New frame = df_1 plus the parsed dates in a `date_1` column (df_1 is not modified).
df_2 = df_1.assign(date_1=iso_format_dates)
# df_2.head()

#### Change sitenames to strip ` E` and ` W` subplot designations

In [14]:
sitename_values = df_2.sitename.values

# Drop a trailing ' W' or ' E' subplot designation; other names are kept as-is.
no_e_w_names = [
    site[:-2] if site.endswith((' W', ' E')) else site
    for site in sitename_values
]

In [15]:
# Store the sitenames without E/W designations alongside the original `sitename`.
df_3 = df_2.assign(sitename_1=no_e_w_names)
# df_3.head()

#### Extract Range and Column Values

In [16]:
df_4 = df_3.copy()

# Pull the numeric range / column identifiers out of the sitename text.
# Raw strings keep `\d` as a regex digit class rather than an invalid string escape,
# and expand=False makes str.extract return a Series for the single capture group.
# astype(int) will raise if any sitename lacks the pattern (the extract yields NaN).
df_4['range'] = df_4['sitename_1'].str.extract(r"Range (\d+)", expand=False).astype(int)
df_4['column'] = df_4['sitename_1'].str.extract(r"Column (\d+)", expand=False).astype(int)

# df_4.sample(n=7)

#### Select for traits to be used for `gdd_to_` calculations

In [17]:
# Traits used by the `gdd_to_` calculations.
traits_to_keep = ['seedling_emergence_rate', 'flag_leaf_emergence_time', 'flowering_time']
is_kept_trait = df_4['trait'].isin(traits_to_keep)
df_5 = df_4[is_kept_trait]
df_5.shape

(412, 11)

### I. Flag Leaf Emergence

In [18]:
# Rows recording flag leaf emergence time only.
flag_leaf_0 = df_5[df_5['trait'] == 'flag_leaf_emergence_time']
print(flag_leaf_0.shape)
# flag_leaf_0.head()

(79, 11)


#### A. Add planting date

In [19]:
# Single planting date applied to every row; also reused by the flowering section.
day_of_planting = datetime.date(year=2017, month=4, day=20)

flag_leaf_1 = flag_leaf_0.assign(date_of_planting=day_of_planting)
# flag_leaf_1.head()

In [20]:
# Independent copy of flag_leaf_1 used as the input to the emergence-date step.
flag_leaf_2 = flag_leaf_1.copy()

#### B. Create timedelta using days to flag leaf emergence

In [21]:
# `mean` is treated as a number of days after planting; adding it to the
# planting date gives the calendar date of flag leaf emergence for each row.
timedelta_values = flag_leaf_2['mean'].values

dates_of_emergence = [
    day_of_planting + datetime.timedelta(days=n_days)
    for n_days in timedelta_values
]

# Sanity check: one computed date per row.
print(flag_leaf_2.shape[0])
print(len(dates_of_emergence))

79
79


In [22]:
# Attach the computed emergence dates as a new column on a copy of flag_leaf_2.
flag_leaf_3 = flag_leaf_2.assign(date_of_flag_leaf_emergence=dates_of_emergence)
# flag_leaf_3.head()

#### C. Rename `mean` column to `days_to_flag_leaf_emergence`

In [24]:
# Give the generic `mean` column a trait-specific name.
flag_leaf_4 = flag_leaf_3.rename(columns={'mean': 'days_to_flag_leaf_emergence'})
# flag_leaf_4.head()

#### D. Add Growing Degree Days 

In [25]:
# Processed weather-station table for Season 4 (source of the `date` and `gdd` columns).
season_4_weather_data = pd.read_csv(
    'data/processed/weather_station_season_4_2019-12-20T190949.csv'
)
# season_4_weather_data.head()

In [26]:
# Only the date and growing-degree-day columns are needed for the merges.
season_4_gdd = season_4_weather_data.loc[:, ['date', 'gdd']]
print(season_4_gdd.shape)
# season_4_gdd.head()

(150, 2)


In [27]:
# Convert `date` to datetime so it can be matched against the trait date columns.
season_4_gdd_1 = season_4_gdd.assign(date=pd.to_datetime(season_4_gdd['date']))
# season_4_gdd_1.head()

In [28]:
# Convert the emergence dates to pandas datetimes before merging on them.
flag_leaf_5 = flag_leaf_4.assign(
    date_of_flag_leaf_emergence=pd.to_datetime(flag_leaf_4['date_of_flag_leaf_emergence'])
)
# flag_leaf_5.dtypes

In [29]:
# Left join: every flag leaf row is kept and picks up the weather table's `gdd`
# for its emergence date (NaN where no weather row matches).
flag_leaf_6 = pd.merge(
    left=flag_leaf_5,
    right=season_4_gdd_1,
    how='left',
    left_on='date_of_flag_leaf_emergence',
    right_on='date',
)
# flag_leaf_6.head()

#### E. Drop & Rename Columns

In [30]:
# List the merged columns; both inputs had a `date` column, so they appear as date_x / date_y.
flag_leaf_6.columns

Index(['sitename', 'lat', 'lon', 'date_x', 'trait',
       'days_to_flag_leaf_emergence', 'cultivar', 'date_1', 'sitename_1',
       'range', 'column', 'date_of_planting', 'date_of_flag_leaf_emergence',
       'date_y', 'gdd'],
      dtype='object')

In [31]:
# Columns no longer needed once the merge is done.
cols_to_drop = ['sitename', 'date_x', 'date_y', 'date_of_planting', 'lat', 'lon', 'trait']
flag_leaf_7 = flag_leaf_6.drop(columns=cols_to_drop)
print(flag_leaf_7.shape)
# flag_leaf_7.head()

(79, 8)


Unnamed: 0,days_to_flag_leaf_emergence,cultivar,date_1,sitename_1,range,column,date_of_flag_leaf_emergence,gdd
0,50.0,PI527045,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 11,20,11,2017-06-09,799.0
1,70.0,PI655972,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 8,20,8,2017-06-29,1250.0
2,77.0,PI576401,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 7,20,7,2017-07-06,1420.0
3,72.0,PI534120,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 6,20,6,2017-07-01,1296.0
4,62.0,PI656026,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 3,20,3,2017-06-21,1050.0


In [32]:
# Restore the plain `date` / `sitename` names and give `gdd` a trait-specific name.
flag_leaf_8 = flag_leaf_7.rename(columns={
    'date_1': 'date',
    'sitename_1': 'sitename',
    'gdd': 'gdd_to_flag_leaf_emergence',
})
# flag_leaf_8.head()

Unnamed: 0,days_to_flag_leaf_emergence,cultivar,date,sitename,range,column,date_of_flag_leaf_emergence,gdd_to_flag_leaf_emergence
0,50.0,PI527045,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 11,20,11,2017-06-09,799.0
1,70.0,PI655972,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 8,20,8,2017-06-29,1250.0
2,77.0,PI576401,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 7,20,7,2017-07-06,1420.0
3,72.0,PI534120,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 6,20,6,2017-07-01,1296.0
4,62.0,PI656026,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 3,20,3,2017-06-21,1050.0


### II. Flower Emergence

In [41]:
# Rows recording flowering time only.
flowering_0 = df_5[df_5['trait'] == 'flowering_time']
print(flowering_0.shape)
# flowering_0.head()

(54, 11)


Unnamed: 0,sitename,lat,lon,date,trait,mean,cultivar,date_1,sitename_1,range,column
72497,MAC Field Scanner Season 4 Range 20 Column 11,33.075231,-111.974884,2017 Jul 20 (America/Phoenix),flowering_time,56.0,PI527045,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 11,20,11
167207,MAC Field Scanner Season 4 Range 20 Column 8,33.075231,-111.974933,2017 Jul 20 (America/Phoenix),flowering_time,72.0,PI655972,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 8,20,8
260431,MAC Field Scanner Season 4 Range 20 Column 7,33.075231,-111.97495,2017 Jul 20 (America/Phoenix),flowering_time,83.0,PI576401,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 7,20,7
260437,MAC Field Scanner Season 4 Range 20 Column 6,33.075231,-111.974966,2017 Jul 20 (America/Phoenix),flowering_time,74.0,PI534120,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 6,20,6
260444,MAC Field Scanner Season 4 Range 20 Column 3,33.075231,-111.975015,2017 Jul 20 (America/Phoenix),flowering_time,65.0,PI656026,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 3,20,3


#### A. Add planting date

In [42]:
# Same planting date as the flag leaf section (`day_of_planting` is defined there).
flowering_1 = flowering_0.assign(date_of_planting=day_of_planting)
# flowering_1.head()

Unnamed: 0,sitename,lat,lon,date,trait,mean,cultivar,date_1,sitename_1,range,column,date_of_planting
72497,MAC Field Scanner Season 4 Range 20 Column 11,33.075231,-111.974884,2017 Jul 20 (America/Phoenix),flowering_time,56.0,PI527045,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 11,20,11,2017-04-20
167207,MAC Field Scanner Season 4 Range 20 Column 8,33.075231,-111.974933,2017 Jul 20 (America/Phoenix),flowering_time,72.0,PI655972,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 8,20,8,2017-04-20
260431,MAC Field Scanner Season 4 Range 20 Column 7,33.075231,-111.97495,2017 Jul 20 (America/Phoenix),flowering_time,83.0,PI576401,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 7,20,7,2017-04-20
260437,MAC Field Scanner Season 4 Range 20 Column 6,33.075231,-111.974966,2017 Jul 20 (America/Phoenix),flowering_time,74.0,PI534120,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 6,20,6,2017-04-20
260444,MAC Field Scanner Season 4 Range 20 Column 3,33.075231,-111.975015,2017 Jul 20 (America/Phoenix),flowering_time,65.0,PI656026,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 3,20,3,2017-04-20


#### B. Create timedelta using flowering time

In [43]:
# `mean` is treated as a number of days after planting; adding it to the
# planting date gives the calendar date of flowering for each row.
timedelta_values = flowering_1['mean'].values

dates_of_emergence = [
    day_of_planting + datetime.timedelta(days=n_days)
    for n_days in timedelta_values
]

# Sanity check: one computed date per row.
print(flowering_1.shape[0])
print(len(dates_of_emergence))

54
54


In [44]:
# Attach the computed flowering dates as a new column on a copy of flowering_1.
flowering_2 = flowering_1.assign(date_of_flowering=dates_of_emergence)
# flowering_2.head()

Unnamed: 0,sitename,lat,lon,date,trait,mean,cultivar,date_1,sitename_1,range,column,date_of_planting,date_of_flowering
72497,MAC Field Scanner Season 4 Range 20 Column 11,33.075231,-111.974884,2017 Jul 20 (America/Phoenix),flowering_time,56.0,PI527045,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 11,20,11,2017-04-20,2017-06-15
167207,MAC Field Scanner Season 4 Range 20 Column 8,33.075231,-111.974933,2017 Jul 20 (America/Phoenix),flowering_time,72.0,PI655972,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 8,20,8,2017-04-20,2017-07-01
260431,MAC Field Scanner Season 4 Range 20 Column 7,33.075231,-111.97495,2017 Jul 20 (America/Phoenix),flowering_time,83.0,PI576401,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 7,20,7,2017-04-20,2017-07-12
260437,MAC Field Scanner Season 4 Range 20 Column 6,33.075231,-111.974966,2017 Jul 20 (America/Phoenix),flowering_time,74.0,PI534120,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 6,20,6,2017-04-20,2017-07-03
260444,MAC Field Scanner Season 4 Range 20 Column 3,33.075231,-111.975015,2017 Jul 20 (America/Phoenix),flowering_time,65.0,PI656026,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 3,20,3,2017-04-20,2017-06-24


#### C. Rename `mean` column to `days_to_flowering`

In [45]:
# Give the generic `mean` column a trait-specific name.
flowering_3 = flowering_2.rename(columns={'mean': 'days_to_flowering'})
# flowering_3.head()

#### D. Add Growing Degree Days 

In [46]:
# season_4_weather_data = pd.read_csv('data/processed/weather_station_season_4_2019-12-20T190949.csv')
# season_4_weather_data.head()

In [47]:
# season_4_gdd was already built in the flag leaf section; only its shape is re-checked here.
# season_4_gdd = season_4_weather_data[['date', 'gdd']]
print(season_4_gdd.shape)
# season_4_gdd.head()

(150, 2)


In [48]:
# Code already run for flag leaf emergence
# season_4_gdd_1 = season_4_gdd.copy()
# season_4_gdd_1.date = pd.to_datetime(season_4_gdd_1.date)
# season_4_gdd_1.head()
# Confirm `date` is already datetime64 before it is used as a merge key below.
season_4_gdd_1.dtypes

date    datetime64[ns]
gdd            float64
dtype: object

In [49]:
# Convert the flowering dates to pandas datetimes before merging on them.
flowering_4 = flowering_3.assign(
    date_of_flowering=pd.to_datetime(flowering_3['date_of_flowering'])
)
# print(flowering_4.dtypes)
# flowering_4.tail()

Unnamed: 0,sitename,lat,lon,date,trait,days_to_flowering,cultivar,date_1,sitename_1,range,column,date_of_planting,date_of_flowering
333175,MAC Field Scanner Season 4 Range 46 Column 7,33.076165,-111.97495,2017 Jul 20 (America/Phoenix),flowering_time,72.0,PI534120,2017-07-20,MAC Field Scanner Season 4 Range 46 Column 7,46,7,2017-04-20,2017-07-01
333176,MAC Field Scanner Season 4 Range 46 Column 9,33.076165,-111.974917,2017 Jul 20 (America/Phoenix),flowering_time,56.0,PI180348,2017-07-20,MAC Field Scanner Season 4 Range 46 Column 9,46,9,2017-04-20,2017-06-15
333177,MAC Field Scanner Season 4 Range 46 Column 11,33.076165,-111.974885,2017 Jul 20 (America/Phoenix),flowering_time,64.0,PI329435,2017-07-20,MAC Field Scanner Season 4 Range 46 Column 11,46,11,2017-04-20,2017-06-23
333178,MAC Field Scanner Season 4 Range 46 Column 14,33.076165,-111.974835,2017 Jul 20 (America/Phoenix),flowering_time,62.0,PI564163,2017-07-20,MAC Field Scanner Season 4 Range 46 Column 14,46,14,2017-04-20,2017-06-21
333179,MAC Field Scanner Season 4 Range 50 Column 6,33.076309,-111.974966,2017 Jul 20 (America/Phoenix),flowering_time,70.0,PI655972,2017-07-20,MAC Field Scanner Season 4 Range 50 Column 6,50,6,2017-04-20,2017-06-29


In [50]:
# Left join: every flowering row is kept and picks up the weather table's `gdd`
# for its flowering date (NaN where no weather row matches).
flowering_5 = pd.merge(
    left=flowering_4,
    right=season_4_gdd_1,
    how='left',
    left_on='date_of_flowering',
    right_on='date',
)
# flowering_5.head()

Unnamed: 0,sitename,lat,lon,date_x,trait,days_to_flowering,cultivar,date_1,sitename_1,range,column,date_of_planting,date_of_flowering,date_y,gdd
0,MAC Field Scanner Season 4 Range 20 Column 11,33.075231,-111.974884,2017 Jul 20 (America/Phoenix),flowering_time,56.0,PI527045,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 11,20,11,2017-04-20,2017-06-15,2017-06-15,904.0
1,MAC Field Scanner Season 4 Range 20 Column 8,33.075231,-111.974933,2017 Jul 20 (America/Phoenix),flowering_time,72.0,PI655972,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 8,20,8,2017-04-20,2017-07-01,2017-07-01,1296.0
2,MAC Field Scanner Season 4 Range 20 Column 7,33.075231,-111.97495,2017 Jul 20 (America/Phoenix),flowering_time,83.0,PI576401,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 7,20,7,2017-04-20,2017-07-12,2017-07-12,1573.0
3,MAC Field Scanner Season 4 Range 20 Column 6,33.075231,-111.974966,2017 Jul 20 (America/Phoenix),flowering_time,74.0,PI534120,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 6,20,6,2017-04-20,2017-07-03,2017-07-03,1343.0
4,MAC Field Scanner Season 4 Range 20 Column 3,33.075231,-111.975015,2017 Jul 20 (America/Phoenix),flowering_time,65.0,PI656026,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 3,20,3,2017-04-20,2017-06-24,2017-06-24,1127.0


#### E. Drop & Rename Columns

In [51]:
# List the merged columns; both inputs had a `date` column, so they appear as date_x / date_y.
flowering_5.columns

Index(['sitename', 'lat', 'lon', 'date_x', 'trait', 'days_to_flowering',
       'cultivar', 'date_1', 'sitename_1', 'range', 'column',
       'date_of_planting', 'date_of_flowering', 'date_y', 'gdd'],
      dtype='object')

In [52]:
# Columns no longer needed once the merge is done.
cols_to_drop = ['sitename', 'lat', 'lon', 'trait', 'date_x', 'date_y', 'date_of_planting']
flowering_6 = flowering_5.drop(columns=cols_to_drop)
print(flowering_6.shape)
# flowering_6.head()

(54, 8)


Unnamed: 0,days_to_flowering,cultivar,date_1,sitename_1,range,column,date_of_flowering,gdd
0,56.0,PI527045,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 11,20,11,2017-06-15,904.0
1,72.0,PI655972,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 8,20,8,2017-07-01,1296.0
2,83.0,PI576401,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 7,20,7,2017-07-12,1573.0
3,74.0,PI534120,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 6,20,6,2017-07-03,1343.0
4,65.0,PI656026,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 3,20,3,2017-06-24,1127.0


In [53]:
# Restore the plain `date` / `sitename` names and give `gdd` a trait-specific name.
flowering_7 = flowering_6.rename(columns={
    'date_1': 'date',
    'sitename_1': 'sitename',
    'gdd': 'gdd_to_flowering',
})
# flowering_7.head()

Unnamed: 0,days_to_flowering,cultivar,date,sitename,range,column,date_of_flowering,gdd_to_flowering
0,56.0,PI527045,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 11,20,11,2017-06-15,904.0
1,72.0,PI655972,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 8,20,8,2017-07-01,1296.0
2,83.0,PI576401,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 7,20,7,2017-07-12,1573.0
3,74.0,PI534120,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 6,20,6,2017-07-03,1343.0
4,65.0,PI656026,2017-07-20,MAC Field Scanner Season 4 Range 20 Column 3,20,3,2017-06-24,1127.0


### III. Seedling Emergence
* need feedback on this
* seedling emergence rate, seedling count, both, neither?

In [None]:
# seedling_0 = df_5.loc[df_5.trait == 'seedling_emergence_rate']
# print(seedling_0.shape)
# seedling_0.head()

### IV. Merge tables

#### Test Merged table to ensure the correct shape

### V. Drop and reorder columns and set index to sitename

#### Save flowering and flag leaf tables without merging 
* Drop `date`, `date_of_flowering`, and `date_of_flag_leaf_emergence` columns (**TODO:** the cells below currently drop only `date`)
* Can add later if needed

#### A. Days and gdd to flowering

In [55]:
# Remove the `date` column from the flowering table.
flowering_8 = flowering_7.drop(columns=['date'])
# flowering_8.head()

Unnamed: 0,days_to_flowering,cultivar,sitename,range,column,date_of_flowering,gdd_to_flowering
0,56.0,PI527045,MAC Field Scanner Season 4 Range 20 Column 11,20,11,2017-06-15,904.0
1,72.0,PI655972,MAC Field Scanner Season 4 Range 20 Column 8,20,8,2017-07-01,1296.0
2,83.0,PI576401,MAC Field Scanner Season 4 Range 20 Column 7,20,7,2017-07-12,1573.0
3,74.0,PI534120,MAC Field Scanner Season 4 Range 20 Column 6,20,6,2017-07-03,1343.0
4,65.0,PI656026,MAC Field Scanner Season 4 Range 20 Column 3,20,3,2017-06-24,1127.0


In [56]:
# Use the plot sitename as the row index.
flowering_9 = flowering_8.set_index('sitename')
# flowering_9.head()

Unnamed: 0_level_0,days_to_flowering,cultivar,range,column,date_of_flowering,gdd_to_flowering
sitename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MAC Field Scanner Season 4 Range 20 Column 11,56.0,PI527045,20,11,2017-06-15,904.0
MAC Field Scanner Season 4 Range 20 Column 8,72.0,PI655972,20,8,2017-07-01,1296.0
MAC Field Scanner Season 4 Range 20 Column 7,83.0,PI576401,20,7,2017-07-12,1573.0
MAC Field Scanner Season 4 Range 20 Column 6,74.0,PI534120,20,6,2017-07-03,1343.0
MAC Field Scanner Season 4 Range 20 Column 3,65.0,PI656026,20,3,2017-06-24,1127.0


In [57]:
new_col_order = ['range', 'column', 'cultivar', 'date_of_flowering', 'days_to_flowering', 'gdd_to_flowering']
# Reorder columns by label selection. Unlike rebuilding through the DataFrame
# constructor, a misspelled name raises KeyError instead of silently producing
# an all-NaN column.
flowering_10 = flowering_9[new_col_order]
# Shapes should match: the same rows and the same six columns, only reordered.
print(flowering_9.shape)
print(flowering_10.shape)
# flowering_10.head()

(54, 6)
(54, 6)


Unnamed: 0_level_0,range,column,cultivar,date_of_flowering,days_to_flowering,gdd_to_flowering
sitename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MAC Field Scanner Season 4 Range 20 Column 11,20,11,PI527045,2017-06-15,56.0,904.0
MAC Field Scanner Season 4 Range 20 Column 8,20,8,PI655972,2017-07-01,72.0,1296.0
MAC Field Scanner Season 4 Range 20 Column 7,20,7,PI576401,2017-07-12,83.0,1573.0
MAC Field Scanner Season 4 Range 20 Column 6,20,6,PI534120,2017-07-03,74.0,1343.0
MAC Field Scanner Season 4 Range 20 Column 3,20,3,PI656026,2017-06-24,65.0,1127.0


#### Check for / drop duplicates

In [61]:
# Count rows whose column values repeat an earlier row (the index is not compared).
flowering_10.duplicated().value_counts()

False    53
True      1
dtype: int64

In [62]:
# Keep only the first occurrence of each duplicated row (drop_duplicates default).
flowering_11 = flowering_10.drop_duplicates()
# flowering_11.head()

Unnamed: 0_level_0,range,column,cultivar,date_of_flowering,days_to_flowering,gdd_to_flowering
sitename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MAC Field Scanner Season 4 Range 20 Column 11,20,11,PI527045,2017-06-15,56.0,904.0
MAC Field Scanner Season 4 Range 20 Column 8,20,8,PI655972,2017-07-01,72.0,1296.0
MAC Field Scanner Season 4 Range 20 Column 7,20,7,PI576401,2017-07-12,83.0,1573.0
MAC Field Scanner Season 4 Range 20 Column 6,20,6,PI534120,2017-07-03,74.0,1343.0
MAC Field Scanner Season 4 Range 20 Column 3,20,3,PI656026,2017-06-24,65.0,1127.0


#### B. Days and gdd to flag leaf emergence

In [33]:
# Use the plot sitename as the row index.
flag_leaf_9 = flag_leaf_8.set_index('sitename')
# flag_leaf_9.head()

Unnamed: 0_level_0,days_to_flag_leaf_emergence,cultivar,date,range,column,date_of_flag_leaf_emergence,gdd_to_flag_leaf_emergence
sitename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MAC Field Scanner Season 4 Range 20 Column 11,50.0,PI527045,2017-07-20,20,11,2017-06-09,799.0
MAC Field Scanner Season 4 Range 20 Column 8,70.0,PI655972,2017-07-20,20,8,2017-06-29,1250.0
MAC Field Scanner Season 4 Range 20 Column 7,77.0,PI576401,2017-07-20,20,7,2017-07-06,1420.0
MAC Field Scanner Season 4 Range 20 Column 6,72.0,PI534120,2017-07-20,20,6,2017-07-01,1296.0
MAC Field Scanner Season 4 Range 20 Column 3,62.0,PI656026,2017-07-20,20,3,2017-06-21,1050.0


In [34]:
# Remove the `date` column from the flag leaf table.
flag_leaf_10 = flag_leaf_9.drop(columns=['date'])
# flag_leaf_10.tail()

Unnamed: 0_level_0,days_to_flag_leaf_emergence,cultivar,range,column,date_of_flag_leaf_emergence,gdd_to_flag_leaf_emergence
sitename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MAC Field Scanner Season 4 Range 50 Column 14,60.0,PI534165,50,14,2017-06-19,996.0
MAC Field Scanner Season 4 Range 50 Column 15,56.0,PI656065,50,15,2017-06-15,904.0
MAC Field Scanner Season 4 Range 51 Column 7,48.0,PI641830,51,7,2017-06-07,757.0
MAC Field Scanner Season 4 Range 51 Column 12,91.0,PI655983,51,12,2017-07-20,1756.0
MAC Field Scanner Season 4 Range 20 Column 4,50.0,PI526905,20,4,2017-06-09,799.0


In [35]:
new_col_order = ['range', 'column', 'cultivar', 'date_of_flag_leaf_emergence', 'days_to_flag_leaf_emergence',
                 'gdd_to_flag_leaf_emergence',]
# Reorder columns by label selection. Unlike rebuilding through the DataFrame
# constructor, a misspelled name raises KeyError instead of silently producing
# an all-NaN column.
flag_leaf_11 = flag_leaf_10[new_col_order]
print(flag_leaf_11.shape)
# flag_leaf_11.head()

(79, 6)


Unnamed: 0_level_0,range,column,cultivar,date_of_flag_leaf_emergence,days_to_flag_leaf_emergence,gdd_to_flag_leaf_emergence
sitename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MAC Field Scanner Season 4 Range 20 Column 11,20,11,PI527045,2017-06-09,50.0,799.0
MAC Field Scanner Season 4 Range 20 Column 8,20,8,PI655972,2017-06-29,70.0,1250.0
MAC Field Scanner Season 4 Range 20 Column 7,20,7,PI576401,2017-07-06,77.0,1420.0
MAC Field Scanner Season 4 Range 20 Column 6,20,6,PI534120,2017-07-01,72.0,1296.0
MAC Field Scanner Season 4 Range 20 Column 3,20,3,PI656026,2017-06-21,62.0,1050.0


#### C. Drop duplicates

In [38]:
# Count rows whose column values repeat an earlier row (the index is not compared).
flag_leaf_11.duplicated().value_counts()

False    77
True      2
dtype: int64

In [39]:
# Keep only the first occurrence of each duplicated row (drop_duplicates default),
# then print the shapes before and after to show how many rows were removed.
flag_leaf_12 = flag_leaf_11.drop_duplicates()
print(flag_leaf_11.shape)
print(flag_leaf_12.shape)

(79, 6)
(78, 6)


### Final Steps: Save as `.csv` file
* Check output filename
* Change name of df

In [63]:
# Flip to True to write the flowering table to a timestamped CSV.
need_to_create_csv = False

if need_to_create_csv:
    # Second-resolution ISO timestamp; its colons are stripped from the filename below.
    now = datetime.datetime.now().replace(microsecond=0)
    timestamp = now.isoformat()
    output_filename = f'days_gdd_to_flowering_season_4_{timestamp}.csv'.replace(':', '')
    flowering_11.to_csv(f'data/processed/{output_filename}')