## Season 4 Site-specific Growing Degree Day Values
* gdd to seedling emergence
* gdd to flowering
* gdd to flag leaf emergence

In [None]:
import datetime
import numpy as np
import pandas as pd

In [None]:
# Load the raw season 4 export. low_memory=False turns off pandas' chunked
# parsing, which can otherwise infer mixed dtypes within a column.
df_0 = pd.read_csv(
    filepath_or_buffer='data/raw/mac_season_4.csv',
    low_memory=False,
)
print(df_0.shape)
# df_0.head()

In [None]:
# df_0.columns

In [None]:
# Columns of the raw export that are removed before any further processing.
cols_to_drop = [
    'Unnamed: 0',
    'checked',
    'result_type',
    'id',
    'citation_id',
    'site_id',
    'treatment_id',
    'city',
    'scientificname',
    'commonname',
    'genus',
    'species_id',
    'cultivar_id',
    'author',
    'citation_year',
    'treatment',
    'time',
    'raw_date',
    'month',
    'year',
    'dateloc',
    'trait_description',
    'units',
    'n',
    'statname',
    'stat',
    'notes',
    'access_level',
    'entity',
    'method_name',
    'view_url',
    'edit_url',
]

In [None]:
# Remove the unused columns; `df_0` itself is left untouched.
df_1 = df_0.drop(columns=cols_to_drop)
print(df_1.shape)
# df_1.head()

#### Convert dates from string to datetime

In [None]:
# Entries containing 'Phoenix' lose their final 18 characters (presumably a
# trailing " (America/Phoenix)" timezone label -- confirm against the raw
# data); every other entry is kept unchanged.
new_dates = [d[:-18] if 'Phoenix' in d else d for d in df_1.date.values]

# Sanity check: there should be one cleaned string per row.
print(df_1.shape[0])
print(len(new_dates))

In [None]:
# Parse the cleaned date strings into pandas datetimes.
iso_format_dates = pd.to_datetime(arg=new_dates)

In [None]:
# `assign` returns a new frame, so `df_1` keeps its original columns.
df_2 = df_1.assign(date_1=iso_format_dates)
# df_2.head()

#### Change sitenames to strip ` E` and ` W` subplot designations

In [None]:
# Site names that end in ' W' or ' E' (the subplot designation) lose those two
# trailing characters; every other name passes through unchanged.
sitename_values = df_2.sitename.values

# `str.endswith` accepts a tuple of suffixes, which replaces the bitwise `|`
# that was previously standing in for a logical OR between two calls.
no_e_w_names = [
    name[:-2] if name.endswith((' W', ' E')) else name
    for name in sitename_values
]

In [None]:
# Attach the subplot-free site names as a new column on a fresh frame.
df_3 = df_2.assign(sitename_1=no_e_w_names)
# df_3.head()

#### Extract Range and Column Values

In [None]:
df_4 = df_3.copy()

# Pull the integers that follow "Range " and "Column " out of the site name.
# The patterns are raw strings so that `\d` reaches the regex engine as a digit
# class instead of being an invalid string escape, and expand=False makes
# `str.extract` return a Series (one capture group) rather than a one-column
# DataFrame.
# NOTE: `.astype(int)` raises if any site name lacks a match (NaN result).
df_4['range'] = df_4['sitename_1'].str.extract(r"Range (\d+)", expand=False).astype(int)
df_4['column'] = df_4['sitename_1'].str.extract(r"Column (\d+)", expand=False).astype(int)

# df_4.sample(n=7)

#### Select for traits to be used for `gdd_to_` calculations

In [None]:
# Only these three traits feed the `gdd_to_` calculations.
traits_to_keep = [
    'seedling_emergence_rate',
    'flag_leaf_emergence_time',
    'flowering_time',
]
df_5 = df_4[df_4['trait'].isin(traits_to_keep)]
df_5.shape

### I. Add date of emergence
* Test with flag leaf emergence

In [None]:
# Keep only the flag leaf emergence rows.
flag_leaf_0 = df_5[df_5['trait'] == 'flag_leaf_emergence_time']
print(flag_leaf_0.shape)
flag_leaf_0.head()

In [None]:
# Planting date used as the origin for the emergence-date arithmetic.
day_of_planting = datetime.date(year=2017, month=4, day=20)

# Broadcast the planting date to every row of a new frame.
flag_leaf_1 = flag_leaf_0.assign(date_of_planting=day_of_planting)
flag_leaf_1.head()

In [None]:
# Independent (deep) copy so later steps do not modify `flag_leaf_1`.
flag_leaf_2 = flag_leaf_1.copy(deep=True)

In [None]:
# The `mean` column is used as a number of days after planting.
timedelta_values = flag_leaf_2['mean'].values

# `datetime.timedelta` rejects NumPy integer scalars with a TypeError, so each
# value is coerced to a plain float first; float input is unaffected.
# NOTE: adding a timedelta to a `date` ignores the sub-day part, so fractional
# values are effectively truncated to whole days.
dates_of_emergence = [
    day_of_planting + datetime.timedelta(days=float(val))
    for val in timedelta_values
]

# Sanity check: there should be one computed date per row.
print(flag_leaf_2.shape[0])
print(len(dates_of_emergence))

In [None]:
# Store the computed emergence dates on a new frame.
flag_leaf_3 = flag_leaf_2.assign(date_of_flag_leaf_emergence=dates_of_emergence)
# flag_leaf_3.head()

### II. Add Growing Degree Days 

In [None]:
# Processed weather-station table for season 4.
season_4_weather_data = pd.read_csv(
    filepath_or_buffer='data/processed/weather_station_season_4_2019-12-20T190949.csv',
)
# season_4_weather_data.head()

In [None]:
# Only the date and growing-degree-day columns are needed for the join.
season_4_gdd = season_4_weather_data.loc[:, ['date', 'gdd']]
print(season_4_gdd.shape)
season_4_gdd.head()

In [None]:
# Convert the weather dates to datetimes on a new frame.
season_4_gdd_1 = season_4_gdd.assign(date=pd.to_datetime(season_4_gdd['date']))
# season_4_gdd_1.head()

In [None]:
# Convert the emergence dates to pandas datetimes on a new frame.
flag_leaf_4 = flag_leaf_3.assign(
    date_of_flag_leaf_emergence=pd.to_datetime(flag_leaf_3['date_of_flag_leaf_emergence']),
)
flag_leaf_4.dtypes

In [None]:
# Left join: every flag leaf row is kept and picks up the `gdd` value recorded
# for its emergence date. Both inputs have a `date` column, so pandas suffixes
# them as `date_x` / `date_y` in the result.
flag_leaf_5 = pd.merge(
    flag_leaf_4,
    season_4_gdd_1,
    how='left',
    left_on='date_of_flag_leaf_emergence',
    right_on='date',
)
# flag_leaf_5.head()

### III. Drop, Rename, and Reorder Columns

In [None]:
# Display the merged frame's column names before deciding what to drop.
flag_leaf_5.columns

In [None]:
# NOTE: this rebinds `cols_to_drop`, replacing the list used earlier for the
# raw export.
cols_to_drop = [
    'sitename',
    'date_x',
    'date_y',
    'date_of_planting',
]
flag_leaf_6 = flag_leaf_5.drop(columns=cols_to_drop)
print(flag_leaf_6.shape)
# flag_leaf_6.head()

In [None]:
# Give the derived columns their final names.
flag_leaf_7 = flag_leaf_6.rename(
    columns={
        'date_1': 'date',
        'sitename_1': 'sitename',
        'mean': 'value',
        'gdd': 'gdd_to_flag_leaf_emergence',
    },
)
# flag_leaf_7.head()

In [None]:
# Final column selection and order; columns not listed here are left out.
new_col_order = [
    'sitename',
    'range',
    'column',
    'lat',
    'lon',
    'cultivar',
    'trait',
    'value',
    'date_of_flag_leaf_emergence',
    'gdd_to_flag_leaf_emergence',
]
flag_leaf_8 = flag_leaf_7.reindex(columns=new_col_order)
print(flag_leaf_8.shape)
# flag_leaf_8.head()

In [None]:
# Use the site name as the row index of the final table.
flag_leaf_9 = flag_leaf_8.set_index(keys='sitename')
# flag_leaf_9.head()

### Final Steps: Save as `.csv` file
* Check output filename
* Change name of df

In [None]:
# Guard flag: set to True to write the final table to disk.
need_to_create_csv = False

if need_to_create_csv:

    # Second-resolution ISO timestamp; the colons are removed from the file
    # name on the next line.
    timestamp = datetime.datetime.now().isoformat(timespec='seconds')
    output_filename = f'gdd_to_flag_leaf_emergence_season_4_{timestamp}.csv'.replace(':', '')
    flag_leaf_9.to_csv(path_or_buf=f'data/processed/{output_filename}')