In [None]:
# This file contains the processing steps for generating NIRv MODIS data

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.interpolate import CubicSpline
from sklearn.impute import KNNImputer

In [None]:
# These CSVs were generated from the raw HDF MODIS downloads that can be found in file 5.
# The directory is defined once so the notebook can be pointed at another location
# by editing a single line.
NIRV_DIR = '/Users/abigailbase/PROJECT FILES/NIRv DFs'

# One CSV per site, in the same order as `keys` (the two lists are zipped together).
# NOTE(review): 'AU_Dry_NIRv.csv' and 'RU_CokNIR.csv' do not follow the
# '<SITE>_NIR.csv' pattern of the other entries; they are kept exactly as in the
# original list -- confirm they match the file names on disk.
nirv_file_names = [
    'AR_Vir_NIR.csv',
    'AU_Dry_NIRv.csv',
    'BE_Vie_NIR.csv',
    'CA_TP1_NIR.csv',
    'CH_Cha_NIR.csv',
    'DE_Gri_NIR.csv',
    'FR_Pue_NIR.csv',
    'GF_Guy_NIR.csv',
    'IT_Col_NIR.csv',
    'NL_Loo_NIR.csv',
    'RU_CokNIR.csv',
    'RU_Fyo_NIR.csv',
    'US_PFa_NIR.csv',
    'US_Var_NIR.csv',
    'ZA_Kru_NIR.csv',
]

file_paths = [f'{NIRV_DIR}/{name}' for name in nirv_file_names]



In [None]:
# Dictionary keys for the per-site frames: '<site code>_NIRv', listed in the same
# order as `file_paths`.
keys = [
    f'{site_code}_NIRv'
    for site_code in (
        'AR_Vir', 'AU_Dry', 'BE_Vie', 'CA_TP1', 'CH_Cha',
        'DE_Gri', 'FR_Pue', 'GF_Guy', 'IT_Col', 'NL_Loo',
        'RU_Cok', 'RU_Fyo', 'US_PFa', 'US_Var', 'ZA_Kru',
    )
]


In [None]:
# Empty container that will map each key in `keys` to that site's DataFrame
ndvi_dict = dict()

In [None]:
# Read each site's CSV into a DataFrame, tag every row with the site's key in a
# 'site_id' column, and store the frame in ndvi_dict under that key.

for key, file_path in zip(keys, file_paths):
    df = pd.read_csv(file_path).assign(site_id=key)
    ndvi_dict[key] = df


In [None]:
dfs_to_concat = [ndvi_dict[key] for key in keys]

In [None]:
#combine all of the sites into one df

combined_ndvi_df = pd.concat(dfs_to_concat, ignore_index=True)


In [None]:
# convert and sort the date col

combined_ndvi_df['date'] = pd.to_datetime(combined_ndvi_df['date'])
combined_ndvi_df = combined_ndvi_df.sort_values(by='date')


In [None]:
# investigate sites to check theyre all there

sites = combined_ndvi_df['site_id'].unique()

In [None]:
# NIRv time series for each site, used to determine the best interpolator for
# converting the data to daily.
# The plotted column is 'NIRv', so the axis label and title name NIRv (they
# previously said NDVI, which did not match the data being drawn).

fig, ax = plt.subplots(figsize=(14, 7))

# One line per site; sort=False keeps the sites in order of first appearance
for site, site_data in combined_ndvi_df.groupby('site_id', sort=False):
    ax.plot(site_data['date'], site_data['NIRv'], marker='o', linestyle='-', label=site)

ax.set_xlabel('Date')
ax.set_ylabel('NIRv')
ax.set_title('NIRv Time Series for All Sites')
ax.legend(title='Site ID')
fig.tight_layout()

plt.show()

In [None]:
# Columns removed from each site's frame before interpolation (passed to
# interpolate_ndvi as its NDVI_drop argument). 'day_of_year' was listed twice;
# the duplicate entry is removed.
NIRV_drop = ['year', 'day_of_year', 'NIR_point', 'NIRv', 'site_id']

In [None]:
# function for applying spline interpolator to convert to daily obs


def interpolate_ndvi(ndvi_dict, key, NDVI_drop, value_col='NDVI_point',
                     start_date='2010-01-01'):
    """Interpolate one site's observations to a gap-free daily series.

    The site's frame is taken from ``ndvi_dict[key]``, the columns in
    ``NDVI_drop`` are removed, rows dated before ``start_date`` are discarded,
    the frame is re-indexed to daily frequency, and ``value_col`` is replaced
    on every day by a cubic spline fitted through its non-missing values.

    Parameters
    ----------
    ndvi_dict : dict
        Maps keys to DataFrames that have a 'date' column and ``value_col``.
    key : str
        Entry of ``ndvi_dict`` to process.
    NDVI_drop : list
        Column labels to drop before interpolating.
    value_col : str, default 'NDVI_point'
        Column to interpolate.
    start_date : str or Timestamp, default '2010-01-01'
        Rows dated earlier than this are discarded.

    Returns
    -------
    pandas.DataFrame
        Daily-indexed frame (DatetimeIndex named 'date'). ``value_col`` is
        filled on every row; any other remaining column is NaN on the days
        that were inserted by the resampling.

    Raises
    ------
    ValueError
        If fewer than two non-missing observations remain to fit the spline.
    """
    # drop() returns a new frame, so the DataFrame stored in ndvi_dict is not modified
    site_df = ndvi_dict[key].drop(columns=NDVI_drop)

    # assign() also returns a new frame, which avoids chained-assignment warnings
    site_df = site_df.assign(date=pd.to_datetime(site_df['date']))

    # resample/asfreq and CubicSpline need unique, increasing dates. A stable sort
    # keeps file order among equal dates, so keep='last' retains the row that
    # appears last in the input for a duplicated date.
    site_df = (
        site_df
        .sort_values('date', kind='stable')
        .drop_duplicates(subset='date', keep='last')
        .set_index('date')
    )

    # discard data from before the start date
    site_df = site_df[site_df.index >= start_date]

    # re-index to one row per calendar day; inserted days are all-NaN
    daily = site_df.resample('D').asfreq()

    # only rows that actually carry an observation are used to fit the spline
    observed = daily.dropna(subset=[value_col])
    if len(observed) < 2:
        raise ValueError(
            f"Need at least 2 non-missing '{value_col}' observations on or after "
            f"{start_date} to interpolate site '{key}'; found {len(observed)}."
        )

    # dates are converted to ordinal day numbers so they can serve as the spline's x axis
    observed_days = observed.index.map(pd.Timestamp.toordinal).values
    observed_values = observed[value_col].values

    # extrapolate=True covers leading/trailing days whose observation is missing
    spline = CubicSpline(observed_days, observed_values, extrapolate=True)

    all_days = daily.index.map(pd.Timestamp.toordinal)
    daily[value_col] = spline(all_days)

    return daily


In [None]:
# apply to all the sites

In [None]:
#AR-Vir

AR_Vir_NDVI=ndvi_dict['AR_Vir_NIRv']

In [None]:
AR_Vir_NDVI_daily=interpolate_ndvi(ndvi_dict,'AR_Vir_NIRv',NIRV_drop)

In [None]:
AR_Vir_NDVI_daily.isna().sum()

In [None]:
AR_Vir_NDVI_daily

In [None]:
AR_Vir_NDVI_daily['site_id']='AR-Vir'

In [None]:
AU_Dry_NDVI_daily=interpolate_ndvi(ndvi_dict,'AU_Dry_NIRv',NIRV_drop)

In [None]:
AU_Dry_NDVI_daily.isna().sum()

In [None]:
AU_Dry_NDVI_daily['site_id']='AU-Dry'

In [None]:
BE_Vie_NDVI_daily=interpolate_ndvi(ndvi_dict,'BE_Vie_NIRv',NIRV_drop)

In [None]:
BE_Vie_NDVI_daily.isna().sum()

In [None]:
BE_Vie_NDVI_daily['site_id']='BE-Vie'

In [None]:
CA_TP1_NDVI_daily=interpolate_ndvi(ndvi_dict,'CA_TP1_NIRv',NIRV_drop)

In [None]:
CA_TP1_NDVI_daily.isna().sum()

In [None]:
CA_TP1_NDVI_daily['site_id']='CA-TP1'

In [None]:
CH_Cha_NDVI_daily=interpolate_ndvi(ndvi_dict,'CH_Cha_NIRv',NIRV_drop)

In [None]:
CH_Cha_NDVI_daily.isna().sum()

In [None]:
CH_Cha_NDVI_daily['site_id']='CH-Cha'

In [None]:
DE_Gri_NDVI_daily=interpolate_ndvi(ndvi_dict,'DE_Gri_NIRv',NIRV_drop)

In [None]:
DE_Gri_NDVI_daily.isna().sum()

In [None]:
DE_Gri_NDVI_daily['site_id']='DE-Gri'

In [None]:
FR_Pue_NDVI_daily=interpolate_ndvi(ndvi_dict,'FR_Pue_NIRv',NIRV_drop)

In [None]:
FR_Pue_NDVI_daily.isna().sum()

In [None]:
FR_Pue_NDVI_daily['site_id']='FR-Pue'

In [None]:
GF_Guy_NDVI_daily=interpolate_ndvi(ndvi_dict,'GF_Guy_NIRv',NIRV_drop)

In [None]:
GF_Guy_NDVI_daily.isna().sum()

In [None]:
GF_Guy_NDVI_daily['site_id']='GF-Guy'

In [None]:
IT_Col_NDVI_daily=interpolate_ndvi(ndvi_dict,'IT_Col_NIRv',NIRV_drop)

In [None]:
IT_Col_NDVI_daily.isna().sum()

In [None]:
IT_Col_NDVI_daily['site_id']='IT-Col'

In [None]:
NL_Loo_NDVI_daily=interpolate_ndvi(ndvi_dict,'NL_Loo_NIRv',NIRV_drop)

In [None]:
NL_Loo_NDVI_daily.isna().sum()

In [None]:
NL_Loo_NDVI_daily['site_id']='NL-Loo'

In [None]:
# RU-Cok is prepared by hand rather than through interpolate_ndvi: rows with any
# missing value and repeated dates are removed before resampling.
# The steps are chained in one cell so that (a) the frame stored in ndvi_dict is
# not modified in place and (b) the cell can be re-run without failing (the
# previous in-place set_index raised on a second run because 'date' was already
# the index).
RU_Cok_NDVI = (
    ndvi_dict['RU_Cok_NIRv']
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    # stable sort keeps file order among equal dates, so keep='last' below is deterministic
    .sort_values(by='date', kind='stable')
    .dropna()
    .drop_duplicates(subset='date', keep='last')
    .set_index('date')
)

# discard data from before 2010
RU_Cok_NDVI = RU_Cok_NDVI[RU_Cok_NDVI.index >= '2010-01-01']

# re-index to one row per calendar day; inserted days are all-NaN
RU_Cok_NDVI_daily = RU_Cok_NDVI.resample('D').asfreq()

In [None]:
# Fit a cubic spline through the days that have an NDVI_point observation and
# evaluate it on every day of the daily index.
valid_data = RU_Cok_NDVI_daily[RU_Cok_NDVI_daily['NDVI_point'].notna()]

# ordinal day numbers serve as the spline's x axis
dates = valid_data.index.map(pd.Timestamp.toordinal).to_numpy()
nirv_values = valid_data['NDVI_point'].to_numpy()

cs = CubicSpline(x=dates, y=nirv_values, extrapolate=True)

full_dates = RU_Cok_NDVI_daily.index.map(pd.Timestamp.toordinal)
RU_Cok_NDVI_daily['NDVI_point'] = cs(full_dates)

RU_Cok_NDVI_daily


In [None]:
RU_Cok_NDVI_daily.isna().sum()

In [None]:
RU_Cok_NDVI_daily=RU_Cok_NDVI_daily.drop(columns=['year','day_of_year',
                                                 'NIR_point','NIRv','site_id'])

In [None]:
RU_Cok_NDVI_daily['site_id']='RU-Cok'

In [None]:
RU_Fyo_NDVI_daily=interpolate_ndvi(ndvi_dict,'RU_Fyo_NIRv',NIRV_drop)

In [None]:
RU_Fyo_NDVI_daily

In [None]:
RU_Fyo_NDVI.isna().sum()

In [None]:
full_date_range = pd.date_range(start='2010-01-01', end='2014-12-19', freq='D')


In [None]:
RU_Fyo_NDVI_daily = RU_Fyo_NDVI_daily.reindex(full_date_range)


In [None]:
RU_Fyo_NDVI_daily.isna().sum()

In [None]:
RU_Fyo_NDVI_daily

In [None]:
# Fill the NaN rows introduced by the reindex with a 5-nearest-neighbour imputer.
# KNNImputer is imported in the notebook's top import cell; it was previously
# imported only in a later cell, so this cell failed on Restart & Run All.
# NOTE(review): the imputer only sees the frame's columns, not its date index --
# confirm the imputed values are sensible for the reindexed days.
knn_imputer = KNNImputer(n_neighbors=5)

# fit_transform returns a bare array, so column labels and the daily index are
# restored when the DataFrame is rebuilt
RU_Fyo_NDVI_daily_imp = pd.DataFrame(
    knn_imputer.fit_transform(RU_Fyo_NDVI_daily),
    columns=RU_Fyo_NDVI_daily.columns,
    index=RU_Fyo_NDVI_daily.index,
)


In [None]:
RU_Fyo_NDVI_daily_imp

In [None]:
RU_Fyo_NDVI_daily_imp.isna().sum()

In [None]:
RU_Fyo_NDVI_daily_imp['site_id']='RU-Fyo'

In [None]:
US_PFa_NDVI_daily=interpolate_ndvi(ndvi_dict,'US_PFa_NIRv',NIRV_drop)

In [None]:
US_PFa_NDVI_daily.isna().sum()

In [None]:
US_PFa_NDVI_daily['site_id']='US-PFa'

In [None]:
US_Var_NDVI_daily=interpolate_ndvi(ndvi_dict,'US_Var_NIRv',NIRV_drop)

In [None]:
US_Var_NDVI_daily.isna().sum()

In [None]:
US_Var_NDVI_daily

In [None]:
US_Var_NDVI_daily['site_id']='US-Var'

In [None]:
ZA_Kru_NDVI_daily=interpolate_ndvi(ndvi_dict,'ZA_Kru_NIRv',NIRV_drop)

In [None]:
ZA_Kru_NDVI_daily.isna().sum()

In [None]:
ZA_Kru_NDVI_daily['site_id']='ZA-Kru'

In [None]:
# merge them all 

NDVI_merge=pd.concat([AR_Vir_NDVI_daily,AU_Dry_NDVI_daily,BE_Vie_NDVI_daily,
                    CA_TP1_NDVI_daily,CH_Cha_NDVI_daily,DE_Gri_NDVI_daily,
                    FR_Pue_NDVI_daily,GF_Guy_NDVI_daily,IT_Col_NDVI_daily,
                    NL_Loo_NDVI_daily,RU_Cok_NDVI_daily,RU_Fyo_NDVI_daily_imp,
                    US_PFa_NDVI_daily,US_Var_NDVI_daily,ZA_Kru_NDVI_daily])

In [None]:
NDVI_merge.to_csv('/Users/abigailbase/PROJECT FILES/FINAL DFs/NDVI.csv')

In [None]:
df_merged=pd.read_csv('/Users/abigailbase/PROJECT FILES/FINAL DFs/FINAL_FINAL_DF.csv',index_col=0)

In [None]:
AR_Vir=df_merged[df_merged['SITE_ID']=='AR-Vir']#1
AU_Dry=df_merged[df_merged['SITE_ID']=='AU-Dry']#2
BE_Vie=df_merged[df_merged['SITE_ID']=='BE-Vie']#3
CA_TP1=df_merged[df_merged['SITE_ID']=='CA-TP1']#4
CH_Cha=df_merged[df_merged['SITE_ID']=='CH-Cha']#5
DE_Gri=df_merged[df_merged['SITE_ID']=='DE-Gri']#6
FR_Pue=df_merged[df_merged['SITE_ID']=='FR-Pue']#7
GF_Guy=df_merged[df_merged['SITE_ID']=='GF-Guy']#8
IT_Col=df_merged[df_merged['SITE_ID']=='IT-Col']#9
NL_Loo=df_merged[df_merged['SITE_ID']=='NL-Loo']#10
RU_Cok=df_merged[df_merged['SITE_ID']=='RU-Cok']#11
RU_Fyo=df_merged[df_merged['SITE_ID']=='RU-Fyo']#12
US_PFa=df_merged[df_merged['SITE_ID']=='US-PFa']#13
US_Var=df_merged[df_merged['SITE_ID']=='US-Var']#14
ZA_Kru=df_merged[df_merged['SITE_ID']=='ZA-Kru']#15

In [None]:
AR_Vir['NDVI']=AR_Vir_NDVI_daily['NDVI_point']

In [None]:
AU_Dry['NDVI']=AU_Dry_NDVI_daily['NDVI_point']

In [None]:
BE_Vie['NDVI']=BE_Vie_NDVI_daily['NDVI_point']

In [None]:
CA_TP1['NDVI']=CA_TP1_NDVI_daily['NDVI_point']

In [None]:
CH_Cha['NDVI']=CH_Cha_NDVI_daily['NDVI_point']

In [None]:
DE_Gri['NDVI']=DE_Gri_NDVI_daily['NDVI_point']

In [None]:
FR_Pue['NDVI']=FR_Pue_NDVI_daily['NDVI_point']

In [None]:
GF_Guy['NDVI']=GF_Guy_NDVI_daily['NDVI_point']

In [None]:
IT_Col['NDVI']=IT_Col_NDVI_daily['NDVI_point']

In [None]:
NL_Loo['NDVI']=NL_Loo_NDVI_daily['NDVI_point']

In [None]:
RU_Cok['NDVI']=RU_Cok_NDVI_daily['NDVI_point']

In [None]:
RU_Fyo_NDVI_daily_imp

In [None]:
RU_Fyo

In [None]:
RU_Fyo['NDVI']=RU_Fyo_NDVI_daily_imp['NDVI_point']

In [None]:
RU_Fyo.isna().sum()

In [None]:
from sklearn.impute import KNNImputer


In [None]:
US_PFa['NDVI']=US_PFa_NDVI_daily['NDVI_point']

In [None]:
US_PFa.isna().sum()

In [None]:
US_Var['NDVI']=US_Var_NDVI_daily['NDVI_point']

In [None]:
US_Var.isna().sum()

In [None]:
ZA_Kru['NDVI']=ZA_Kru_NDVI_daily['NDVI_point']

In [None]:
ZA_Kru.isna().sum()

In [None]:
final_df=pd.concat([AR_Vir,AU_Dry,BE_Vie,CA_TP1,CH_Cha,DE_Gri,FR_Pue,GF_Guy,IT_Col,NL_Loo,
                   RU_Cok,RU_Fyo,US_PFa,US_Var,ZA_Kru])

In [None]:
final_df.isna().sum()

In [None]:
final_df

In [None]:
final_df.to_csv('/Users/abigailbase/PROJECT FILES/FINAL DFs/FINAL_FINAL_DF.csv',index=True)

In [None]:
final_df.isna().sum()

In [None]:
## Import the ssrd data

# Directory holding one ERA5 CSV per site, named '<site id>.csv'. Defined once so
# the location can be changed in a single place.
ERA5_DIR = '/Users/abigailbase/PROJECT FILES/ERA5 CSVs'


def _read_era5(site):
    """Read one site's ERA5 CSV from ERA5_DIR, parsing 'valid_time' as datetimes.

    Parameters
    ----------
    site : str
        Hyphenated site id, e.g. 'AR-Vir'; the file read is '<ERA5_DIR>/<site>.csv'.

    Returns
    -------
    pandas.DataFrame
    """
    return pd.read_csv(f'{ERA5_DIR}/{site}.csv', parse_dates=['valid_time'])


ERA5_AR_Vir = _read_era5('AR-Vir')
ERA5_AU_Dry = _read_era5('AU-Dry')
ERA5_BE_Vie = _read_era5('BE-Vie')
ERA5_CA_TP1 = _read_era5('CA-TP1')
ERA5_CH_Cha = _read_era5('CH-Cha')
ERA5_DE_Gri = _read_era5('DE-Gri')
ERA5_FR_Pue = _read_era5('FR-Pue')
ERA5_GF_Guy = _read_era5('GF-Guy')
ERA5_IT_Col = _read_era5('IT-Col')
ERA5_NL_Loo = _read_era5('NL-Loo')
ERA5_RU_Cok = _read_era5('RU-Cok')
ERA5_RU_Fyo = _read_era5('RU-Fyo')
ERA5_US_PFa = _read_era5('US-PFa')
ERA5_US_Var = _read_era5('US-Var')
ERA5_ZA_Kru = _read_era5('ZA-Kru')
