In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ee 
import geemap
import time 
from dateutil import parser
from dateutil.relativedelta import relativedelta

# Google Earth Engine Login

In [3]:
# Initialise the library with the credentials already stored on this machine.
# The interactive authentication flow is only started when that fails, so a
# "Restart & Run All" does not ask for a new verification code on every run.
try:
    ee.Initialize()
except Exception:  # NOTE(review): the exception type raised for missing credentials may depend on the earthengine-api version
    # Trigger the authentication flow, then retry; a second failure is not caught
    # here, so the error is shown to the user.
    ee.Authenticate()
    ee.Initialize()

Enter verification code:  <redacted>



Successfully saved authorization token.


EEException: Not signed up for Earth Engine or project is not registered. For help, visit https://developers.google.com/earth-engine/cloud/earthengine_cloud_project_setup

## Selecting Location

In [None]:
## Loading the FAO GAUL 2015 level-2 feature collection; its features are filtered on their 'ADM2_NAME' property further down
Boundaries = ee.FeatureCollection('FAO/GAUL/2015/level2')

In [None]:
## Region of interest: the boundary features whose 'ADM2_NAME' equals 'Creuse'
poi = Boundaries.filter(ee.Filter.eq('ADM2_NAME', 'Creuse'))

## Interactive map with the satellite basemap
GEE_Map = geemap.Map()
GEE_Map.add_basemap("SATELLITE")

## Layers: all boundaries first, then the selected region on top of them
for layer, label in ((Boundaries, 'Worldmap'), (poi, 'Selected Region')):
    GEE_Map.addLayer(layer, {}, label)

## The map is the last expression of the cell, so the notebook renders it
GEE_Map

## Retrieving Data from FLDAS (FEWS NET)

In [None]:
# Selection period, written as YYYY-MM-DD strings
date_start, date_end = '1982-01-01', '2020-01-01'

## Open the image collection first, then restrict it to the selection period
FEWS = ee.ImageCollection("NASA/FLDAS/NOAH01/C/GL/M/V001")
FEWS = FEWS.filterDate(date_start, date_end)

In [None]:
# Convert the period bounds from strings to datetime objects.
# The same names are rebound, so the conversion is skipped for a value that is
# no longer a string: re-running this cell is then harmless instead of failing
# inside parser.parse, which only accepts strings or character streams.
if isinstance(date_start, str):
    date_start = parser.parse(date_start)
if isinstance(date_end, str):
    date_end = parser.parse(date_end)

In [None]:
# Number of monthly steps in the selection period.
# relativedelta splits the gap into whole years plus left-over months, so both
# parts are counted; using the years alone drops the months of a period that is
# not a whole number of years. Left-over days are ignored.
period_length = relativedelta(date_end, date_start)
difference_in_years = period_length.years
periods = difference_in_years * 12 + period_length.months

In [None]:
## Bands to extract from the collection, one per line.
## Their order is the order used later to name the columns of the FEWS frame.
bands_FEWS = [
    'Evap_tavg',
    'Qg_tavg',
    'RadT_tavg',
    'SoilMoi00_10cm_tavg',
    'SoilMoi100_200cm_tavg',
    'SoilTemp00_10cm_tavg',
    'SoilTemp100_200cm_tavg',
    'Tair_f_tavg',
    'Wind_f_tavg',
    'Rainf_f_tavg',
]

In [None]:
## Record the wall-clock start time; a later cell subtracts it from time.time() to print the elapsed seconds
start = time.time()

##
def poi_mean(img, band_name, scale=463):
    """Attach the mean of one band over the region of interest to an image.

    Parameters
    ----------
    img : ee.Image
        Image to reduce (the function is mapped over an image collection).
    band_name : str
        Name of the band whose regional mean is stored.
    scale : number, optional
        Value passed as ``scale`` to ``reduceRegion``. Defaults to 463, the
        value that used to be hard-coded.
        NOTE(review): 463 may be finer than the native resolution of the
        collections used here; confirm and pass a coarser value if so.

    Returns
    -------
    The image with two extra properties: 'date' (``img.date().format()``) and
    'mean' (the mean of ``band_name`` over the global ``poi``).
    """
    # Select the band before reducing, so only this band is averaged instead of
    # reducing every band of the image and keeping a single entry.
    mean = img.select(band_name).reduceRegion(
        reducer=ee.Reducer.mean(), geometry=poi, scale=scale
    ).get(band_name)
    return img.set('date', img.date().format()).set('mean', mean)

In [None]:
## One mapped collection per band: every image of a collection carries its own
## date and the regional mean of that band (see poi_mean)
results = [
    FEWS.map(lambda image: poi_mean(image, band_name))
    for band_name in bands_FEWS
]

In [None]:
# Download the (date, mean) pairs of every band collection and wrap each list
# of pairs in a two-column DataFrame; getInfo() is the call that fetches the data.
pair_reducer = ee.Reducer.toList(2)

parsed_dfs = [
    pd.DataFrame(
        band_collection.reduceColumns(pair_reducer, ['date', 'mean']).values().get(0).getInfo(),
        columns=['date', 'mean'],
    )
    for band_collection in results
]

In [None]:
%%capture out
# Put the per-band frames side by side and label the columns 0, 1, 2, ...:
# even labels are a band's 'date' column, odd labels its 'mean' column.
df_fews = pd.concat(parsed_dfs, axis=1)
df_fews.columns = range(df_fews.shape[1])
# The trailing double transpose with reset_index was dropped: its result was
# never assigned, so it only built and discarded a copy of the frame.

In [None]:
## Keep only the 'mean' columns (odd labels) and name them after the selected
## bands, in order; the 'date' columns (even labels) are dropped
date_columns = [label for label in df_fews.columns if label % 2 == 0]
mean_columns = [label for label in df_fews.columns if label % 2 == 1]

df_fews.drop(labels=date_columns, axis=1, inplace=True)
df_fews.rename(columns=dict(zip(mean_columns, bands_FEWS)), inplace=True)

In [None]:
## Give the FEWS frame a monthly DatetimeIndex named 'date', starting at date_start.
## The range is sized from the frame itself, so every row gets a date even when the
## row count differs from `periods`, and it is assigned directly as the index, so
## re-running this cell reproduces the same index instead of aligning a fresh
## 0..n-1 Series against dates.
## NOTE(review): recent pandas releases prefer the alias "ME" over "M"; confirm the target version.
df_fews.index = pd.date_range(date_start, periods=len(df_fews), freq="M", name="date")

In [None]:
# Print the FEWS rows that contain at least one missing value
is_NaN = df_fews.isna()
row_has_NaN = is_NaN.any(axis="columns")
rows_with_NaN = df_fews.loc[row_has_NaN]
print(rows_with_NaN)

In [None]:
## Print the wall-clock seconds elapsed since `start` was last set
end = time.time()
print(end - start)

In [None]:
## Show the FEWS frame as the cell output
df_fews

## Retrieving data from NOAA [NDVI]

In [None]:
## Restart the timer for the NDVI section
start = time.time()

In [None]:
## NDVI image collection, restricted to the period given by date_start / date_end
## (an earlier cell rebinds these two names from strings to parsed datetimes)
NDVI = ee.ImageCollection("NOAA/CDR/AVHRR/NDVI/V5").filterDate(date_start,date_end)

In [None]:
## Only the 'NDVI' band is extracted from this collection
bands_NDVI = ['NDVI']

In [None]:
# One mapped collection per NDVI band: each image carries its date and the
# regional mean of the band (see poi_mean)
results = [
    NDVI.map(lambda image: poi_mean(image, band_name))
    for band_name in bands_NDVI
]

In [None]:
# Download the (date, mean) pairs of each mapped collection and store them as
# two-column DataFrames
parsed_dfs = []

for band_collection in results:
    reduced = band_collection.reduceColumns(ee.Reducer.toList(2), ['date', 'mean'])
    date_mean_pairs = reduced.values().get(0).getInfo()
    parsed_dfs.append(pd.DataFrame(date_mean_pairs, columns=['date', 'mean']))

In [None]:
%%capture out
# Combine the downloaded frames, parse the 'date' strings into datetimes and
# average the values of each month; the result is indexed by 'date'
df_ndvi = (
    pd.concat(parsed_dfs, axis=1)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .resample('M', on='date')
    .mean()
)

In [None]:
# Print the monthly NDVI rows that contain at least one missing value
is_NaN = df_ndvi.isna()
row_has_NaN = is_NaN.any(axis="columns")
rows_with_NaN = df_ndvi.loc[row_has_NaN]
print(rows_with_NaN)

In [None]:
# Move the 'date' index back into an ordinary column (in place); the following cells read df_ndvi['date']
df_ndvi.reset_index(inplace=True)

In [None]:
# Label every row with its abbreviated month name ('%b'), then compute the
# mean NDVI per calendar month.
# Only the 'mean' column is averaged. Averaging the whole frame also involves
# the datetime 'date' column, and how groupby().mean() treats such a column is
# not the same in every pandas release (NOTE(review): confirm for the pinned version).
df_ndvi['month_name'] = df_ndvi['date'].dt.strftime('%b')
df_ndvi_monthly = df_ndvi.groupby('month_name')[['mean']].mean()
df_ndvi_monthly.reset_index(inplace=True)

In [None]:
# Fill every missing monthly NDVI value with the mean of its calendar month.
# The lookup maps each row's month name to that month's mean; fillna then only
# touches the rows that are missing. This replaces element-wise writes of the
# form df_ndvi['mean'][i] = ..., a chained assignment that pandas warns about
# and that does not modify the frame when Copy-on-Write is enabled.
monthly_mean_by_name = df_ndvi_monthly.set_index('month_name')['mean']
df_ndvi['mean'] = df_ndvi['mean'].fillna(df_ndvi['month_name'].map(monthly_mean_by_name))

In [None]:
# The helper column with the month label is no longer needed
df_ndvi.drop(columns='month_name', inplace=True)

In [None]:
# Name the value column 'ndvi' and use 'date' as the index again
df_ndvi.rename({"mean": "ndvi"}, axis="columns", inplace=True)
df_ndvi.set_index(keys='date', inplace=True)

In [None]:
# Print the NDVI rows that still contain a missing value
is_NaN = df_ndvi.isna()
row_has_NaN = is_NaN.any(axis="columns")
rows_with_NaN = df_ndvi.loc[row_has_NaN]
print(rows_with_NaN)

In [None]:
# Print the wall-clock seconds elapsed since `start` was last set
end = time.time()
print(end - start)

In [None]:
# Summary of the NDVI frame (index, columns, non-null counts, dtypes)
df_ndvi.info()

## Merging all the data into one dataset

In [None]:
# Place the FEWS and NDVI frames side by side; rows are matched on their index
df = pd.concat(objs=[df_fews, df_ndvi], axis="columns")

In [None]:
# Show the merged frame as the cell output
df

## Converting all the data to the right units

In [None]:
# Evaporation and rainfall are multiplied by 3600 * 24 * 30, the number of seconds in 30 days.
# NOTE(review): this turns a per-second rate into a total per 30-day month, not a
# per-day value; confirm which unit is intended.
for flux_column in ('Evap_tavg', 'Rainf_f_tavg'):
    df[flux_column] = df[flux_column] * 3600 * 24 * 30

# Soil and air temperatures: Kelvin to Celsius
for temperature_column in ('SoilTemp00_10cm_tavg', 'SoilTemp100_200cm_tavg', 'Tair_f_tavg'):
    df[temperature_column] = df[temperature_column] - 273.15

# NDVI: apply its scale factor of 0.0001
df['ndvi'] = df['ndvi'] * 0.0001

In [None]:
# True when at least one cell of the merged frame is missing
df.isnull().values.any()

In [None]:
# Total number of missing cells in the merged frame
df.isnull().sum().sum()

In [None]:
# Show the merged frame after the unit conversions
df