In [1]:
import ee
ee.Authenticate()
ee.Initialize()

# Pre-Processing raw NetCDF files
While most of the satellite imagery is sourced from the Google Earth Engine, precipitation data is processed from the raw NetCDF files as a matter of skill expression. Datasets from 2017-2021 are downloaded.

In [2]:
## To download precipitation data from the Climate Data Store [Should raise a request first before running this]

import cdsapi

c = cdsapi.Client()

c.retrieve(
    'insitu-gridded-observations-global-and-regional',
    {
        'origin': 'imerg',
        'region': 'global',
        'variable': 'precipitation',
        'time_aggregation': 'daily',
        'horizontal_aggregation': '0_2_x_0_2',
        'year': [
            '2017', '2018', '2019',
            '2020', '2021',
        ],
        'version': 'v6.0',
        'format': 'zip',
    },
    'download.zip')

In [133]:
from netCDF4 import Dataset
import pandas as pd

In [101]:
#Not added to Github as its a huge file.
dataset = Dataset("IMERG_total_precipitation_day_0.2x0.2_global_2021_v6.0.nc", mode='r')
dataset.variables.keys()

dict_keys(['time', 'lon', 'lat', 'pr'])

In [102]:
dataset.variables

{'time': <class 'netCDF4._netCDF4.Variable'>
 float32 time(time)
     standard_name: time
     long_name: time
     units: days since 2000-06-19
     calendar: gregorian
     axis: T
 unlimited dimensions: time
 current shape = (300,)
 filling on, default _FillValue of 9.969209968386869e+36 used,
 'lon': <class 'netCDF4._netCDF4.Variable'>
 float32 lon(lon)
     standard_name: longitude
     long_name: Longitude
     units: degrees_east
     axis: X
 unlimited dimensions: 
 current shape = (1800,)
 filling on, default _FillValue of 9.969209968386869e+36 used,
 'lat': <class 'netCDF4._netCDF4.Variable'>
 float32 lat(lat)
     standard_name: latitude
     long_name: Latitude
     units: degrees_north
     axis: Y
 unlimited dimensions: 
 current shape = (900,)
 filling on, default _FillValue of 9.969209968386869e+36 used,
 'pr': <class 'netCDF4._netCDF4.Variable'>
 float32 pr(time, lat, lon)
     standard_name: lwe_precipitation_rate
     long_name: total daily precipitation
     units: 

In [85]:
dataset.variables["pr"][200,:,:]

masked_array(
  data=[[21.27661 , 23.745197],
        [34.667004, 37.246216]],
  mask=False,
  fill_value=1e+20,
  dtype=float32)

In [108]:
dataset.variables['pr'][1,:,:]

masked_array(
  data=[[--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        ...,
        [--, --, --, ..., --, --, --],
        [0.0, --, --, ..., --, --, 0.0],
        [0.0, 0.0, --, ..., --, 0.0, 0.0]],
  mask=[[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [False,  True,  True, ...,  True,  True, False],
        [False, False,  True, ...,  True, False, False]],
  fill_value=3e+33,
  dtype=float32)

In [86]:
import xarray as xr
import rioxarray as rio
import numpy as np
from netCDF4 import Dataset
import fiona
from osgeo import gdal, ogr

def clip_to_bounds(ncfile,variable, polygon, new_file_name):
    '''
    ncfile: Path to the .nc file
    variable: A .nc file can have many variables measured. But we'll present a GeoTIFF of only single variable/band.
    polygon: path to shapefile/geojson that to which the .nc file has to be clipped
    new_file_name: Path and name of the new files created. (Without extensions - we'll add extensions within the code as and when required)
    '''
    #Open and read the .nc file
    data = Dataset(ncfile, mode='r')#.groups['PRODUCT']

    lats = data.variables["lat"][:]
    lons = data.variables["lon"][:]
    tims = data.variables["time"][:]
    
    # Capture geographical bounds of the polygon
    with fiona.open(polygon) as f:
        geom_bounds = f.bounds
    #geom=f[0]['geometry']
    latbound = [geom_bounds[1],geom_bounds[3]]
    lonbound = [geom_bounds[0],geom_bounds[2]]
    
    
    # Making the bounding box for the satellite image - INDEXES
    lat_lb = np.argmin(abs(lats-latbound[0]))
    lat_ub = np.argmin(abs(lats-latbound[1]))

    lon_lb = np.argmin(abs(lons-lonbound[0]))
    lon_ub = np.argmin(abs(lons-lonbound[1]))
    
    #Reading only values in the bounding box
    lat_sub = lats[lat_lb:lat_ub]
    lon_sub = lons[lon_lb:lon_ub]
    
    # Create a new empty NetCDF Files into which the subset of given netCDF file will be written
    my_file = Dataset(str(new_file_name)+'.nc','w',format='NETCDF4')
    
    # Add dimensions to the empty NDF
    ldim = abs(lat_lb-lat_ub)
    lndim = abs(lon_ub-lon_lb)
    
    lat_dim = my_file.createDimension('lat',ldim)
    lon_dim = my_file.createDimension('lon',lndim)
    time_dim = my_file.createDimension('time',None)
    
    
    #Create variables in the empty NetCDF
    time = my_file.createVariable('time', np.float32, ('time',))
    time.units=data.variables['time'].units
    time.long_name='time'

    latitudes = my_file.createVariable("lat", 'f4', ('lat',))
    latitudes.units=data.variables['lat'].units

    longitudes = my_file.createVariable("lon", 'f4', ('lon',))
    longitudes.units=data.variables['lon'].units
    
    
    new_nc_variable = my_file.createVariable(str(variable), np.float32, ('time','lat','lon'))
    new_nc_variable.units=data.variables[str(variable)].units
    
    ## Write data to variables
    latitudes[:] = lat_sub
    longitudes[:] = lon_sub

    for i,val in enumerate(tims):
        time[i] = val
        new_nc_variable[i,:,:] = data.variables[variable][i,lat_lb:lat_ub,lon_lb:lon_ub]
        
    data.close()
    my_file.close()
    
    
    # Convert to the clipped file to GeoTiff and save it.
    sub_ncfile = xr.open_dataset(str(new_file_name)+'.nc')
    var = sub_ncfile[variable]
    var = var.rio.set_spatial_dims('lon','lat')
    var.rio.set_crs("epsg:4326")
    var.rio.to_raster(str(new_file_name)+r".tif")
    
    sub_ncfile.close()
    # Clip to polygon and save it.
    OutTile = gdal.Warp(str(new_file_name)+r"_clipped.tif", 
                    str(new_file_name)+r".tif", 
                    cutlineDSName=polygon,
                    cropToCutline=True,
                    dstNodata = 0)
    OutTile = None 
    return None

In [87]:
#Files not added to Github as its a huge file.

clip_to_bounds("IMERG_total_precipitation_day_0.2x0.2_global_2021_v6.0.nc","pr","Delhi.geojson",'Delhi_pr_2021')
clip_to_bounds("IMERG_total_precipitation_day_0.2x0.2_global_2020_v6.0.nc","pr","Delhi.geojson",'Delhi_pr_2020')
clip_to_bounds("IMERG_total_precipitation_day_0.2x0.2_global_2019_v6.0.nc","pr","Delhi.geojson",'Delhi_pr_2019')
clip_to_bounds("IMERG_total_precipitation_day_0.2x0.2_global_2018_v6.0.nc","pr","Delhi.geojson",'Delhi_pr_2018')
clip_to_bounds("IMERG_total_precipitation_day_0.2x0.2_global_2017_v6.0.nc","pr","Delhi.geojson",'Delhi_pr_2017')

In [167]:
delhi_rainfall = []
days_since_2000_06_19 = []
for year in range(2017,2022):
    delhi_pr = Dataset("Delhi_pr_"+str(year)+".nc", mode='r')
    
    for i in range(delhi_pr.variables['time'].shape[0]):
        delhi_rainfall.append(delhi_pr.variables['pr'][i,:,:].mean())
        days_since_2000_06_19.append(int(delhi_pr.variables['time'][i].data.mean()))

In [168]:
delhi_precipitation_df = pd.DataFrame([days_since_2000_06_19,delhi_rainfall]).T
delhi_precipitation_df.columns = ['days_since_2000_06_19','Precipitation (mm/day)']
delhi_precipitation_df['days_since_2000_06_19'] = delhi_precipitation_df['days_since_2000_06_19'].astype(int)
delhi_precipitation_df

Unnamed: 0,days_since_2000_06_19,Precipitation (mm/day)
0,6040,0.000000
1,6041,0.000000
2,6042,0.000000
3,6043,0.000000
4,6044,0.000000
...,...,...
1756,7796,0.000000
1757,7797,22.614918
1758,7798,0.000000
1759,7799,0.000000


In [160]:
import datetime
from datetime import date, timedelta

days_after = ()
days_after


datetime.datetime(2017, 1, 1, 0, 0)

In [171]:
satellite_first_image = datetime.datetime(2000, 6, 19)

delhi_precipitation_df['date'] = delhi_precipitation_df['days_since_2000_06_19'].apply(lambda x:satellite_first_image+timedelta(days=x))
delhi_precipitation_df[['date','Precipitation (mm/day)']].to_csv('Delhi_rainfall_timeseries.csv',index=False)

Unnamed: 0,date,Precipitation (mm/day)
0,2017-01-01,0.000000
1,2017-01-02,0.000000
2,2017-01-03,0.000000
3,2017-01-04,0.000000
4,2017-01-05,0.000000
...,...,...
1756,2021-10-23,0.000000
1757,2021-10-24,22.614918
1758,2021-10-25,0.000000
1759,2021-10-26,0.000000


# Google Earth Engine

Data would be downloaded at two aggregations:
1. Delhi UT level.
2. District level in Delhi.

### NIGHT LIGHTS DATA - STRAY LIGHT CORRECTED - NOAA VIIRS

In [2]:
import ee
import geemap

In [173]:
admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2")
delhi = admin2.filter(ee.Filter.eq('ADM1_NAME', 'Delhi'))
geometry = delhi.geometry()

delhi_districts = ee.FeatureCollection("projects/ee-saikrishna/assets/Delhi_districts");

dataset = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMSLCFG') \
                  .filter(ee.Filter.date('2017-01-01', '2022-04-22')).filter(ee.Filter.bounds(geometry))


#print(dataset.size())

night_lights_timeseries = dataset.select('avg_rad').toBands()

#Delhi UT Level.
geemap.zonal_statistics(night_lights_timeseries, delhi, 'night_lights_timeseries.csv', statistics_type='MEAN', scale=30)

Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/00ee41c66400e78d1dbbfa344ce107dd-e8172c8fe54088c790d0e4c0de138f75:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\night_lights_timeseries.csv


In [174]:
#Delhi district Level.
geemap.zonal_statistics(night_lights_timeseries, delhi_districts, 'district_level_night_lights_timeseries.csv', statistics_type='MEAN', scale=30)

Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/caee29ea0fc6eacbf3f1e18863589b2e-f9742c880ba753cf3071f95b0543e817:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\district_level_night_lights_timeseries.csv


### POLLUTION DATA - TROPOMI

In [176]:
admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2")
delhi = admin2.filter(ee.Filter.eq('ADM1_NAME', 'Delhi'))
geometry = delhi.geometry()

delhi_districts = ee.FeatureCollection("projects/ee-saikrishna/assets/Delhi_districts");

dataset = ee.ImageCollection('COPERNICUS/S5P/NRTI/L3_NO2') \
                  .filter(ee.Filter.date('2017-01-01', '2022-04-22')).filter(ee.Filter.bounds(geometry))


#print(dataset.size())

no2_pollution = dataset.select('NO2_column_number_density').toBands()

#Delhi UT Level.
geemap.zonal_statistics(no2_pollution, delhi, 'NO2_column_number_density.csv', statistics_type='MEAN', scale=30)

Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/2e25459b65838ef0bbacddbb5f6521b3-39d1994e4eb41ac457bcae720f233712:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\NO2_column_number_density.csv


In [177]:
#Delhi districts Level.
geemap.zonal_statistics(no2_pollution, delhi_districts, 'district_level_NO2_column_number_density.csv', statistics_type='MEAN', scale=30)

Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/39608e621e287c2d87a092252e0b4ae6-de05958eaa53645c86b369e044026680:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\district_level_NO2_column_number_density.csv


### AEROSOL DATA - MODIS

In [4]:
admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2")
delhi = admin2.filter(ee.Filter.eq('ADM1_NAME', 'Delhi'))
geometry = delhi.geometry()

delhi_districts = ee.FeatureCollection("projects/ee-saikrishna/assets/Delhi_districts");

dataset = ee.ImageCollection('MODIS/006/MCD19A2_GRANULES');

def getQABits(image, start, end, newName):
   # Compute the bits we need to extract
    pattern = 0;
    for i in range(start, end+1):
        pattern = pattern + 2**i;
        i = i+1;
    #Return a single band image of the extracted QA bits, giving the band a new name.
    return image.select([0], [newName]).bitwiseAnd(pattern).rightShift(start);


def maskQuality(image):
    #Select QA band
    QA = image.select('AOD_QA');
    # Get the internal_cloud_algorithm_flag bit.
    internalQuality = getQABits(QA,8, 11, 'internal_quality_flag');
    #Return an image masking out cloudy areas.
    return image.updateMask(internalQuality.eq(0));
        
        
#Delhi UT Level.
for year in range(2018,2022):
    print(year)
    filtered = dataset.filterDate(str(year)+'-01-01', str(year+1)+'-04-22').filterBounds(geometry);
    AODmaskQ = filtered.map(maskQuality).select('Optical_Depth_047').toBands();

    
    #NDVI - Delhi UT Level.
    geemap.zonal_statistics(AODmaskQ, delhi, str(year)+'_aod.csv', statistics_type='MEAN', scale=30)

2018
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/fda9fa2639ff930772a4f25e4b636248-7d7215ca375cb257978f6f1b6c29bf89:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\2018_aod.csv
2019
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/d28890ff0c2d7fd9cc002afa7a788fde-9aeb418822cd56cad44aa1986e47ef06:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\2019_aod.csv
2020
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/30229eb2b25aa2c6ca09e39704b5f744-14e350698e697b3d897e83d50300d1db:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\2020_aod.csv
2021
Computing statistics ...
Generating URL ...
Downloading dat

In [179]:
#Delhi districts Level.
geemap.zonal_statistics(AODmaskQ, delhi_districts, 'district_level_aerosol.csv', statistics_type='MEAN', scale=30)

Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/342f4853f8d8528707b17c58bc8d64c8-8b2e004460e2d82db4a54039131a46af:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\district_level_aerosol.csv


### TEMPERATURE DATA

In [180]:
admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2")
delhi = admin2.filter(ee.Filter.eq('ADM1_NAME', 'Delhi'))
geometry = delhi.geometry()

delhi_districts = ee.FeatureCollection("projects/ee-saikrishna/assets/Delhi_districts");

dataset = ee.ImageCollection('IDAHO_EPSCOR/TERRACLIMATE') \
                  .filter(ee.Filter.date('2017-01-01', '2022-04-22')).filter(ee.Filter.bounds(geometry))


#print(dataset.size())

max_temp = dataset.select('tmmx').toBands()

#Delhi UT Level.
geemap.zonal_statistics(max_temp, delhi, 'max_temp.csv', statistics_type='MEAN', scale=30)

Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/c24c090790703e8d9bf46fbc9974240e-bc753a1e2d8a9e3411b9420ef7007859:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\max_temp.csv


In [181]:
#Delhi districts Level.
geemap.zonal_statistics(max_temp, delhi_districts, 'district_level_max_tempt.csv', statistics_type='MEAN', scale=30)

Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/5c3fbe21c4fc88fc0fb2f965ecdab4a5-3dcacb56426aeacfe62f2534de97d1ef:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\district_level_max_tempt.csv


### NDVI, MNDWI, NDTI INDICES

In [41]:
#NDVI
admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2")
delhi = admin2.filter(ee.Filter.eq('ADM1_NAME', 'Delhi'))
geometry = delhi.geometry()

delhi_districts = ee.FeatureCollection("projects/ee-saikrishna/assets/Delhi_districts");

dataset = ee.ImageCollection('COPERNICUS/S2')


#print(filtered.size())
def maskS2clouds(image):
    qa = image.select('QA60')
    cloudBitMask = 1 << 10;
    cirrusBitMask = 1 << 11;
    mask = qa.bitwiseAnd(cloudBitMask).eq(0).And(qa.bitwiseAnd(cirrusBitMask).eq(0));
    
    return image.updateMask(mask).divide(10000).select("B.*").copyProperties(image, ["system:time_start"]);

def addIndices(image):
    ndvi = image.normalizedDifference(['B8', 'B4']).rename('ndvi');
    ndwi = image.normalizedDifference(['B3', 'B8']).rename('ndwi');
    mndwi = image.normalizedDifference(['B3', 'B11']).rename(['mndwi']); 
    ndti = image.normalizedDifference(['B4', 'B3']).rename(['ndti']); 
    masked_ndti = ndti.mask(mndwi); #For NDTI - we use MNDWI as mask

    return image.addBands(ndvi).addBands(ndwi).addBands(mndwi).addBands(masked_ndti);

for year in range(2017,2022):
    print(year)
    filtered = dataset.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 30)).filter(ee.Filter.date(str(year)+'-01-01', str(year+1)+'-04-22')).filter(ee.Filter.bounds(geometry));
    filtered = filtered.map(maskS2clouds);
    withIndices = filtered.map(addIndices);
    ndvi = withIndices.select('ndvi').toBands()
    
    #NDVI - Delhi UT Level.
   # geemap.zonal_statistics(ndvi, delhi, str(year)+'_ndvi.csv', statistics_type='MEAN', scale=10)

In [62]:
for year in range(2018,2022):
    print(year)
    filtered = dataset.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 30)).filter(ee.Filter.date(str(year)+'-01-01', str(year+1)+'-04-22')).filter(ee.Filter.bounds(geometry));
    filtered = filtered.map(maskS2clouds);
    withIndices = filtered.map(addIndices);
    ndti = withIndices.select('ndti').toBands()

    
    #NDVI - Delhi UT Level.
    geemap.zonal_statistics(ndti, delhi, str(year)+'_ndti.csv', statistics_type='MEAN', scale=10)

2018
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/60b8e3df9ae306eb87512371eecb7d1c-cb31f30f8202af8bd97726adaf4275c3:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\2018_ndti.csv
2019
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/e986eb68d93a128226a4e5f7a9ded938-d97aa86759787a0aa45872ab70897d23:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\2019_ndti.csv
2020
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/6aace7dbaf5717a953197fd27ac4fb6b-ed2291e55e99546716bc86cd3fb35664:getFeatures
Please wait ...
Data downloaded to D:\GIS_RemoteSensing\Untitled Folder\2020_ndti.csv
2021
Computing statistics ...
Generating URL ...
Downloading 

In [None]:
for year in range(2017,2022):
    print(year)
    filtered = dataset.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 30)).filter(ee.Filter.date(str(year)+'-01-01', str(year+1)+'-04-22')).filter(ee.Filter.bounds(geometry));
    filtered = filtered.map(maskS2clouds);
    withIndices = filtered.map(addIndices);
    ndti = withIndices.select('ndti').toBands()
    
    #NDVI - Delhi UT Level.
    geemap.zonal_statistics(ndti, delhi_districts, str(year)+'delhi_districts_ndti.csv', statistics_type='MEAN', scale=10)

# Data Pre-Processing

In [1]:
import pandas as pd
import numpy as np

In [2]:
#Functions to prepare datasets
def prep_dataset(csv_file,metric_name,separator,date_pos=0):
    
    df = pd.read_csv(csv_file)
    df = df.T.reset_index()[:-13]
    df['date']=df['index'].str.split(separator).str[date_pos]
    
    if df['date'][0][:1]=='A':
        df['year'] = df['date'].str[1:5]
        df['day'] = df['date'].str[5:]
        # converting to date
        df['date'] = pd.to_datetime(df['year'].astype(int) * 1000 + df['day'].astype(int), format='%Y%j')
        df = df.drop(['year','day'],axis=1)
    elif len(df['date'][4])>7:
        df['date']=df['date'].str[:4]+"-"+df['date'].str[4:6]+"-"+df['date'].str[6:]
    else:
         df['date']=df['date'].str[:4]+"-"+df['date'].str[4:6]+"-"+"01"
    
    df = df.drop(['index'],axis=1)
    
    df['date'] = pd.to_datetime(df['date'])
    df.columns = [metric_name,'date']
    
    df = df.groupby('date')[metric_name].mean().reset_index()
    df = df.set_index('date')
    
    df = df.fillna(np.NaN)
    return df


def baseline(df,baseline_start,baseline_end):
    
    metrics = df.columns[:-1].copy()
    for metric_name in metrics:  ## Mobility change is already in baseline so avoiding it.
        variable_baseline = metric_name+str('_change_from_baseline')
        baseline = df.loc[baseline_start:baseline_end][metric_name].mean()
        df[variable_baseline] = df[metric_name].apply(lambda x: 100*(x -baseline)/baseline)
        df = df.drop(metric_name,axis=1)
        df = df.fillna(np.NaN)
    return df
        

def monthly_means_baseline(df):
    
    df_means = []
    for metric_name in df.columns:
        df_monthlymean = df[metric_name].resample('M').mean()
        dates = df_monthlymean.index + pd.offsets.MonthBegin(1)  #shifting date to first date of next month (average of last month)
        df_monthlymean = pd.DataFrame([dates,df_monthlymean]).T
        df_monthlymean.columns = ['date','avg_'+metric_name]
        df_monthlymean = df_monthlymean.fillna(np.NaN)
        df_means.append(df_monthlymean.set_index('date'))
    
    return pd.concat(df_means,axis=1)
    

### NDTI - Yamuna

In [3]:
# NDTI.csv Downloaded from Earth Engine directly
#ndti = pd.read_csv('NDTI.csv')
#ndti['date'] = pd.to_datetime(ndti['system:time_start'])
#ndti = ndti.drop(['system:time_start'],axis=1).set_index('date')

In [4]:
ndti_2017_df = prep_dataset('Data/2017_ndti.csv','ndti','T')
ndti_2018_df = prep_dataset('Data/2018_ndti.csv','ndti','T')
ndti_2019_df = prep_dataset('Data/2019_ndti.csv','ndti','T')
ndti_2020_df = prep_dataset('Data/2020_ndti.csv','ndti','T')
ndti_2021_df = prep_dataset('Data/2021_ndti.csv','ndti','T')
ndti_df = pd.concat([ndti_2017_df,ndti_2018_df,ndti_2019_df,ndti_2020_df,ndti_2021_df])
ndti_df = ndti_df.reset_index().drop_duplicates('date').set_index('date')

### NDVI

In [5]:
ndvi_2017_df = prep_dataset('Data/2017_ndvi.csv','ndvi','T')
ndvi_2018_df = prep_dataset('Data/2018_ndvi.csv','ndvi','T')
ndvi_2019_df = prep_dataset('Data/2019_ndvi.csv','ndvi','T')
ndvi_2020_df = prep_dataset('Data/2020_ndvi.csv','ndvi','T')
ndvi_2021_df = prep_dataset('Data/2021_ndvi.csv','ndvi','T')
ndvi_df = pd.concat([ndvi_2017_df,ndvi_2018_df,ndvi_2019_df,ndvi_2020_df,ndvi_2021_df])
ndvi_df = ndvi_df.reset_index().drop_duplicates('date').set_index('date')

### TEMP

In [6]:
temp_df = prep_dataset('Data/max_temp.csv','tmmx','_')

### PRECIPITATION

In [7]:
rain_df = pd.read_csv('Data/Delhi_rainfall_timeseries.csv')
rain_df['date'] = pd.to_datetime(rain_df['date'])
rain_df = rain_df.set_index('date')

### AEROSOLS

In [8]:
aod_2017_df = prep_dataset('Data/2017_aod.csv','aod','_',1)
aod_2018_df = prep_dataset('Data/2018_aod.csv','aod','_',1)
aod_2019_df = prep_dataset('Data/2019_aod.csv','aod','_',1)
aod_2020_df = prep_dataset('Data/2020_aod.csv','aod','_',1)
aod_2021_df = prep_dataset('Data/2021_aod.csv','aod','_',1)
aod_df = pd.concat([aod_2017_df,aod_2018_df,aod_2019_df,aod_2020_df,aod_2021_df])
aod_df = aod_df.reset_index().drop_duplicates('date').set_index('date')

### NO2 POLLUTION

In [9]:
no2_df = prep_dataset('Data/NO2_column_number_density.csv','NO2_column_number_density','T')

### NIGHT LIGHTS

In [10]:
global nightlights_df
nightlights_df = prep_dataset('Data/night_lights_timeseries.csv','avg_rad','_')

### MOBILITY

In [11]:
global mobility_df
mobility_df = pd.read_csv('Data/Delhi_citylevel_Mobility.csv')
mobility_df['date'] = pd.to_datetime(mobility_df['date'])
mobility_df = mobility_df.set_index(['date'])

## MASTER TIME SERIES DATA

In [12]:
delhi_timeseries = pd.DataFrame(pd.date_range(start='2017-01-01', end='2022-04-22', freq='D'))
delhi_timeseries.columns = ['date']

In [13]:
delhi_timeseries = delhi_timeseries.merge(ndti_df,on='date',how='outer').merge(ndvi_df,on='date',how='outer')\
.merge(temp_df,on='date',how='outer').merge(rain_df,on='date',how='outer')\
.merge(aod_df,on='date',how='outer').merge(no2_df,on='date',how='outer')\
.merge(nightlights_df,on='date',how='outer')\
.merge(mobility_df[['retail_and_recreation_percent_change_from_baseline']],on='date',how='outer').set_index('date')
delhi_timeseries.to_csv('Data/master_timeseries.csv')
delhi_timeseries.sample(5)

Unnamed: 0_level_0,ndti,ndvi,tmmx,Precipitation (mm/day),aod,NO2_column_number_density,avg_rad,retail_and_recreation_percent_change_from_baseline
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-11-29,,,,0.0,1357.82132,0.000184,,
2022-03-06,,,,,528.347507,0.000164,,-18.0
2018-03-28,,,,0.0,402.614949,,,
2017-11-03,,,,0.0,2227.745435,,,
2020-05-20,,,,0.0,473.576595,0.000134,,-74.0


In [14]:
delhi_timeseries_baselinechange = baseline(delhi_timeseries,'2020-01-03','2020-02-06')
delhi_timeseries_baselinechange.to_csv('Data/delhi_baseline.csv')
delhi_timeseries_baselinechange.tail(5)

Unnamed: 0_level_0,retail_and_recreation_percent_change_from_baseline,ndti_change_from_baseline,ndvi_change_from_baseline,tmmx_change_from_baseline,Precipitation (mm/day)_change_from_baseline,aod_change_from_baseline,NO2_column_number_density_change_from_baseline,avg_rad_change_from_baseline
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-04-18,-21.0,,,,,,-0.872347,
2022-04-19,-21.0,-61.716026,-52.714924,,,,-21.645898,
2022-04-20,,,,,,,-30.767525,
2022-04-21,,,,,,,,
2022-04-22,,,,,,,,


Google Mobility data is available every day and hence they could define a much more nuanced baseline value. Many of our variables are not available at a daily frequency. Hence, we consider the mean of values from 2020 Jan3rd to Feb6th as baseline values for all other variables.

In [15]:
delhi_monthly_means = monthly_means_baseline(delhi_timeseries.drop(['tmmx','avg_rad'],axis=1))
delhi_monthly_means = pd.merge(delhi_monthly_means, delhi_timeseries[['tmmx','avg_rad']].dropna(how='all').reset_index(),on='date',how='outer').set_index('date')
delhi_monthly_means = delhi_monthly_means.sort_values(by='date')
delhi_monthly_means.to_csv('Data/delhi_monthly_means.csv')
delhi_monthly_means.head()

Unnamed: 0_level_0,avg_ndti,avg_ndvi,avg_Precipitation (mm/day),avg_aod,avg_NO2_column_number_density,avg_retail_and_recreation_percent_change_from_baseline,avg_ndti_change_from_baseline,tmmx,avg_rad
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-01-01,,,,,,,,212.051731,25.657537
2017-02-01,-0.077274,0.19319,1.703994,967.711384,,,-2.71631,252.778797,32.753082
2017-03-01,-0.094056,0.295021,0.068811,510.898451,,,18.41012,306.769605,30.061633
2017-04-01,-0.092564,0.303732,0.425046,524.490124,,,16.531651,381.251097,27.236079
2017-05-01,-0.061425,0.160273,0.480088,590.826372,,,-22.670347,403.388256,23.60256


# Appendix - Earth Engine JS Codes.

In [None]:
#AOD
var admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2");
var delhi = admin2.filter(ee.Filter.eq('ADM1_NAME', 'Delhi'));
var geometry = delhi.geometry();

var collection = ee.ImageCollection('MODIS/006/MCD19A2_GRANULES').filterDate('2019-12-01', '2019-12-02').filterBounds(geometry);
                  

// helper function to extract the QA bits
function getQABits(image, start, end, newName) {
    // Compute the bits we need to extract.
    var pattern = 0;
    for (var i = start; i <= end; i++) {
       pattern += Math.pow(2, i);
    }
    // Return a single band image of the extracted QA bits, giving the band
    // a new name.
    return image.select([0], [newName])
                  .bitwiseAnd(pattern)
                  .rightShift(start);
}


// A function to mask out cloudy pixels.
function maskQuality(image) {
  // Select the QA band.
  var QA = image.select('AOD_QA');
  // Get the internal_cloud_algorithm_flag bit.
  var internalQuality = getQABits(QA,8, 11, 'internal_quality_flag');
  // Return an image masking out cloudy areas.
  return image.updateMask(internalQuality.eq(0));
}

// create cloud free composite
var AODmaskQ = collection.map(maskQuality).select('Optical_Depth_047');

print(AODmaskQ.size())


var band_viz = {
  min: 0,
  max: 500,
  palette: ['black', 'blue', 'purple', 'cyan', 'green', 'yellow', 'red']
};

Map.addLayer(AODmaskQ.mean().clip(geometry), band_viz, 'Optical Depth 047');
Map.centerObject(geometry);

In [None]:
# ERA - 5
var admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2");
var delhi = admin2.filter(ee.Filter.eq('ADM1_NAME', 'Delhi'));
var geometry = delhi.geometry();

// Daily mean 2m air temperature
var era5_2mt = ee.ImageCollection('ECMWF/ERA5/DAILY')
                   .select('mean_2m_air_temperature')
                   .filter(ee.Filter.date('2017-01-01', '2019-07-31'));
print(era5_2mt);

// Daily total precipitation sums
var era5_tp = ee.ImageCollection('ECMWF/ERA5/DAILY')
                  .select('total_precipitation')
                  .filter(ee.Filter.date('2017-01-01', '2022-04-22'));

// Daily mean 2m dewpoint temperature
var era5_2d = ee.ImageCollection('ECMWF/ERA5/DAILY')
                  .select('dewpoint_2m_temperature')
                  .filter(ee.Filter.date('2019-07-01', '2019-07-31'));

// Daily mean sea-level pressure
var era5_mslp = ee.ImageCollection('ECMWF/ERA5/DAILY')
                    .select('mean_sea_level_pressure')
                    .filter(ee.Filter.date('2019-07-01', '2019-07-31'));

// Daily mean surface pressure
var era5_sp = ee.ImageCollection('ECMWF/ERA5/DAILY')
                  .select('surface_pressure')
                  .filter(ee.Filter.date('2019-07-01', '2019-07-31'));

// Daily mean 10m u-component of wind
var era5_u_wind_10m = ee.ImageCollection('ECMWF/ERA5/DAILY')
                          .select('u_component_of_wind_10m')
                          .filter(ee.Filter.date('2019-07-01', '2019-07-31'));

// Convert pressure levels from Pa to hPa - Example for surface pressure
var era5_sp = era5_sp.map(function(image) {
  return image.divide(100).set(
      'system:time_start', image.get('system:time_start'));
});

// Visualization palette for total precipitation
var visTp = {
  min: 0,
  max: 0.1,
  palette: ['#FFFFFF', '#00FFFF', '#0080FF', '#DA00FF', '#FFA400', '#FF0000']
};

// Visualization palette for temperature (mean, min and max) and 2m dewpoint
// temperature
var vis2mt = {
  min: 250,
  max: 320,
  palette: [
    '#000080', '#0000D9', '#4000FF', '#8000FF', '#0080FF', '#00FFFF', '#00FF80',
    '#80FF00', '#DAFF00', '#FFFF00', '#FFF500', '#FFDA00', '#FFB000', '#FFA400',
    '#FF4F00', '#FF2500', '#FF0A00', '#FF00FF'
  ]
};

// Visualization palette for u- and v-component of 10m wind
var visWind = {
  min: 0,
  max: 30,
  palette: [
    '#FFFFFF', '#FFFF71', '#DEFF00', '#9EFF00', '#77B038', '#007E55', '#005F51',
    '#004B51', '#013A7B', '#023AAD'
  ]
};

// Visualization palette for pressure (surface pressure, mean sea level
// pressure) - adjust min and max values for mslp to min:990 and max:1050
var visPressure = {
  min: 500,
  max: 1150,
  palette: [
    '#01FFFF', '#058BFF', '#0600FF', '#DF00FF', '#FF00FF', '#FF8C00', '#FF8C00'
  ]
};


// Add layer to map
Map.addLayer(
    era5_tp.filter(ee.Filter.date('2019-07-15')), visTp,
    'Daily total precipitation sums');
Map.addLayer(
    era5_2d.filter(ee.Filter.date('2019-07-15')), vis2mt,
    'Daily mean 2m dewpoint temperature');
Map.addLayer(
    era5_2mt.filter(ee.Filter.date('2019-07-15')), vis2mt,
    'Daily mean 2m air temperature');
Map.addLayer(
    era5_u_wind_10m.filter(ee.Filter.date('2019-07-15')), visWind,
    'Daily mean 10m u-component of wind');
Map.addLayer(
    era5_sp.filter(ee.Filter.date('2019-07-15')), visPressure,
    'Daily mean surface pressure');

Map.setCenter(21.2, 22.2, 2);

var chart = ui.Chart.image.series({
  imageCollection: era5_tp,
  region: geometry,
  reducer: ee.Reducer.mean(),
  //scale: 20
}).setOptions({
      lineWidth: 1,
      title: 'ERA-5 Total Precipitation',
      interpolateNulls: true,
      vAxis: {title: 'Total Precipitation (mm/day)'},
      hAxis: {title: '', format: 'YYYY-MMM'}
    })
print(chart);

In [None]:
# TEMP - TERRA CLIMATE
var terraclimate = ee.ImageCollection("IDAHO_EPSCOR/TERRACLIMATE");
var admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2");
var delhi = admin2.filter(ee.Filter.eq('ADM1_NAME', 'Delhi'));
var geometry = delhi.geometry();


// Select max temparature
// 2016 was one of the hottest year on record in Karnataka
var filtered = terraclimate.select(['tmmx'])
  .filter(ee.Filter.date('2017-04-01', '2016-04-30'))

// Terraclimate data has a resolution of 2.5 arc minutes
// How do we get the equivalent value in meters for the Export function?
// We get an image from the original collection and check it's scale
// Important: Make sure to do this before you create a composite/mosaic
var projection = ee.Image(filtered.first()).projection()
print(projection)
var scale = projection.nominalScale()
print(scale)


// Temparature values have a scale of 0.1
var scaled = filtered.map(function(image){
  return image.multiply(0.1)
    .copyProperties(image,['system:time_start'])
});

// Our collection has just 1 image at this point. 
// But if we had images for multiple months, we can call max() to get
// maximum value at pixel from multiple images.
// Calling a reducer on a collection gives us an image - which we can export
var maxTemperature = scaled.reduce(ee.Reducer.max()).clip(geometry)

In [None]:
## NDVI Time Series
var admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2");
var delhi = admin2.filter(ee.Filter.eq('ADM1_NAME', 'Delhi'));
var geometry = delhi.geometry();

var s2 = ee.ImageCollection('COPERNICUS/S2');

var filtered = s2.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 30)) //less than filter
  .filter(ee.Filter.date('2017-01-01', '2022-04-22'))
  .filter(ee.Filter.bounds(geometry)); 
  
print(filtered.size());

// Write a function for Cloud masking
function maskS2clouds(image) {
  var qa = image.select('QA60')
  var cloudBitMask = 1 << 10;
  var cirrusBitMask = 1 << 11;
  var mask = qa.bitwiseAnd(cloudBitMask).eq(0).and(
             qa.bitwiseAnd(cirrusBitMask).eq(0));
  return image.updateMask(mask)//.divide(10000)
      .select("B.*")
      .copyProperties(image, ["system:time_start"]);
}
var filtered = filtered.map(maskS2clouds);

// Write a function that computes NDVI, NDWI for an image and adds it as a band
function addIndices(image) {
  var ndvi = image.normalizedDifference(['B8', 'B4']).rename('ndvi');
  var ndwi = image.normalizedDifference(['B3', 'B8']).rename('ndwi');
  var mndwi = image.normalizedDifference(['B3', 'B11']).rename(['mndwi']);
  var ndti = image.normalizedDifference(['B4', 'B3']).rename(['ndti'])
  
  var masked_ndti = ndti.mask(mndwi);
  return image.addBands(ndvi).addBands(ndwi).addBands(mndwi).addBands(masked_ndti);
}

// Map the function over the collection
var withIndices = filtered.map(addIndices);


// Display a time-series chart
var chart = ui.Chart.image.series({
  imageCollection: withIndices.select('ndti'),
  region: geometry,
  reducer: ee.Reducer.mean(),
  scale: 20,
}).setOptions({
      lineWidth: 1,
      title: 'NDTI Time Series - Yamuna',
      interpolateNulls: true,
      vAxis: {title: 'NDTI'},
      hAxis: {title: '', format: 'YYYY-MMM'}
    })
print(chart);

In [None]:
# NO2 TROPOMI
var admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2");
var delhi = admin2.filter(ee.Filter.eq('ADM1_NAME', 'Delhi'));
var geometry = delhi.geometry();

var collection = ee.ImageCollection('COPERNICUS/S5P/NRTI/L3_NO2')
  .select('NO2_column_number_density')
  .filterDate('2021-05-01', '2021-06-01');

var band_viz = {
  min: 0,
  max: 0.0002,
  palette: ['green', 'blue', 'purple', 'cyan', 'yellow', 'orange', 'red']
};

Map.addLayer(collection.mean().clip(geometry), band_viz, 'S5P N02');
Map.centerObject(geometry);

//var chart = ui.Chart.image.series({
//  imageCollection: collection,
//  region: geometry,
//  reducer: ee.Reducer.mean(),
//  //scale: 20
//}).setOptions({
//      lineWidth: 1,
//      title: 'NO2 Pollution',
//      interpolateNulls: true,
//      vAxis: {title: 'NO2 (mol/m2)'},
//      hAxis: {title: '', format: 'YYYY-MMM'}
//    })
//print(chart);

In [None]:
## Night lights - VIIRS
var admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2");
var delhi = admin2.filter(ee.Filter.eq('ADM1_NAME', 'Delhi'));
var geometry = delhi.geometry();

var dataset = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMSLCFG')
                  .filter(ee.Filter.date('2020-05-01', '2020-05-22')).filter(ee.Filter.bounds(geometry)); 
  
print(dataset.size());

var nighttime = dataset.select('avg_rad').mean().clip(geometry);
var nighttimeVis = {min: 0.0, max: 60.0, palette: ['black', 'white']};

Map.addLayer(nighttime, nighttimeVis, 'Nighttime');
Map.centerObject(geometry);

Export.image.toDrive({
    image: nighttime,
    description: 'Delhi_Nightlights_2020_apr',
    folder: 'earthengine',
    fileNamePrefix: 'Delhi_Nightlights_2020_apr',
    region: geometry,
    scale: 20,
    maxPixels: 1e9
})


//var chart = ui.Chart.image.series({
  //imageCollection: dataset.select('avg_rad'),
  //region: geometry,
  //reducer: ee.Reducer.mean(),
  //scale: 20
//}).setOptions({
    //  lineWidth: 1,
  //    title: 'Delhi - Average Radiance',
//      interpolateNulls: true,
      //vAxis: {title: 'Avg Radiance'},
    //  hAxis: {title: '', format: 'YYYY-MMM'}
  //  })
//print(chart);


In [None]:
# NDVI Bulk Compute
var admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2");
var delhi = admin2.filter(ee.Filter.eq('ADM1_NAME', 'Delhi'));
var geometry = delhi.geometry();

var s2 = ee.ImageCollection('COPERNICUS/S2');

var filtered = s2.filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 30)) //less than filter
  .filter(ee.Filter.date('2020-04-15', '2020-04-30'))
  .filter(ee.Filter.bounds(geometry)); 
  
print(filtered.size());
var image = filtered.median();

// Write a function that computes NDVI for an image and adds it as a band
function addIndices(image) {
  var ndvi = image.normalizedDifference(['B8', 'B4']).rename('ndvi');
  var ndwi = image.normalizedDifference(['B3', 'B8']).rename('ndwi');
  var mndwi = image.normalizedDifference(['B3', 'B11']).rename(['mndwi']); 
  var ndti = image.normalizedDifference(['B4', 'B3']).rename(['ndti']); 
  return image.addBands(mndwi).addBands(ndti);
}

// Map the function over the collection
var withIndices = filtered.map(addIndices);

var composite = withIndices.mean();
//var ndviComposite = composite.select('ndvi').clip(geometry);
//var ndwiComposite = composite.select('ndwi').clip(geometry);
var mndwiComposite = composite.select('mndwi').clip(geometry);
var ndtiComposite = composite.select('ndti').clip(geometry);

var masked_ndti = ndtiComposite.updateMask(mndwiComposite);

var ndvi_palette = [
  'FFFFFF', 'CE7E45', 'DF923D', 'F1B555', 'FCD163', '99B718',
  '74A901', '66A000', '529400', '3E8601', '207401', '056201',
  '004C00', '023B01', '012E01', '011D01', '011301'];

var ndviVis = {min:0, max:0.5, palette: ndvi_palette};
var ndwiVis = {min:0, max:1, palette: ['white','blue']};
var mndwiVis = {min:0, max:1, palette: ['white','blue']};
var rgbVis = {min: 0.0, max: 3000, bands: ['B4', 'B3', 'B2']};
var ndtiVis = {min:-0.2, max:0.1, palette: ['white','black']};


Map.addLayer(image.clip(geometry), rgbVis, 'Image');
//Map.addLayer(ndviComposite, ndviVis, 'ndvi');
//Map.addLayer(ndwiComposite, ndwiVis, 'ndwi');
Map.addLayer(mndwiComposite, mndwiVis, 'mndwi');
Map.addLayer(masked_ndti, ndtiVis, 'ndti');
Map.centerObject(geometry);

var stats = masked_ndti.reduceRegion({
  reducer: ee.Reducer.mean(),
  geometry: geometry,
  scale: 10,
  maxPixels: 1e10
  });
print(stats);

//Export.image.toDrive({
  //  image: ndviComposite,
  //  description: 'Delhi_NDVI_2019',
  //  folder: 'earthengine',
  //  fileNamePrefix: 'Delhi_NDVI_2019',
  //  region: geometry,
  //  scale: 20,
  //  maxPixels: 1e9
//});