In [None]:
import pandas as pd
import numpy as np
import math
import pandas as pd
from pyhdf.SD import SD, SDC
import os
import math
import datetime as dt
import h5py

# Here we create 8-day data for Night Time Light emissions from anthropogenic sources

The VNP46A2 product used here is quite useful for studying night-time light emissions from human activities. It helps us understand patterns in human activity of all kinds, including COVID lockdowns and restrictions. The VNP46 suite estimates the light contributed by other sources, such as lunar (actual moonlight), aerosol and surface reflections. All these estimates are used to provide atmosphere- and lunar-corrected data, so that the light emissions from human-generated activities can be measured accurately. Temporal gap filling is also done to cope with any missing data or gaps.

The data is provided in the HDF-EOS HDF5 format, with the file extension .h5. We use the h5py package to parse and read the data. This is a daily product, so each file contains the data for a single day. Also, each file only contains the data of one 10 degree x 10 degree tile of the linear latitude/longitude grid. There are 460 such land tiles on the globe for which daily data is provided under this product. We selected a polygon to filter the data for India, which resulted in the selection of 14 tiles of 10 degree x 10 degree. So in total, with 14 tiles x roughly 1000 days of interest under this study, we have roughly 14000 files.

An example file is "VNP46A2.A2020008.h27v07.001.2021053073636.h5", which specifies that this file is for the 8th day of the year 2020. Also, it has the data of the tile which is 28th horizontally and 8th vertically (the tile numbers in the file name start at 0). Essentially it has the data of the following 10 degree x 10 degree cell:
- 90 degree to 100 degree longitude
- 20 degree to 10 degree latitude 

We use the "Gap_Filled_DNB_BRDF-Corrected_NTL" scientific dataset, which is the gap-filled, BRDF-corrected DNB NTL as explained above. The size of the dataset in each file is 2400 x 2400. Each cell essentially holds the value for a 500 meters x 500 meters area. Since the data for 10 degrees is given in 2400 linearly gridded cells, we generate data accordingly at three resolutions:
- 0.05 degree x 0.05 degree: Roughly an area of 5.6kms x 5.6kms. Here we combine 12 cells on both latitude and longitude dimensions to give us a 0.05 degree x 0.05 degree cell.
- 0.25 degree x 0.25 degree: Roughly an area of 28kms x 28kms. Here we combine 60 cells on both latitude and longitude dimensions to give us a 0.25 degree x 0.25 degree cell.
- 1 degree x 1 degree: Roughly an area of 110kms x 110kms. Here we combine 240 cells on both latitude and longitude dimensions to give us a 1 degree x 1 degree cell.
Each cell under any scheme is denoted by the centers of their range of coordinates.
Based on the coordinates available for each city, we map the city to one of the respective cell under each scheme.

Under any of the three resolution schemes, while aggregating the data, we record different statistical measures: mean, standard deviation, minimum, maximum, median, and the values at the 10th, 25th, 75th and 90th percentiles.

For all the three resolution schemes, we generate data at the following two temporal levels:
- 8 days: Each month is assigned four weeks - (1st to 8th, 9th to 16th, 17th to 24th, 25th to end of the month), so that they are standard for all years and can be compared across the years
- daily

In [None]:
# Input and output locations used by the rest of the notebook.
GRANULES_PATH = "ntl/VNP46A2/"
OUTPUT_PATH = "data/"
LOCATION_FILE = 'geonames-all-cities-with-a-population-1000-3.csv'
WEEK_MAPPINGS = 'data/week_mappings_ntl.csv'

# Map every week label to the list of granule file names that belong to it.
# In the mapping CSV the "granules" column holds the names joined by "#".
_granules_by_week = pd.read_csv(WEEK_MAPPINGS).set_index("week")["granules"].to_dict()
week_data = {
    week_label: joined_names.split("#")
    for week_label, joined_names in _granules_by_week.items()
}

In [None]:
# Datasets to aggregate: label written to the output CSV -> path of the
# dataset inside each granule file.
metrics = {
    "Gap_Filled_DNB_BRDF-Corrected_NTL":
        "/HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/Gap_Filled_DNB_BRDF-Corrected_NTL",
}

In [None]:
# Side length, in degrees, of the output grid cells.
# The notebook only handles the values 1, 0.25 and 0.05.
degree=1

In [None]:
# Build `locations`: the set of (lat, long) grid-cell centres, at the chosen
# `degree` resolution, that contain at least one city from LOCATION_FILE.
#
# Each data row of LOCATION_FILE is ";"-separated and its last field is the
# city coordinate written as "lat,long"; the first row is a header.

# Fail before reading anything: with an unsupported resolution the
# coordinates cannot be snapped to a cell, and every later step would be
# working on meaningless keys.
if degree not in (1, 0.25, 0.05):
    raise ValueError("wrong degree resolution provided! got " + repr(degree))

# NOTE(review): the file is read as UTF-8 — confirm this matches the export.
with open(LOCATION_FILE, "r", encoding="utf-8") as fp:
    evrythng = fp.readlines()

locations = set()
for line in evrythng[1:]:
    line = line.split("\n")[0]
    if not line.strip():
        # Tolerate blank lines (e.g. a trailing newline at the end of the file).
        continue
    lat, long = (float(value) for value in line.split(";")[-1].split(","))
    if degree == 1:
        # Cell centre is half a degree below the next integer boundary; the
        # min() keeps the centre of the topmost / rightmost cell in range.
        lat = min(math.ceil(lat), 90.0) - 0.5
        long = min(math.ceil(long), 180.0) - 0.5
    else:
        # Count whole cells down from 90 (lat) / 180 (long), then step half a
        # cell further to land on the centre. round(..., 3) removes float
        # noise so equal cells compare equal inside the set.
        half_cell = degree / 2
        lat = round(90 - (int((90 - lat) / degree) * degree + half_cell), 3)
        long = round(180 - (int((180 - long) / degree) * degree + half_cell), 3)
    locations.add((lat, long))

In [None]:
# Number of pixels along each side of one granule tile.
_TILE_PIXELS = 2400
# Pixels along each side of one output cell, keyed by cell size in degrees.
_PIXELS_PER_CELL = {1: 240, 0.25: 60, 0.05: 12}


def get_indices(location, granule, resolution=None):
    """Return the pixel window of a grid cell inside a granule tile.

    Pixel indices are first computed on a global grid whose row 0 is at
    latitude 90 and whose column 0 is at longitude -180, with 240 pixels per
    degree. They are then made relative to the tile named in `granule`.

    Args:
        location: (lat, long) centre of the grid cell, in degrees.
        granule: granule file name whose third "."-separated field is the
            tile id, e.g. "h27v07" in
            "VNP46A2.A2020008.h27v07.001.2021053073636.h5".
        resolution: cell size in degrees (1, 0.25 or 0.05). When omitted,
            the module-level `degree` is used.

    Returns:
        [(row_first, row_last), (col_first, col_last)] with inclusive bounds
        relative to the tile, or None when the cell does not start inside
        the tile.

    Raises:
        ValueError: if the resolution is not one of the supported values.
    """
    if resolution is None:
        resolution = degree
    try:
        cell_pixels = _PIXELS_PER_CELL[resolution]
    except (KeyError, TypeError):
        # Previously this only printed a message and then failed later on an
        # unbound local variable.
        raise ValueError(
            "wrong degree resolution provided! got " + repr(resolution)
        ) from None

    # Tile id looks like "h<horizontal>v<vertical>".
    h_text, v_text = granule.split(".")[2].lstrip("h").split("v")
    tile_row_origin = int(v_text) * _TILE_PIXELS
    tile_col_origin = int(h_text) * _TILE_PIXELS

    lat, long = location
    half_cell = resolution / 2
    # Top-left pixel of the cell on the global grid. round() rather than
    # int() so float noise just below an integer does not shift the window
    # by one pixel.
    row_first = round(((90 - lat - half_cell) / resolution) * cell_pixels) - tile_row_origin
    col_first = round(((long - half_cell + 180) / resolution) * cell_pixels) - tile_col_origin

    if not (0 <= row_first < _TILE_PIXELS and 0 <= col_first < _TILE_PIXELS):
        return None
    return [
        (row_first, row_first + cell_pixels - 1),
        (col_first, col_first + cell_pixels - 1),
    ]

In [None]:
# Aggregate every week's granules into per-cell statistics and append them
# to one CSV file (one row per location, week and metric).
OUTPUT_FILENAME = OUTPUT_PATH + "indian_cities_" + str(degree) + "degx" + str(degree) + "deg_ntl.csv"

# Start a fresh output file containing only the header row.
with open(OUTPUT_FILENAME, "w") as fp:
    fp.write(",".join(["location", "week", "metric", "mean", "sdev", "min", "10p", "25p", "median", "75p", "90p", "max"]) + "\n")

for week in week_data:
    print(week)
    # metric -> location -> list of 1-D arrays of valid pixel values
    week_metric_data = {metric: {} for metric in metrics}

    for granule in week_data[week]:
        # The pixel window of a cell depends only on the location and the
        # granule's tile, so resolve it once instead of once per metric.
        cell_windows = []
        for location in locations:
            indices = get_indices(location, granule)
            if indices is not None:
                cell_windows.append((location, indices))

        # The context manager closes the granule after reading; the previous
        # code left every opened file handle open.
        with h5py.File(GRANULES_PATH + granule, "r") as f:
            for metric in metrics:
                metric_data = f[metrics[metric]]
                for location, (lat_range, long_range) in cell_windows:
                    temp = metric_data[lat_range[0]:lat_range[1] + 1, long_range[0]:long_range[1] + 1]
                    # Keep only values in [0, 65535).
                    # NOTE(review): presumably this drops fill / no-data
                    # values — confirm against the product documentation.
                    temp = temp[(temp >= 0) & (temp < 65535)]
                    if temp.size == 0:
                        continue
                    # Collect chunks and concatenate once per location later,
                    # instead of re-copying the accumulated array on every
                    # np.append call.
                    week_metric_data[metric].setdefault(location, []).append(temp)

    with open(OUTPUT_FILENAME, "a") as fp:
        for metric, per_location in week_metric_data.items():
            for location, chunks in per_location.items():
                # float64 matches the dtype the statistics were computed in
                # when values were accumulated with np.append onto a list.
                values = np.concatenate(chunks).astype(np.float64)
                avg = round(np.mean(values))
                sdev = round(np.std(values))
                # Percentiles 0 and 100 are the minimum and maximum, giving
                # the header's order: min, 10p, 25p, median, 75p, 90p, max.
                levels = [round(np.percentile(values, th)) for th in [0, 10, 25, 50, 75, 90, 100]]
                fp.write(",".join([str(location[0]) + "#" + str(location[1]), week, metric] + [str(i) for i in [avg, sdev] + levels]) + "\n")

1.2019.week1
1.2019.week2
1.2019.week3
1.2019.week4
2.2019.week1
2.2019.week2
2.2019.week3
2.2019.week4
3.2019.week1
3.2019.week2
3.2019.week3
3.2019.week4
4.2019.week1
4.2019.week2
4.2019.week3
4.2019.week4
5.2019.week1
5.2019.week2
5.2019.week3
5.2019.week4
6.2019.week1
6.2019.week2
6.2019.week3
6.2019.week4
7.2019.week1
7.2019.week2
7.2019.week3
7.2019.week4
8.2019.week1
8.2019.week2
8.2019.week3
8.2019.week4
9.2019.week1
9.2019.week2
9.2019.week3
9.2019.week4
10.2019.week1
10.2019.week2
10.2019.week3
10.2019.week4
11.2019.week1
11.2019.week2
11.2019.week3
11.2019.week4
12.2019.week1
12.2019.week2
12.2019.week3
12.2019.week4
1.2020.week1
1.2020.week2
1.2020.week3
1.2020.week4
2.2020.week1
2.2020.week2
2.2020.week3
2.2020.week4
3.2020.week1
3.2020.week2
3.2020.week3
3.2020.week4
4.2020.week1
4.2020.week2
4.2020.week3
4.2020.week4
5.2020.week1
5.2020.week2
5.2020.week3
5.2020.week4
6.2020.week1
6.2020.week2
6.2020.week3
6.2020.week4
7.2020.week1
7.2020.week2
7.2020.week3
7.2020.week4
