## Data Compiling

In this notebook, I take the grid of climate divisions laid out by latitude and longitude and assign a drought time series to each "pixel". Each pixel on the map ends up with its own time series of 1009 weeks, and each week holds 6 values (None, D0, D1, D2, D3, D4). The final dataset is a 4D array of shape (105, 237, 1009, 6), i.e. (latitude, longitude, week, drought category).

In [1]:
import pandas as pd
import numpy as np
import datetime as dt

In [2]:
# Load the combined drought records with "Date" as the index and parse it to
# datetimes. Everything happens in one cell, starting from the file each time,
# so the cell can be re-run safely (an in-place set_index in its own cell
# raises KeyError on a second run because "Date" is no longer a column).
drt = pd.read_csv("./data/drought_data_combined.csv", index_col="Date")
drt.index = pd.to_datetime(drt.index)

In [5]:
# Preview the first rows: one row per (date, climate division) with the six
# drought-category columns None, D0, D1, D2, D3, D4.
drt.head()

Unnamed: 0_level_0,ClimateDivisionsID,Region,State,None,D0,D1,D2,D3,D4
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2000-12-26,3701,ALL,Rhode Island,100.0,0.0,0.0,0.0,0.0,0.0
2000-12-26,1808,ALLEGHENY PLATEAU,Maryland,19.46,80.54,0.0,0.0,0.0,0.0
2000-12-26,102,APPALACHIAN MOUNTAIN,Alabama,0.0,100.0,100.0,71.2,0.0,0.0
2000-12-26,1807,APPALACHIAN MOUNTAIN,Maryland,96.37,3.63,0.0,0.0,0.0,0.0
2000-12-26,501,ARKANSAS DRAINAGE BASIN,Colorado,100.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Load the climate-division grid and index it by its first (unnamed) column.
# Reading and indexing in a single expression keeps the cell idempotent: an
# in-place set_index in a separate cell fails with KeyError when re-run.
us_clim_div_grid = (
    pd.read_csv("./data/us_climate_division_grid.csv")
    .set_index("Unnamed: 0")
)

In [8]:
# Preview the grid: the index and the column labels are the coordinates used
# for the (lat, str(long)) lookup further down, and each cell holds a climate
# division ID (0 is handled downstream as an all-zero time series).
us_clim_div_grid.head()

Unnamed: 0_level_0,-125.0,-124.75,-124.5,-124.25,-124.0,-123.75,-123.5,-123.25,-123.0,-122.75,...,-68.25,-68.0,-67.75,-67.5,-67.25,-67.0,-66.75,-66.5,-66.25,-66.0
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
50.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
49.75,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
49.5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
49.25,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
49.0,0,0,0,0,0,0,0,0,0,4503,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# Grid axes on a 0.25-degree step.
# Latitudes run north to south: 50.0 down to 24.0 (105 values).
latitudes = [step * 0.25 for step in range(200, 95, -1)]

# Longitudes run west to east: -125.0 up to -66.0 (237 values).
longitudes = [step * 0.25 for step in range(-500, -263)]

# Every distinct observation date in the drought data, oldest first.
dates = drt.index.unique().sort_values().tolist()

In [10]:
def get_time_series(climate_division):
    """Return the drought time series for one climate division.

    Reads the notebook-level ``drt`` frame (indexed by date) and the
    notebook-level ``dates`` list.

    Parameters
    ----------
    climate_division : int
        Climate division ID as found in the grid. ``0`` yields an all-zero
        series without consulting ``drt``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(dates), 6)``. Rows follow the order of
        ``dates``; columns are None, D0, D1, D2, D3, D4.

    Raises
    ------
    KeyError
        If the division has no row for one of the entries in ``dates``.
    ValueError
        If the division does not have exactly one row per date.
    """
    categories = ["None", "D0", "D1", "D2", "D3", "D4"]
    n_weeks = len(dates)

    if climate_division == 0:
        return np.zeros((n_weeks, len(categories)))

    # Filter the frame once per division rather than once per date, then pull
    # every date in a single label lookup so the rows come back in `dates` order.
    division_rows = drt.loc[drt["ClimateDivisionsID"] == climate_division, categories]
    time_series = division_rows.loc[dates].to_numpy(dtype=float)

    # Duplicate dates would return extra rows; fail loudly instead of letting
    # a later reshape produce a confusing error.
    if time_series.shape != (n_weeks, len(categories)):
        raise ValueError(
            f"climate division {climate_division} has {time_series.shape[0]} rows "
            f"for {n_weeks} dates; expected exactly one row per date"
        )

    return time_series

In [11]:
# Build the (latitude, longitude, week, category) array.
#
# The array is allocated once and filled in place: appending to a growing
# flat array re-copies everything accumulated so far on each row. Pixels
# whose grid value is 0 keep their initial zeros, which matches what
# get_time_series(0) returns.
n_weeks = len(dates)
n_categories = 6  # None, D0, D1, D2, D3, D4
drought_4D_matrix = np.zeros((len(latitudes), len(longitudes), n_weeks, n_categories))

# Many pixels share a climate division, so compute each division's series once.
series_by_division = {}

for row, lat in enumerate(latitudes):
    for col, lon in enumerate(longitudes):
        climate_division = us_clim_div_grid.loc[lat, str(lon)]
        if climate_division == 0:
            continue
        if climate_division not in series_by_division:
            series_by_division[climate_division] = get_time_series(climate_division)
        drought_4D_matrix[row, col] = series_by_division[climate_division]

    rows_done = row + 1
    if rows_done % 5 == 0:
        print(f"{rows_done} rows generated")

# Reported after the loop: inside the loop the completion branch was never
# reached when the row count was a multiple of 5 (the progress branch won).
print("COMPLETE")

5 rows generated
10 rows generated
15 rows generated
20 rows generated
25 rows generated
30 rows generated
35 rows generated
40 rows generated
45 rows generated
50 rows generated
55 rows generated
60 rows generated
65 rows generated
70 rows generated
75 rows generated
80 rows generated
85 rows generated
90 rows generated
95 rows generated
100 rows generated
105 rows generated


In [12]:
# Sanity check: expect (len(latitudes), len(longitudes), 1009, 6).
drought_4D_matrix.shape

(105, 237, 1009, 6)

In [13]:
# Total number of scalar values once the 4D array is flattened to 1D.
drought_4D_matrix.ravel().shape

(150653790,)

In [14]:
# Persist the flattened array as a one-column CSV (to_csv also writes the
# default integer index). The 4D shape is not stored in the file, so a reader
# has to reshape the values back to (105, 237, 1009, 6) after loading.
pd.DataFrame(drought_4D_matrix.ravel()).to_csv("./data/drought_array.csv")