# Historical precipitation data processing
The data came from the NASA POWER API in 'long' format, so we convert it to 'wide' format here, giving every fire (unique ID) a single sum of precipitation for the preceding six months.

In [1]:
# Imports
import pandas as pd
import numpy as np

In [None]:
# Import of wildfires data so we have start date of the fire
# NOTE(review): the path below points at a directory rather than a CSV file,
# and this cell has no execution count, so it looks unfinished -- confirm the
# intended wildfire file name. `fires` is not referenced again in the cells
# visible in this notebook.
fires = pd.read_csv('../../data/')

In [2]:
# Load the historical precipitation records gathered through the POWER API.
# Goal of the following cells: a sum of snow and rain for the six months
# before each fire started.
precip_source = '../../data/raw/nasa_weather_last_180days.csv'
precip = pd.read_csv(precip_source)
precip.head(n=10)

Unnamed: 0,PARAMETER,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANN,ID
0,PRECSNO,2020,1.32,0.66,0.66,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.66,0.66,0.0,1
1,PRECTOTCORR_SUM,2020,52.73,21.09,26.37,10.55,21.09,26.37,0.0,0.0,5.27,0.0,31.64,21.09,216.21,1
2,PRECSNO,2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
3,PRECTOTCORR_SUM,2020,0.0,21.09,63.28,5.27,0.0,0.0,0.0,5.27,0.0,0.0,5.27,0.0,100.2,5
4,PRECSNO,2019,0.66,1.98,0.66,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.32,0.66,10
5,PRECSNO,2020,0.0,0.0,1.32,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.66,0.0,10
6,PRECTOTCORR_SUM,2019,89.65,163.48,68.55,21.09,42.19,0.0,10.55,0.0,31.64,0.0,5.27,63.28,495.7,10
7,PRECTOTCORR_SUM,2020,10.55,0.0,47.46,15.82,10.55,5.27,0.0,0.0,0.0,0.0,36.91,26.37,152.93,10
8,PRECSNO,2020,3.3,0.0,5.27,0.66,0.0,0.0,0.0,0.0,0.0,0.0,0.66,2.64,1.32,12
9,PRECSNO,2021,4.53,1.48,1.98,0.2,0.02,0.0,0.0,0.0,0.0,0.99,0.09,13.09,1.89,12


In [3]:
# Count how many rows each weather parameter contributes
precip['PARAMETER'].value_counts()

PRECSNO            3116
PRECTOTCORR_SUM    3116
Name: PARAMETER, dtype: int64

In [4]:
# Remove the annual ('ANN') column and preview the remaining columns
precip = precip.drop(columns=['ANN'])
precip.head(n=5)

Unnamed: 0,PARAMETER,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ID
0,PRECSNO,2020,1.32,0.66,0.66,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.66,0.66,1
1,PRECTOTCORR_SUM,2020,52.73,21.09,26.37,10.55,21.09,26.37,0.0,0.0,5.27,0.0,31.64,21.09,1
2,PRECSNO,2020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
3,PRECTOTCORR_SUM,2020,0.0,21.09,63.28,5.27,0.0,0.0,0.0,5.27,0.0,0.0,5.27,0.0,5
4,PRECSNO,2019,0.66,1.98,0.66,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.32,10


In [5]:
# Renaming months to their numbers.
# An explicit name-to-number mapping is used instead of overwriting
# `precip.columns` positionally, so a different column order or an extra
# column cannot silently mislabel the data.
month_numbers = {
    'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04',
    'MAY': '05', 'JUN': '06', 'JUL': '07', 'AUG': '08',
    'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12',
}
precip = precip.rename(columns=month_numbers)

In [6]:
# Split the records into a rain table and a snow table by parameter code
is_rain = precip['PARAMETER'] == 'PRECTOTCORR_SUM'
is_snow = precip['PARAMETER'] == 'PRECSNO'
precip_rain = precip[is_rain]
precip_snow = precip[is_snow]

(precip_rain.shape, precip_snow.shape)

((3116, 15), (3116, 15))

In [7]:
# Each table now holds a single parameter, so the PARAMETER column is removed
precip_rain, precip_snow = [
    table.drop(columns=['PARAMETER']) for table in (precip_rain, precip_snow)
]
precip_rain.head(n=5)

Unnamed: 0,YEAR,01,02,03,04,05,06,07,08,09,10,11,12,ID
1,2020,52.73,21.09,26.37,10.55,21.09,26.37,0.0,0.0,5.27,0.0,31.64,21.09,1
3,2020,0.0,21.09,63.28,5.27,0.0,0.0,0.0,5.27,0.0,0.0,5.27,0.0,5
6,2019,89.65,163.48,68.55,21.09,42.19,0.0,10.55,0.0,31.64,0.0,5.27,63.28,10
7,2020,10.55,0.0,47.46,15.82,10.55,5.27,0.0,0.0,0.0,0.0,36.91,26.37,10
10,2020,73.83,0.0,131.84,68.55,68.55,0.0,0.0,0.0,0.0,0.0,73.83,79.1,12


In [8]:
# Number of rain records available for each year
precip_rain['YEAR'].value_counts()

2020    1792
2021     862
2019     462
Name: YEAR, dtype: int64

In [18]:
# Reshape so every fire ID is one row and every (month, year) pair is a column
precip_rain_pivoted = precip_rain.pivot(index='ID', columns='YEAR')

# Flatten the (month, year) column labels into sortable 'YY_MM' strings
precip_rain_pivoted.columns = [
    str(year)[-2:] + '_' + str(month)
    for month, year in precip_rain_pivoted.columns
]

# Order the columns from the most recent month down to the oldest
precip_rain_pivoted = precip_rain_pivoted.sort_index(axis='columns', ascending=False)

precip_rain_pivoted.head(n=4)

Unnamed: 0_level_0,21_12,21_11,21_10,21_09,21_08,21_07,21_06,21_05,21_04,21_03,...,19_10,19_09,19_08,19_07,19_06,19_05,19_04,19_03,19_02,19_01
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
10,,,,,,,,,,,...,0.0,31.64,0.0,10.55,0.0,42.19,21.09,68.55,163.48,89.65
12,340.79,51.12,287.44,14.16,0.87,3.29,2.35,7.56,9.6,79.1,...,,,,,,,,,,


In [13]:
# Sum, for every fire ID, the first seven non-null monthly values. Columns are
# sorted from newest to oldest, so this is the precipitation sum for the last
# half of a year including the current month.
# NOTE(review): the window starts at the newest month present in the data, not
# at the fire start date -- confirm the downloaded range ends at the fire month.
months_in_window = 7

# Use the monthly columns only, so re-running this cell cannot fold an earlier
# 'sum_rain' / 'sum_snow' result into the new sum.
rain_months = precip_rain_pivoted.drop(columns=['sum_rain', 'sum_snow'], errors='ignore')

# Count the non-null values of each row from left to right and keep the first
# `months_in_window` of them. This works for any number of fires instead of
# relying on a hard-coded row count.
rain_in_window = rain_months.notna().cumsum(axis=1) <= months_in_window

# Assigning result to a new column (a Series aligned on the ID index);
# rows without any data sum to 0.0
precip_rain_pivoted['sum_rain'] = rain_months.where(rain_in_window).sum(axis=1)

In [14]:
# Repeating the sequence for snow data
precip_snow_pivoted = precip_snow.pivot(index='ID', columns='YEAR')  # pivot
precip_snow_pivoted.columns = [
    str(col[1])[-2:] + '_' + str(col[0]) for col in precip_snow_pivoted.columns
]  # renaming columns to 'YY_MM' for sorting
precip_snow_pivoted = precip_snow_pivoted.sort_index(axis=1, ascending=False)  # newest month first

# Calculating snow sums for every ID / fire location: the first seven non-null
# monthly values of each row. The mask is built from the snow table itself, so
# gaps in the snow data (not in the rain data) decide which months are counted,
# and no row count is hard-coded.
snow_in_window = precip_snow_pivoted.notna().cumsum(axis=1) <= 7
snow_sums = precip_snow_pivoted.where(snow_in_window).sum(axis=1)

# Assigning result to a new column in the summary dataset. The Series is
# aligned on the ID index, so the value lands on the matching fire even if the
# two tables differ in row order; an ID missing from the snow table gets NaN.
precip_rain_pivoted['sum_snow'] = snow_sums

In [16]:
# Write the rain and snow totals per fire ID to disk so they can be merged into X
summary_columns = ['sum_rain', 'sum_snow']
precip_rain_pivoted.loc[:, summary_columns].to_csv('../../data/processed/precipitation_sum_180d.csv')