In [177]:
%load_ext autoreload
%autoreload 2



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [259]:
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic
import statsmodels.api as sm


In [242]:
import eda_analysis as ea
import plotting_libs as plo
import ts_tests as tse

import pandas as pd
import altair as alt

In [9]:
# Render matplotlib figures inline in the notebook output.
# NOTE(review): this notebook imports altair, not matplotlib, directly;
# presumably one of the local plotting helpers relies on it — confirm.
%matplotlib inline

In [13]:
# Data locations. Each path can be overridden through an environment variable
# so the notebook can run on a machine other than the original author's; when
# the variable is unset, the original absolute path is used unchanged.
import os

# Raw historical wildfire data (input to ea.prepare_wildfires_data).
wf_data = os.environ.get(
    "BOLIVIA_WF_DATA",
    "/Users/ccsuehara/Google Drive/Bolivia Wildfires/Bolivia Fire Historical Raw Data",
)
# Economic-activity data (input to ea.prepare_econ_data).
eco_act = os.environ.get(
    "BOLIVIA_ECON_DATA",
    "/Users/ccsuehara/GH folders/Bolivia_wildfires/data",
)

In [224]:
## Have data ready
# Both loaders live in the local eda_analysis module; what they return is not
# visible from this notebook.
# Geodata for Bolivia.
bolivia_dptos = ea.bol_gdf() ##geodata from Bolivia 
# Economic activities for Bolivia, read from the eco_act path. It is later
# merged with the monthly fire table on year_month / year / month.
econ_act = ea.prepare_econ_data(eco_act) ## economic activities from Bolivia


In [225]:
# Wildfire pipeline, one named stage per step so each intermediate can be
# inspected. The helpers are defined in eda_analysis.
wildfires_raw = ea.prepare_wildfires_data(wf_data)
# NOTE(review): 50 is presumably a minimum detection-confidence threshold — confirm.
wildfires_confident = ea.keep_dep_confid(wildfires_raw, 50)
# NOTE(review): 40 is presumably the FRP cut point separating small from big fires — confirm.
wildfires = ea.make_cutpoints_frp(wildfires_confident, 40)
# Split into the two subsets that are aggregated separately below.
small_fire, big_fire = ea.group_wf(wildfires)

In [226]:
## Obtain monthly data
# Big-fire subset, grouped by (year_month, year, month). The aggregation spec
# asks for the sum of num_fires and the mean / median / max of frp.
big_group_keys = ['year_month', 'year', 'month']
big_aggregations = {'num_fires': 'sum', 'frp': ['mean', 'median', 'max']}
monthly_big = ea.group_data(big_fire, big_group_keys, big_aggregations)

In [227]:
# Monthly aggregates for the small-fire subset, using the same grouping keys
# and aggregation spec as the big-fire table. The aggregated columns get an
# 's_' prefix so they stay distinguishable from the big-fire columns once the
# two tables are merged on the grouping keys.
# rename() is chained instead of being applied with inplace=True, so the cell
# is a single expression that produces a fresh frame.
monthly_small = ea.group_data(
    small_fire,
    ['year_month', 'year', 'month'],
    {'num_fires': 'sum',
     'frp': ['mean', 'median', 'max']},
).rename(
    columns={
        'num_firessum': 's_num_firessum',
        'frpmean': 's_frpmean',
        'frpmedian': 's_frpmedian',
        'frpmax': 's_frpmax',
    }
)

In [228]:
# Join big- and small-fire monthly tables on the shared period keys.
# how="inner" is pandas' default, spelled out here for the reader.
monthly_wf = monthly_big.merge(
    monthly_small,
    how="inner",
    on=["year_month", "year", "month"],
)

In [229]:
# Analysis frame: economic activity joined with the monthly fire aggregates
# on the period keys, then indexed by 'date'. The period helper columns are
# dropped after the join because 'date' carries the time axis from here on.
# Written as one chain (no inplace=True) so every step returns a new frame.
eda_df = (
    pd.merge(econ_act, monthly_wf, on=["year_month", "year", "month"])
    .drop(columns=['year', 'month', 'month_num', 'year_month'])
    .set_index('date')
)

In [230]:
# One plo.small_mults panel per group of economic-activity columns.
# reset_index() is called for every panel so that 'date' is passed as an
# ordinary column rather than as the index.
econ_column_groups = [
    ['ind', 'ag', 'energy', 'mining'],
    ['manufacturing', 'utilities', 'construction', 'commerce'],
    ['transportation', 'communications', 'finance', 'professional_services'],
    ['public_services', 'other_services'],
]
g1, g2, g3, g4 = [
    plo.small_mults(eda_df.reset_index(), cols)
    for cols in econ_column_groups
]

In [231]:
# Combine the four panels with `|` and display the result.
# NOTE(review): for Altair charts `|` is horizontal concatenation — confirm
# that plo.small_mults returns Altair charts.
g1 | g2 | g3 | g4

In [232]:
# One plo.small_mults panel per pair of wildfire columns: big-fire metrics
# first, then the 's_'-prefixed small-fire metrics. This reuses g1..g4, so
# the economic-activity panels are overwritten.
fire_column_groups = [
    ['num_firessum', 'frpmean'],
    ['frpmedian', 'frpmax'],
    ['s_num_firessum', 's_frpmean'],
    ['s_frpmedian', 's_frpmax'],
]
g1, g2, g3, g4 = [
    plo.small_mults(eda_df.reset_index(), cols)
    for cols in fire_column_groups
]

In [233]:
# Combine the four wildfire panels with `|` and display the result.
# NOTE(review): for Altair charts `|` is horizontal concatenation — confirm
# that plo.small_mults returns Altair charts.
g1 | g2 | g3 | g4

In [234]:
# Reshape eda_df into the form the heatmap helper expects, then draw it.
# NOTE(review): presumably a Pearson correlation matrix across all columns,
# judging by the helper names — confirm in eda_analysis / plotting_libs.
dfcorr = ea.transf_4_heat(eda_df)
plo.pearson_heatmap(dfcorr)

In [235]:
# Augmented Dickey-Fuller test on every column of the untransformed data
# (H0: the series is non-stationary). tse.aug_df is handed in as the
# per-series test routine; both helpers are defined in ts_tests.
tse.print_results_adf(eda_df, tse.aug_df)

    Augmented Dickey-Fuller Test
 -------------------------------------------------------
ind | -0.3371 | 13 | 13 | 0.9201 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


ag | 3.4301 | 12 | 12 | 1.0 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


energy | -1.5838 | 12 | 12 | 0.4917 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


mining | -1.3814 | 11 | 11 | 0.5912 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


manufacturing | 1.6075 | 12 | 12 | 0.9979 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


utilities | -1.2853 | 12 | 12 | 0.6359 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


construction | -1.3839 | 13 | 13 | 0.59 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


In [248]:
# Take logs to see whether that makes the series stationary.
log_eda_df = ea.log_df(eda_df)

In [251]:
# Economic-activity panels again, this time on the logged data.
# reset_index() is called for every panel so that the index is passed as an
# ordinary column.
econ_column_groups = [
    ['ind', 'ag', 'energy', 'mining'],
    ['manufacturing', 'utilities', 'construction', 'commerce'],
    ['transportation', 'communications', 'finance', 'professional_services'],
    ['public_services', 'other_services'],
]
g1, g2, g3, g4 = [
    plo.small_mults(log_eda_df.reset_index(), cols)
    for cols in econ_column_groups
]
# Combine the four panels with `|` and display them.
g1 | g2 | g3 | g4

In [250]:
# Augmented Dickey-Fuller test on every column of the logged data
# (H0: the series is non-stationary).
tse.print_results_adf(log_eda_df, tse.aug_df)

    Augmented Dickey-Fuller Test
 -------------------------------------------------------
ind | -1.477 | 13 | 13 | 0.5449 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


ag | 1.8029 | 14 | 14 | 0.9984 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


energy | -1.9335 | 14 | 14 | 0.3165 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


mining | -1.4692 | 11 | 11 | 0.5487 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


manufacturing | -0.1494 | 12 | 12 | 0.9443 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


utilities | -3.5924 | 12 | 12 | 0.0059 | Reject H0: Stationary 
 -------------------------------------------------------


construction | -3.0999 | 13 | 13 | 0.0266 | Reject H0: Stationary 
 -------------------------------------------------------


commerce | -0

In [252]:
# Difference the logged series to see whether that makes them stationary.
# NOTE(review): presumably a first difference — confirm in ea.difference_df.
log_diff_eda_df = ea.difference_df(log_eda_df)

In [255]:
# Economic-activity panels on the differenced log data.
# reset_index() is called for every panel so that the index is passed as an
# ordinary column.
econ_column_groups = [
    ['ind', 'ag', 'energy', 'mining'],
    ['manufacturing', 'utilities', 'construction', 'commerce'],
    ['transportation', 'communications', 'finance', 'professional_services'],
    ['public_services', 'other_services'],
]
g1, g2, g3, g4 = [
    plo.small_mults(log_diff_eda_df.reset_index(), cols)
    for cols in econ_column_groups
]
# Combine the four panels with `|` and display them.
g1 | g2 | g3 | g4

In [253]:
# Augmented Dickey-Fuller test on every column of the differenced log data
# (H0: the series is non-stationary).
tse.print_results_adf(log_diff_eda_df, tse.aug_df)

    Augmented Dickey-Fuller Test
 -------------------------------------------------------
ind | -1.4134 | 6 | 6 | 0.5758 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


ag | -2.4675 | 3 | 3 | 0.1236 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


energy | -2.6278 | 12 | 12 | 0.0874 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


mining | -4.0109 | 11 | 11 | 0.0014 | Reject H0: Stationary 
 -------------------------------------------------------


manufacturing | -5.551 | 1 | 1 | 0.0 | Reject H0: Stationary 
 -------------------------------------------------------


utilities | -0.6015 | 8 | 8 | 0.8706 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


construction | -0.6782 | 13 | 13 | 0.8523 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


commerce | -6.1133 | 0 

In [256]:
# Apply ea.difference_df a second time, to the already-differenced log data.
# The result is the frame the regressions further down are run on.
log_diff2_eda_df = ea.difference_df(log_diff_eda_df)

In [257]:
# Economic-activity panels on the twice-differenced log data.
# reset_index() is called for every panel so that the index is passed as an
# ordinary column.
econ_column_groups = [
    ['ind', 'ag', 'energy', 'mining'],
    ['manufacturing', 'utilities', 'construction', 'commerce'],
    ['transportation', 'communications', 'finance', 'professional_services'],
    ['public_services', 'other_services'],
]
g1, g2, g3, g4 = [
    plo.small_mults(log_diff2_eda_df.reset_index(), cols)
    for cols in econ_column_groups
]
# Combine the four panels with `|` and display them.
g1 | g2 | g3 | g4

In [258]:
# Augmented Dickey-Fuller test on every column of the twice-differenced log
# data (H0: the series is non-stationary).
tse.print_results_adf(log_diff2_eda_df, tse.aug_df)

    Augmented Dickey-Fuller Test
 -------------------------------------------------------
ind | -2.7351 | 13 | 13 | 0.0682 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


ag | -3.4749 | 12 | 12 | 0.0087 | Reject H0: Stationary 
 -------------------------------------------------------


energy | -4.4842 | 13 | 13 | 0.0002 | Reject H0: Stationary 
 -------------------------------------------------------


mining | -6.1272 | 11 | 11 | 0.0 | Reject H0: Stationary 
 -------------------------------------------------------


manufacturing | -3.822 | 12 | 12 | 0.0027 | Reject H0: Stationary 
 -------------------------------------------------------


utilities | -2.7883 | 12 | 12 | 0.06 | Can't reject H0: Non-Stationary 
 -------------------------------------------------------


construction | -3.0318 | 13 | 13 | 0.032 | Reject H0: Stationary 
 -------------------------------------------------------


commerce | -5.8774 | 0 | 0 | 0.0 | Reject H0: St

In [300]:
# Dependent variables: the economic-activity series.
Y_array = [
    'ind',
    'ag',
    'energy',
    'mining',
    'manufacturing',
    'utilities',
    'construction',
    'commerce',
    'transportation',
    'communications',
    'finance',
    'professional_services',
    'public_services',
    'other_services',
]

# Explanatory variables: big-fire metrics, then the 's_'-prefixed small-fire ones.
X_array = [
    'num_firessum',
    'frpmean',
    'frpmedian',
    'frpmax',
    's_num_firessum',
    's_frpmean',
    's_frpmedian',
    's_frpmax',
]


# Build the coefficient table on the twice-differenced log data: one row per
# (X variable, Y variable) pair, with the coefficient, p-value and confidence
# bounds. The model that is fitted is defined in ea.make_betas_matrix.
df_of_betas = ea.make_betas_matrix(log_diff2_eda_df,X_array, Y_array)

In [301]:
# Display the full coefficient table.
df_of_betas

Unnamed: 0,index,coeff,pvals,conf_lower,conf_higher,depend
1,num_firessum,-9e-05,0.933397,-0.002212,0.002032,ind
1,frpmean,0.007268,0.081976,-0.000936,0.015472,ind
1,frpmedian,0.022594,0.001519,0.008814,0.036375,ind
1,frpmax,0.000973,0.426417,-0.001442,0.003388,ind
1,s_num_firessum,-0.000777,0.573219,-0.003503,0.001948,ind
1,s_frpmean,0.004078,0.781735,-0.025,0.033155,ind
1,s_frpmedian,0.012334,0.200771,-0.006651,0.031318,ind
1,s_frpmax,-0.091971,0.125788,-0.210095,0.026152,ind
1,num_firessum,-0.005926,0.001077,-0.009426,-0.002426,ag
1,frpmean,-0.001807,0.803426,-0.016151,0.012537,ag
