In [3]:
# Packages for general use
import numpy as np
import pandas as pd

# For handling data
from sklearn import preprocessing
import rasterio as rio

# Modeling
from sklearn import linear_model
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

# Event-specific Matrices

In [4]:
# Read the event feature table from disk; the path is relative to the
# notebook's working directory.
EVENTS_CSV_PATH = "Data/data_for_model.csv"
all_events_feats = pd.read_csv(EVENTS_CSV_PATH)

In [32]:
# Display the column labels of the loaded table.
all_events_feats.columns

Index([u'EVENT_TYPE', u'MAGNITUDE', u'TOR_LENGTH', u'TOR_WIDTH', u'DURATION',
       u'CLOSEST_HOSPITAL_DIST', u'NUM_COUNTY_HOSPITALS',
       u'NUM_STATE_HOSPITALS', u'EPISODE_LENGTH', u'EVENT_LENGTH', u'DEATHS',
       u'FLOOD_CAUSE_Dam / Levee Break', u'FLOOD_CAUSE_Heavy Rain',
       u'FLOOD_CAUSE_Heavy Rain / Burn Area',
       u'FLOOD_CAUSE_Heavy Rain / Snow Melt',
       u'FLOOD_CAUSE_Heavy Rain / Tropical System',
       u'FLOOD_CAUSE_Planned Dam Release', u'LAND_COVER_CLASS_1.0',
       u'LAND_COVER_CLASS_2.0', u'LAND_COVER_CLASS_3.0',
       u'LAND_COVER_CLASS_5.0', u'LAND_COVER_CLASS_6.0',
       u'LAND_COVER_CLASS_7.0', u'LAND_COVER_CLASS_8.0',
       u'LAND_COVER_CLASS_9.0', u'LAND_COVER_CLASS_10.0',
       u'LAND_COVER_CLASS_11.0', u'LAND_COVER_CLASS_12.0',
       u'LAND_COVER_CLASS_13.0', u'LAND_COVER_CLASS_14.0',
       u'LAND_COVER_CLASS_15.0', u'LAND_COVER_CLASS_17.0', u'TOR_F_SCALE_EF0',
       u'TOR_F_SCALE_EF1', u'TOR_F_SCALE_EF2', u'TOR_F_SCALE_EF3',
       u'TOR_

In [35]:
# Standardize the continuous (non-binary) columns with a StandardScaler and
# write the scaled values back over the original columns.
#
# NOTE(review): the scaler is fit on all rows of all event types at once,
# before the per-event-type subsets are built in the following cells - confirm
# that pooling the statistics this way is intended.
# Missing values are neither dropped nor imputed in this cell.
feature_column_names = [
    'MAGNITUDE',
    'TOR_LENGTH',
    'TOR_WIDTH',
    'DURATION',
    'CLOSEST_HOSPITAL_DIST',
    'NUM_COUNTY_HOSPITALS',
    'NUM_STATE_HOSPITALS',
    'EPISODE_LENGTH',
    'EVENT_LENGTH',
]

# Selection of the columns to scale, taken before they are overwritten below.
feature_column = all_events_feats[feature_column_names]

# NOTE(review): `mlb` is a StandardScaler despite the name; the name is left
# unchanged here in case later cells refer to it.
mlb = preprocessing.StandardScaler()
feature_matrix = mlb.fit_transform(feature_column)

# Replace the raw columns with their standardized counterparts.
all_events_feats[feature_column_names] = feature_matrix

In [36]:
# Wind subset: keep rows whose EVENT_TYPE is 'Wind' and drop every column whose
# name contains EVENT, TOR, FLOOD or NARRATIVE.
# NOTE(review): the 'EVENT' pattern also removes EVENT_LENGTH (one of the
# standardized features), not just EVENT_TYPE - confirm that is intended.
event_type = 'Wind'
wind_events = (
    all_events_feats
    .loc[all_events_feats['EVENT_TYPE'] == event_type]
    .drop(
        labels=all_events_feats.filter(regex='EVENT|TOR|FLOOD|NARRATIVE').columns,
        axis=1,
    )
)
wind_events.head(5)

Unnamed: 0,MAGNITUDE,DURATION,CLOSEST_HOSPITAL_DIST,NUM_COUNTY_HOSPITALS,NUM_STATE_HOSPITALS,EPISODE_LENGTH,DEATHS,LAND_COVER_CLASS_1.0,LAND_COVER_CLASS_2.0,LAND_COVER_CLASS_3.0,...,FATALITY_LOCATION_Long Span Roof,FATALITY_LOCATION_Mobile/Trailer Home,FATALITY_LOCATION_Other/Unknown,FATALITY_LOCATION_Outside/Open Areas,FATALITY_LOCATION_Permanent Home,FATALITY_LOCATION_Permanent Structure,FATALITY_LOCATION_School,FATALITY_LOCATION_Telephone,FATALITY_LOCATION_Under Tree,FATALITY_LOCATION_Vehicle/Towed Trailer
0,-1.12876,0.0,,-0.194345,0.111398,-0.283799,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,-0.059892,0.0,,-0.194345,0.143012,-0.067288,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0.340933,0.0,,-0.194345,0.111398,0.026425,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9,0.073716,0.0,,-0.194345,-0.062482,0.307566,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
10,-0.527522,0.0,,0.26973,-0.117808,-0.471226,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [37]:
# Tornado subset: keep rows whose EVENT_TYPE is 'Tornado' and drop every column
# whose name contains EVENT, FLOOD, NARRATIVE or MAGNITUDE (TOR_* columns stay).
# NOTE(review): the 'EVENT' pattern also removes EVENT_LENGTH (one of the
# standardized features), not just EVENT_TYPE - confirm that is intended.
event_type = 'Tornado'
tor_events = (
    all_events_feats
    .loc[all_events_feats['EVENT_TYPE'] == event_type]
    .drop(
        labels=all_events_feats.filter(regex='EVENT|FLOOD|NARRATIVE|MAGNITUDE').columns,
        axis=1,
    )
)
tor_events.head(5)

Unnamed: 0,TOR_LENGTH,TOR_WIDTH,DURATION,CLOSEST_HOSPITAL_DIST,NUM_COUNTY_HOSPITALS,NUM_STATE_HOSPITALS,EPISODE_LENGTH,DEATHS,LAND_COVER_CLASS_1.0,LAND_COVER_CLASS_2.0,...,FATALITY_LOCATION_Long Span Roof,FATALITY_LOCATION_Mobile/Trailer Home,FATALITY_LOCATION_Other/Unknown,FATALITY_LOCATION_Outside/Open Areas,FATALITY_LOCATION_Permanent Home,FATALITY_LOCATION_Permanent Structure,FATALITY_LOCATION_School,FATALITY_LOCATION_Telephone,FATALITY_LOCATION_Under Tree,FATALITY_LOCATION_Vehicle/Towed Trailer
12,-1.329936,0.219557,0.0,-0.110321,0.037692,-0.094097,-0.471226,1,0,0,...,0,0,0,0,1,0,0,0,0,0
16,-0.624952,-0.552269,0.0,-0.117146,0.037692,-0.204748,-0.471226,2,0,0,...,0,0,0,0,0,0,0,0,0,1
17,-0.624952,-0.552269,0.0,-0.146693,0.037692,-0.204748,-0.471226,2,0,0,...,0,0,0,0,0,0,0,0,0,1
38,-0.27246,-0.873863,0.0,-0.148474,0.037692,-0.204748,-0.471226,1,0,0,...,0,0,0,0,1,0,0,0,0,0
39,-0.27246,-0.873863,0.0,-0.145246,0.037692,-0.204748,-0.471226,1,0,0,...,0,0,0,0,1,0,0,0,0,0


In [38]:
# Flood subset: keep rows whose EVENT_TYPE is 'Flood' and drop every column
# whose name contains EVENT, TOR, MAGNITUDE or NARRATIVE (FLOOD_* columns stay).
# NOTE(review): the 'EVENT' pattern also removes EVENT_LENGTH (one of the
# standardized features), not just EVENT_TYPE - confirm that is intended.
event_type = 'Flood'
flood_events = (
    all_events_feats
    .loc[all_events_feats['EVENT_TYPE'] == event_type]
    .drop(
        labels=all_events_feats.filter(regex='EVENT|TOR|MAGNITUDE|NARRATIVE').columns,
        axis=1,
    )
)
flood_events.head(5)

Unnamed: 0,DURATION,CLOSEST_HOSPITAL_DIST,NUM_COUNTY_HOSPITALS,NUM_STATE_HOSPITALS,EPISODE_LENGTH,DEATHS,FLOOD_CAUSE_Dam / Levee Break,FLOOD_CAUSE_Heavy Rain,FLOOD_CAUSE_Heavy Rain / Burn Area,FLOOD_CAUSE_Heavy Rain / Snow Melt,...,FATALITY_LOCATION_Long Span Roof,FATALITY_LOCATION_Mobile/Trailer Home,FATALITY_LOCATION_Other/Unknown,FATALITY_LOCATION_Outside/Open Areas,FATALITY_LOCATION_Permanent Home,FATALITY_LOCATION_Permanent Structure,FATALITY_LOCATION_School,FATALITY_LOCATION_Telephone,FATALITY_LOCATION_Under Tree,FATALITY_LOCATION_Vehicle/Towed Trailer
18,0.0,-0.170574,0.26973,0.601424,-0.248253,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
19,0.0,-0.176608,0.037692,0.601424,-0.248253,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29,0.0,-0.136093,0.26973,3.193818,0.346344,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30,0.0,-0.109655,2.358065,3.193818,0.346344,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
32,0.0,-0.18161,1.429916,3.193818,0.346344,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Models