In [131]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import seaborn as sns
import plotly.figure_factory as ff

import sqlite3
import requests

from functools import reduce

In [132]:
# Importing storm events data from database
# One row per Louisiana storm event, ordered by zone name and then by begin date.
query = '''
          SELECT BEGIN_YEARMONTH AS YEARMONTH,
                 BEGIN_DAY AS DAY,
                 CZ_TYPE,
                 CZ_NAME,
                 EVENT_TYPE,
                 BEGIN_LAT AS LAT,
                 BEGIN_LON AS LON
            FROM details
           WHERE STATE = 'LOUISIANA'
        ORDER BY CZ_NAME,
                 YEARMONTH,
                 DAY;
        '''

# Possible columns of interest:
#
#   SUM(INJURIES_DIRECT) AS INJURIES_DIRECT,
#   SUM(INJURIES_INDIRECT) AS INJURIES_INDIRECT,
#   SUM(DEATHS_DIRECT) AS DEATHS_DIRECT,
#   SUM(DEATHS_INDIRECT) AS DEATHS_INDIRECT
#   DAMAGE_PROPERTY,
#   DAMAGE_CROPS,
#   MAGNITUDE,
#   MAGNITUDE_TYPE,
#   FLOOD_CAUSE

# The path is relative to the notebook's working directory.
conn = sqlite3.connect('../data/raw/storms.db')
try:
    c = conn.cursor()
    results = c.execute(query)

    # Change column names to lowercase
    names = [description[0].lower() for description in results.description]

    # Passing the names to the constructor also works when the query returns no rows
    df = pd.DataFrame(results.fetchall(), columns=names)
finally:
    # Release the database handle even if the query or the conversion fails
    conn.close()

In [133]:
# Importing FIPS data from storm events database
# State/zone FIPS codes, zone names and begin coordinates for every Louisiana row.
query = '''
          SELECT STATE_FIPS,
                 CZ_FIPS,
                 CZ_TYPE,
                 CZ_NAME,
                 BEGIN_LAT AS LAT,
                 BEGIN_LON AS LON
            FROM details
           WHERE STATE = 'LOUISIANA'
        ORDER BY CZ_FIPS
        '''

# The path is relative to the notebook's working directory.
conn = sqlite3.connect('../data/raw/storms.db')
try:
    c = conn.cursor()
    results = c.execute(query)

    # Change column names to lowercase
    names = [description[0].lower() for description in results.description]

    # Passing the names to the constructor also works when the query returns no rows
    fips_df = pd.DataFrame(results.fetchall(), columns=names)
finally:
    # Release the database handle even if the query or the conversion fails
    conn.close()

In [134]:
def separate(yearmonth: int):
    '''
    Split a combined year-month value into its two parts.

    The value is converted to text; the first four characters are read as the
    year and the last two characters as the month.

    Returns a (year, month) tuple of ints.
    '''
    digits = str(yearmonth)
    year_part, month_part = digits[:4], digits[-2:]
    return int(year_part), int(month_part)


# Split yearmonth into integer year and month parts, kept in a helper frame that
# shares df's index so the arithmetic below lines up row by row.
year_month = pd.DataFrame(df.yearmonth.apply(separate).tolist(),
                          index=df.index, columns=['year', 'month'])

# Combine year, month and day into one YYYYMMDD integer and parse it as a datetime
df['date'] = pd.to_datetime(year_month.year*10000 + year_month.month*100 + df.day, format='%Y%m%d')

# Keep only the columns used from here on, with the date first
# (this also leaves out the raw yearmonth and day columns)
cols = ['date', 'cz_type', 'cz_name', 'event_type', 'lat', 'lon']
df = df[cols]

In [135]:
# Preview the first rows to check the new date column and the column order
df.head()

Unnamed: 0,date,cz_type,cz_name,event_type,lat,lon
0,1957-03-03,C,,Thunderstorm Wind,30.5,-92.18
1,1959-06-08,C,,Thunderstorm Wind,29.68,-90.18
2,1959-09-21,C,,Thunderstorm Wind,32.28,-93.4
3,1960-05-06,C,,Thunderstorm Wind,30.08,-90.18
4,1961-03-20,C,,Thunderstorm Wind,29.0,-89.4


In [136]:
# Put cz_name in title case (first letter of each word capitalised) in both frames
for frame in (df, fips_df):
    frame['cz_name'] = frame['cz_name'].str.title()

In [137]:
# List the distinct cz_name values to spot entries that need cleaning
df.cz_name.unique()

array([None, 'Acadia', 'Allen', 'Ascension', 'Assumption', 'Avoyelles',
       'Beauregard', 'Bienville', 'Bossier', 'Caddo', 'Calcasieu',
       'Caldwell', 'Cameron', 'Catahoula', 'Claiborne', 'Concordia',
       'De Soto', 'East Baton Rouge', 'East Cameron', 'East Carroll',
       'East Feliciana', 'Evangeline', 'Franklin', 'Grant', 'Iberia',
       'Iberville', 'Jackson', 'Jefferson', 'Jefferson Davis', 'La Salle',
       'Lafayette', 'Lafourche', 'Laz038>040 - 056>070', 'Laz067 - 070',
       'Lincoln', 'Livingston', 'Lower Jefferson', 'Lower Lafourche',
       'Lower Plaquemines', 'Lower St. Bernard', 'Lower St. Martin',
       'Lower Terrebonne', 'Madison', 'Morehouse', 'Natchitoches',
       'Northern Tangipahoa', 'Orleans', 'Ouachita', 'Plaquemines',
       'Pointe Coupee', 'Rapdies', 'Rapides', 'Red River', 'Richland',
       'Sabine', 'Sabine And Natchitoches', 'Southern Tangipahoa',
       'St. Bernard', 'St. Charles', 'St. Helena', 'St. James',
       'St. John The Baptist

In [138]:
# Inspecting odd cz_name: pull the rows behind each of the two unusual names
laz_range_rows = df[df['cz_name'] == 'Laz038>040 - 056>070']
laz_pair_rows = df[df['cz_name'] == 'Laz067 - 070']

# Print both frames with one blank line between them
print(laz_range_rows, laz_pair_rows, sep='\n\n')

            date cz_type               cz_name event_type  lat  lon
12110 1995-10-27       C  Laz038>040 - 056>070       Hail  NaN  NaN

            date cz_type       cz_name event_type  lat  lon
12111 1994-10-20       C  Laz067 - 070       Hail  NaN  NaN


In [139]:
# Drop rows with odd cz_name (rows whose cz_name is missing are kept)
odd_names = ['Laz038>040 - 056>070', 'Laz067 - 070']
df = df[~df['cz_name'].isin(odd_names)]

In [140]:
# Fix typo: 'Rapdies' is a misspelling of 'Rapides'
df['cz_name'] = df['cz_name'].replace('Rapdies', 'Rapides')

In [141]:
# Find the FIPS code for cz_name that aren't actual parishes
check_fips = [
    'East Cameron', 'Lower Jefferson', 'Lower Lafourche', 'Lower Plaquemines',
    'Lower St. Bernard', 'Lower St. Martin', 'Lower Terrebonne',
    'Northern Tangipahoa', 'Sabine And Natchitoches', 'Southern Tangipahoa',
    'Upper Jefferson', 'Upper Lafourche', 'Upper Plaquemines',
    'Upper St. Bernard', 'Upper St. Martin', 'Upper Terrebonne',
    'West Cameron',
]

for parish in check_fips:
    # Distinct cz_fips values recorded under this name
    zone_codes = fips_df.loc[fips_df['cz_name'] == parish, 'cz_fips'].unique()
    print('{}: {}\n'.format(parish, zone_codes))

East Cameron: [74]

Lower Jefferson: [68]

Lower Lafourche: [67]

Lower Plaquemines: [69]

Lower St. Bernard: [70]

Lower St. Martin: [55]

Lower Terrebonne: [66]

Northern Tangipahoa: [71]

Sabine And Natchitoches: [85]

Southern Tangipahoa: [72]

Upper Jefferson: [61]

Upper Lafourche: [59]

Upper Plaquemines: [63]

Upper St. Bernard: [64]

Upper St. Martin: [45]

Upper Terrebonne: [65]

West Cameron: [73]



In [142]:
# Compare above FIPS with actual parish FIPS
parish_fips = [
    'Cameron', 'Jefferson', 'Lafourche', 'Plaquemines', 'St. Bernard',
    'St. Martin', 'Terrebonne', 'Tangipahoa', 'Sabine', 'Natchitoches',
]

for parish in parish_fips:
    # Distinct cz_fips values recorded under this parish name
    parish_codes = fips_df.loc[fips_df['cz_name'] == parish, 'cz_fips'].unique()
    print('{}: {}\n'.format(parish, parish_codes))

Cameron: [23 51]

Jefferson: [51]

Lafourche: [57]

Plaquemines: [75]

St. Bernard: [87]

St. Martin: [99]

Terrebonne: [109]

Tangipahoa: [ 38 105]

Sabine: [17 85]

Natchitoches: [18 69]



In [143]:
# Get coordinates (if any) for cz_name that aren't actual parishes
has_lat = df['lat'].notnull()
for parish in check_fips:
    # First row for this name that carries a latitude (empty frame if there is none)
    first_located = df[(df['cz_name'] == parish) & has_lat].head(1)
    print(first_located, end='\n\n')

Empty DataFrame
Columns: [date, cz_type, cz_name, event_type, lat, lon]
Index: []

Empty DataFrame
Columns: [date, cz_type, cz_name, event_type, lat, lon]
Index: []

Empty DataFrame
Columns: [date, cz_type, cz_name, event_type, lat, lon]
Index: []

Empty DataFrame
Columns: [date, cz_type, cz_name, event_type, lat, lon]
Index: []

Empty DataFrame
Columns: [date, cz_type, cz_name, event_type, lat, lon]
Index: []

Empty DataFrame
Columns: [date, cz_type, cz_name, event_type, lat, lon]
Index: []

Empty DataFrame
Columns: [date, cz_type, cz_name, event_type, lat, lon]
Index: []

Empty DataFrame
Columns: [date, cz_type, cz_name, event_type, lat, lon]
Index: []

            date cz_type                  cz_name event_type    lat    lon
17937 1993-04-14       C  Sabine And Natchitoches    Tornado  31.62 -93.43

Empty DataFrame
Columns: [date, cz_type, cz_name, event_type, lat, lon]
Index: []

Empty DataFrame
Columns: [date, cz_type, cz_name, event_type, lat, lon]
Index: []

Empty DataFrame
Col

In [144]:
# Only one - Sabine and Natchitoches - use coordinates to find parish
url = 'http://www.datasciencetoolkit.org/coordinates2politics/'
coord = '31.62%2c-93.43'  # latitude and longitude joined by a percent-encoded comma (%2c)

# A timeout keeps the cell from hanging indefinitely if the service never answers
r = requests.get(url + coord, timeout=30)
# Surface HTTP errors directly instead of as a confusing JSON decoding failure
r.raise_for_status()
print(r.json())

[{'politics': [{'type': 'admin2', 'friendly_type': 'country', 'name': 'United States', 'code': 'usa'}, {'type': 'admin4', 'friendly_type': 'state', 'name': 'Louisiana', 'code': 'us22'}, {'type': 'constituency', 'friendly_type': 'constituency', 'name': 'Fourth district, LA', 'code': '22_04'}, {'type': 'admin6', 'friendly_type': 'county', 'name': 'Sabine', 'code': '22_085'}], 'location': {'latitude': 31.62, 'longitude': -93.43}}]


In [145]:
# Create dummy DataFrame to see if these FIPS codes are found
zone_fips = ['22074', '22068', '22070', '22066', '22072', '22064', '22038', '22018']

# Pair every code with a distinct dummy value, counting up from 1
fake = [[code, position] for position, code in enumerate(zone_fips, start=1)]
fdf = pd.DataFrame(fake, columns=['fips', 'data'])

In [146]:
# Pull the plotting inputs out of the dummy frame
fips = fdf['fips'].tolist()
values = fdf['data'].tolist()

# Four evenly spaced bin edges running from the smallest to the largest value
lowest, highest = min(values), max(values)
endpts = list(np.mgrid[lowest:highest:4j])

# Background colours and county outline, grouped so the call below stays short
appearance = {
    'plot_bgcolor': 'rgb(229,229,229)',
    'paper_bgcolor': 'rgb(229,229,229)',
    'county_outline': {'color': 'rgb(255,255,255)', 'width': 0.5},
}

fig = ff.create_choropleth(
    fips=fips,
    values=values,
    scope=['Louisiana'],
    show_state_data=True,
    binning_endpoints=endpts,
    round_legend_values=True,
    exponent_format=True,
    **appearance
)
fig.layout.template = None
fig.show()


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unrecognized FIPS Values

Whoops! It looks like you are trying to pass at least one FIPS value that is not in our shapefile of FIPS and data for the counties. Your choropleth will still show up but these counties cannot be shown.
Unrecognized FIPS are: [22074, 22068, 22070, 22066, 22072, 22064, 22038, 22018]



In [147]:
# None of the FIPS codes are valid, so change to parish
# Each parish is listed with the cz_name variants that should be folded into it
zones_by_parish = {
    'Cameron': ['East Cameron', 'West Cameron'],
    'Jefferson': ['Lower Jefferson', 'Upper Jefferson'],
    'Lafourche': ['Lower Lafourche', 'Upper Lafourche'],
    'Plaquemines': ['Lower Plaquemines', 'Upper Plaquemines'],
    'St. Bernard': ['Lower St. Bernard', 'Upper St. Bernard'],
    'St. Martin': ['Lower St. Martin', 'Upper St. Martin'],
    'Terrebonne': ['Lower Terrebonne', 'Upper Terrebonne'],
    'Tangipahoa': ['Northern Tangipahoa', 'Southern Tangipahoa'],
    'Sabine': ['Sabine And Natchitoches'],
}

# Invert the grouping into the variant -> parish lookup
parish_dict = {zone: parish
               for parish, zones in zones_by_parish.items()
               for zone in zones}
# Swap every cz_name that appears as a key in parish_dict for its parish name
df = df.assign(cz_name=df['cz_name'].replace(parish_dict))

In [148]:
# Show the rows that have no cz_name
df[df.cz_name.isnull()]

Unnamed: 0,date,cz_type,cz_name,event_type,lat,lon
0,1957-03-03,C,,Thunderstorm Wind,30.5,-92.18
1,1959-06-08,C,,Thunderstorm Wind,29.68,-90.18
2,1959-09-21,C,,Thunderstorm Wind,32.28,-93.4
3,1960-05-06,C,,Thunderstorm Wind,30.08,-90.18
4,1961-03-20,C,,Thunderstorm Wind,29.0,-89.4
5,1962-01-09,C,,Thunderstorm Wind,29.0,-89.4
6,1963-07-01,C,,Thunderstorm Wind,29.48,-91.68
7,1963-11-09,C,,Thunderstorm Wind,29.0,-89.4
8,1964-02-24,C,,Thunderstorm Wind,29.2,-90.18
9,1964-04-25,C,,Thunderstorm Wind,30.5,-92.0


In [149]:
# Find parishes by coordinates where cz_name is null
url = 'http://www.datasciencetoolkit.org/coordinates2politics/'

# Keep the coordinates together with their row labels, so every answer is written
# back to the row it was looked up for, whatever labels the null rows carry.
missing = df.loc[df.cz_name.isnull(), ['lat', 'lon']]
lats = [str(lat) for lat in missing['lat'].tolist()]
lons = [str(lon) for lon in missing['lon'].tolist()]

null_count = 0
for idx, lat, lon in zip(missing.index, lats, lons):
    # Latitude and longitude joined by a percent-encoded comma (%2c)
    coord = lat + '%2c' + lon
    # A timeout keeps the loop from hanging indefinitely on an unresponsive service
    r = requests.get(url + coord, timeout=30)
    # Surface HTTP errors directly instead of as a confusing JSON decoding failure
    r.raise_for_status()
    try:
        # Print the row label and the coordinates returned by API
        print(idx)
        result = r.json()[0]  # parse the response once and reuse it
        print(result['location'])
        # Iterate through each dict
        for d in result['politics']:
            # Add parish name if found.
            # NOTE: every 'county' entry is accepted without checking d['code'], and
            # when several are returned the last one is the value that remains.
            if d['friendly_type'] == 'county':
                df.loc[idx, 'cz_name'] = d['name']
                print(d['name'])
                print(d['code'])
    # NoneType when API returns no politics data
    except TypeError:
        print('No data')
        null_count += 1
    print()

print('Null count:', null_count)

0
{'latitude': 30.5, 'longitude': -92.18}
St. Landry
22_097

1
{'latitude': 29.68, 'longitude': -90.18}
Lafourche
22_057
Jefferson
22_051

2
{'latitude': 32.28, 'longitude': -93.4}
Bienville
22_013

3
{'latitude': 30.08, 'longitude': -90.18}
Jefferson
22_051

4
{'latitude': 29.0, 'longitude': -89.4}
No data

5
{'latitude': 29.0, 'longitude': -89.4}
No data

6
{'latitude': 29.48, 'longitude': -91.68}

7
{'latitude': 29.0, 'longitude': -89.4}
No data

8
{'latitude': 29.2, 'longitude': -90.18}
Lafourche
22_057

9
{'latitude': 30.5, 'longitude': -92.0}
St. Landry
22_097

10
{'latitude': 30.5, 'longitude': -92.0}
St. Landry
22_097

11
{'latitude': 29.28, 'longitude': -90.08}
Lafourche
22_057
Lafourche
22_057

12
{'latitude': 30.7, 'longitude': -91.78}
St. Landry
22_097

13
{'latitude': 29.38, 'longitude': -91.28}

14
{'latitude': 30.18, 'longitude': -93.0}
Calcasieu
22_019
Jefferson Davis
22_053

15
{'latitude': 31.2, 'longitude': -91.18}
Wilkinson
28_157

16
{'latitude': 31.28, 'longitude'

In [153]:
# Rows still lacking a cz_name. Expected 14 nulls - 4 not counted because politics
# data was returned, but no county data within it
still_missing = df[df['cz_name'].isnull()]
print(len(still_missing))
still_missing

14


Unnamed: 0,date,cz_type,cz_name,event_type,lat,lon
4,1961-03-20,C,,Thunderstorm Wind,29.0,-89.4
5,1962-01-09,C,,Thunderstorm Wind,29.0,-89.4
6,1963-07-01,C,,Thunderstorm Wind,29.48,-91.68
7,1963-11-09,C,,Thunderstorm Wind,29.0,-89.4
13,1964-08-05,C,,Thunderstorm Wind,29.38,-91.28
25,1969-02-14,C,,Thunderstorm Wind,29.28,-92.4
28,1969-05-07,C,,Thunderstorm Wind,29.38,-91.28
30,1969-11-13,C,,Thunderstorm Wind,28.6,-90.28
31,1970-01-05,C,,Thunderstorm Wind,29.08,-90.18
39,1973-03-04,C,,Hail,29.23,-93.33


In [155]:
# Drop remaining nulls in cz_name and renumber the rows from 0
df = df.dropna(subset=['cz_name']).reset_index(drop=True)

In [158]:
# Parish name -> five-character FIPS code, used to add a FIPS column.
# The names are listed in ascending code order: the codes are '22' followed by the
# odd numbers 001, 003, ..., 127, so each code follows from the name's position.
parish_names_in_fips_order = [
    'Acadia', 'Allen', 'Ascension', 'Assumption', 'Avoyelles', 'Beauregard',
    'Bienville', 'Bossier', 'Caddo', 'Calcasieu', 'Caldwell', 'Cameron',
    'Catahoula', 'Claiborne', 'Concordia', 'De Soto', 'East Baton Rouge',
    'East Carroll', 'East Feliciana', 'Evangeline', 'Franklin', 'Grant',
    'Iberia', 'Iberville', 'Jackson', 'Jefferson', 'Jefferson Davis',
    'Lafayette', 'Lafourche', 'La Salle', 'Lincoln', 'Livingston', 'Madison',
    'Morehouse', 'Natchitoches', 'Orleans', 'Ouachita', 'Plaquemines',
    'Pointe Coupee', 'Rapides', 'Red River', 'Richland', 'Sabine',
    'St. Bernard', 'St. Charles', 'St. Helena', 'St. James',
    'St. John The Baptist', 'St. Landry', 'St. Martin', 'St. Mary',
    'St. Tammany', 'Tangipahoa', 'Tensas', 'Terrebonne', 'Union', 'Vermilion',
    'Vernon', 'Washington', 'Webster', 'West Baton Rouge', 'West Carroll',
    'West Feliciana', 'Winn',
]
parish_fips_dict = {name: '22{:03d}'.format(2 * position + 1)
                    for position, name in enumerate(parish_names_in_fips_order)}
# Look up each row's FIPS code from its cz_name (names missing from the dict give NaN),
# then drop unnecessary columns
df = (
    df.assign(fips=df['cz_name'].map(parish_fips_dict))
      .drop(columns=['cz_type', 'lat', 'lon'])
)

In [159]:
df.head()

Unnamed: 0,date,cz_name,event_type,fips
0,1957-03-03,St. Landry,Thunderstorm Wind,22097
1,1959-06-08,Jefferson,Thunderstorm Wind,22051
2,1959-09-21,Bienville,Thunderstorm Wind,22013
3,1960-05-06,Jefferson,Thunderstorm Wind,22051
4,1964-02-24,Lafourche,Thunderstorm Wind,22057
