In [1]:
import reverse_geocoder as rg
import pandas as pd
import numpy as np
import ftplib
import requests
import matplotlib.pyplot as plt
import os
import re

pd.options.mode.copy_on_write = True

## Automatic downloading and reading of files
From Yuba's code

In [2]:
event_types = {'hail':'hail', 'storm_structure':'structure', 'tornados':'tvs', 'lightning':'nldn-tiles', 'mesocyclone':'mda'}
for event in event_types:
    path = '../weather_data/'+event
    if not os.path.exists(path):
        os.mkdir(path)

In [3]:
# connect to the server
ftp = ftplib.FTP('ftp.ncdc.noaa.gov', timeout=30) #pass the url without protocol
ftp.login() #pass credentials if anonymous access is not allowed

# switch to the directory containing the data
ftp.cwd('/pub/data/swdi/database-csv/v2/')
ftp.pwd()

httpurl = 'https://www.ncei.noaa.gov/pub/data/swdi/database-csv/v2/'
# get the list of files in this ftp dir
all_files= ftp.nlst()

In [4]:
def download_file(year, event_type):
    event_name = event_types[event_type]
    pattern = event_name+"-"+str(year)
    file_name = [fname for fname in all_files if pattern in fname]
    if len(file_name) == 0:
        return "No file in that year for that event type" 
    file_name = file_name[0]
    print("Considering file ", file_name)
    if os.path.exists('../weather_data/{}/{}'.format(event_type, file_name)):
        return "file already exists"
    query_parameters = {"downloadformat": "csv"}
    print("Getting the response from the URL .....")
    response = requests.get(httpurl+file_name, params=query_parameters)
    if response.ok:
        print("Downloaded succesfully")
    with open(r'../weather_data/{}/{}'.format(event_type, file_name), "wb") as f:
        f.write(response.content)
    print('Saved in folder')

In [5]:
def read_weather(year, event_type):
    print(f'Downloading {event_type} file for {year}.')
    download_file(year,event_type)
    files = os.listdir('../weather_data/'+event_type)
    file_name = [fname for fname in files if str(year) in fname]
    if len(file_name) == 0:
        raise Exception(f"No file for event type {event_type} in year {year}") 
    if len(file_name) > 1:
        raise Exception(f"Multiple files for event type {event_type} in year {year}")
    if event_type == 'lightning' or event_type == 'tornado':
        return pd.read_csv(r'../weather_data/'+event_type+'/'
                  + file_name[0], skiprows=2, parse_dates=['#ZDAY'])
    return pd.read_csv(r'../weather_data/'+event_type+'/'
                  + file_name[0], skiprows=2, parse_dates=['#ZTIME'])

## Cleaning power data
We clean the power data as follows.
1. Convert 'Date Event Began' column to datetime format.
2. Keep only the rows where power event type involves 'Severe Weather'.
3. Drop all columns except Date Event Began and Area Affected

In [6]:
def clean_power(power):
    power['Date Event Began'] = pd.to_datetime(power['Date Event Began'], format='%m/%d/%Y')
    power = power[power['Event Type'].str.contains(r'Severe Weather', regex=True)]
    return power.drop(columns=['Month', 'Time Event Began', 'Date of Restoration', 'Time of Restoration', 
                         'NERC Region', 'Alert Criteria', 'Event Type', 'Demand Loss (MW)', 'Number of Customers Affected'])

## Cleaning weather data
For the purposes of merging with power data, the most important aspects of cleaning weather data are the following.
1. Convert the date/time of the weather event into just the date, in datetime format
2. Obtain the county and state of the weather event by reverse geosearch from the latitude (mean) and longitude (mean)
3. Drop weather events with no county (indicating the event falls outside of the US).

There is a fair amount of re-used code that should be separated into functions.

### Cleaning lightning data
We clean the lightning data as follows.
1. Convert '#ZDAY' column to 'DATE' column in datetime format.
2. Reverse geosearch using 'CENTERLAT' and 'CENTERLON' columns to add two new columns, 'state' and 'county', giving the state and county where the lightning strike occurred.
3. Drop the rows where no county info is found. (This occurs when the strike is outside the US.)

In [7]:
def clean_lightning(lightning):
    lightning['DATE'] = pd.to_datetime(lightning['#ZDAY'], format='%Y%m%d').dt.normalize()
    lightning.drop(columns=['#ZDAY'])
    # add county and state columns
    addresses = rg.search(list(zip(lightning['CENTERLAT'],lightning['CENTERLON'])))
    lightning['county'] = [x['admin2'] for x in addresses]
    lightning['state'] = [x['admin1'] for x in addresses]
    ## drop rows with no county (occurs when not in US)
    lightning = lightning[lightning['county'] != '']
    return lightning

### Cleaning tornado data
We clean the tornado data as follows.
1. Convert '#ZTIME' column to 'DATE' column in datetime format.
2. Split into groups by 'DATE', 'WSR_ID', and 'CELL_ID', then get mean, min, and max of 'LAT' and 'LON' for each group, and max of remaining attributes for each group.
3. Reverse geosearch using 'LAT_mean' and 'LON_mean' to get 'county' and 'state' columns.
4. Drop the rows where no county info is found.

In [8]:
def clean_tornado(tornados):
    tornados['DATE'] = pd.to_datetime(tornados['#ZTIME']).dt.normalize()
    tornados.drop(columns=['#ZTIME'])
    groups = tornados.groupby(['DATE', 'WSR_ID', 'CELL_ID'], as_index=False)
    tor_agg = groups.agg({'LAT':['mean', 'min', 'max'],
                          'LON':['mean', 'min', 'max'],
                          'AVGDV':'max', 'LLDV':'max', 'MXDV':'max',
                          'MXDV_HEIGHT':'max', 'DEPTH':'max',
                          'MAX_SHEAR':'max', 'MAX_SHEAR_HEIGHT':'max'})
    tor_agg.columns = tor_agg.columns.to_flat_index()
    tor_agg.columns = ['_'.join(col) if col[1] else col[0] for col in tor_agg.columns]
    tor_agg.reset_index()
    addresses = rg.search(list(zip(tor_agg['LAT_mean'], tor_agg['LON_mean'])))
    tor_agg['county'] = [x['admin2'] for x in addresses]
    tor_agg['state'] = [x['admin1'] for x in addresses]
    tor_agg = tor_agg[tor_agg['county'] != '']
    return tor_agg

### Cleaning hail data
We clean the hail data exactly analogously to how we clean the tornado data.

In [9]:
def clean_hail(hail):
    hail = hail[hail.SEVPROB>0]
    hail['DATE'] = pd.to_datetime(hail['#ZTIME']).dt.normalize()
    hail.drop(columns=['#ZTIME'])
    groups = hail.groupby(['DATE', 'WSR_ID', 'CELL_ID'], as_index=False)
    hail_agg = groups.agg({'LAT':['mean', 'min', 'max'], 
                           'LON':['mean', 'min', 'max'],
                           'SEVPROB':'max', 'PROB':'max', 'MAXSIZE':'max'})
    hail_agg.columns = hail_agg.columns.to_flat_index()
    hail_agg.columns = ['_'.join(col) if col[1] else col[0] for col in hail_agg.columns]
    hail_agg = hail_agg.reset_index()
    addresses = rg.search(list(zip(hail_agg['LAT_mean'], hail_agg['LON_mean'])))
    hail_agg['county'] = [x['admin2'] for x in addresses]
    hail_agg['state'] = [x['admin1'] for x in addresses]
    hail_agg = hail_agg[hail_agg['county'] != '']
    return hail_agg

### Cleaning mesocyclone data
We clean the mesocyclone data exactly analogously to how we clean the tornado and hail data.

In [10]:
def clean_meso(meso):
    meso['DATE'] = pd.to_datetime(meso['#ZTIME']).dt.normalize()
    meso.drop(columns=['#ZTIME'])
    groups = meso.groupby(['DATE', 'WSR_ID', 'CELL_ID'], as_index=False)
    meso_agg = groups.agg({'LAT':['mean', 'min', 'max'], 
                           'LON':['mean', 'min', 'max'], 
                           'STR_RANK':'max', 'LL_ROT_VEL':'max', 
                           'LL_DV':'max', 'LL_BASE':'max', 'DEPTH_KFT':'max', 
                           'DPTH_STMRL':'max', 'MAX_RV_KFT':'max', 'MAX_RV_KTS':'max', 
                           'TVS':'max', 'MSI':'max'})
    meso_agg.columns = meso_agg.columns.to_flat_index()
    meso_agg.columns = ['_'.join(col) if col[1] else col[0] for col in meso_agg.columns]
    meso_agg = meso_agg.reset_index()
    addresses = rg.search(list(zip(meso_agg['LAT_mean'], meso_agg['LON_mean'])))
    meso_agg['county'] = [x['admin2'] for x in addresses]
    meso_agg['state'] = [x['admin1'] for x in addresses]
    meso_agg = meso_agg[meso_agg['county'] != '']
    return meso_agg

### Cleaning storm structure data
We clean the storm structure data exactly analogously to how we clean the tornado, hail, and mesocyclone data.

In [11]:
def clean_storm(storm):
    storm['DATE'] = pd.to_datetime(storm['#ZTIME']).dt.normalize()
    storm.drop(columns=['#ZTIME'])
    groups = storm.groupby(['DATE', 'WSR_ID', 'CELL_ID'], as_index=False)
    storm_agg = groups.agg({'LAT':['mean','min','max'],
                            'LON':['mean','min','max'],
                            'MAX_REFLECT':'max', 'VIL':'max', 'HEIGHT':'max'})
    storm_agg.columns = storm_agg.columns.to_flat_index()
    storm_agg.columns = ['_'.join(col) if col[1] else col[0] for col in storm_agg.columns]
    storm_agg = storm_agg.reset_index()
    addresses = rg.search(list(zip(storm_agg['LAT_mean'], storm_agg['LON_mean'])))
    storm_agg['county'] = [x['admin2'] for x in addresses]
    storm_agg['state'] = [x['admin1'] for x in addresses]
    storm_agg = storm_agg[storm_agg['county'] != '']
    return storm_agg

## Merging
We merge cleaned weather data with cleaned power data by adding a column specifying whether or not the recorded weather event resulted in a power outage in the same area on the same date.

### Checking if weather and power event are in same county, or state if no county info

In [12]:
# get all US states
counties = pd.read_csv("../extras/uscounties.csv", index_col=0)
counties['county'] = counties['county'].astype(str)

In [13]:
def in_area(county,state,area_affected):
    """
    input:

    county, state: the county and state of the weather event
    area_affected: the area affected by the power outage(a string listing states and possibly counties)
    has_county_info: whether area_affected

    output: True if either state and county are both in area_affected, or
            False if state is in area_affected and there is no county info for area_affected
    """
    if not county or not state or not area_affected:
        raise Exception(f"Invalid (null) input. county: {county}, state: {state}, area_affected: {area_affected}")

    # adding a colon to state ensures that it's matched exactly to a state in area_affected
    # (rather than a county whose name is a state)
    stateC = ''.join([state,':'])

    # has_county_info is True if area_affected includes a county, false otherwise
    has_county_info = any(cty in area_affected for cty in counties[counties['state'] == state]['county'])
    
    return stateC in area_affected and (county in area_affected or not has_county_info)

### Merging weather and power

In [14]:
def merge_weather_power(weather,power):
    """
    Merge weather and power data.

    Assumes that the input data are already cleaned.
    """
    merged = pd.merge(weather, power, how='left', left_on='DATE', right_on='Date Event Began', indicator=True)
    merged['power_outage'] = merged.apply(lambda row: (row['_merge'] == 'both') and in_area(str(row['county']),
                                                                                            str(row['state']),
                                                                                            str(row['Area Affected'])),
                                          axis = 'columns')
    return merged.drop(columns=['Date Event Began', 'Area Affected', '_merge'])

In [15]:
def merge(year, event_type):
    weather = read_weather(year,event_type)
    if event_type == 'hail':
        weather = clean_hail(weather)
    elif event_type == 'storm_structure':
        weather = clean_storm(weather)
    elif event_type == 'tornados':
        weather = clean_tornado(weather)
    elif event_type == 'lightning':
        weather = clean_lightning(weather)
    elif event_type == 'mesocyclone':
        weather = clean_meso(weather)
    else:
        raise Exception(f'Invalid event type: {event_type}, must be one of {event_types.keys()}')
    power= pd.read_excel('../power_data/' + str(year) + '_Annual_Summary.xls', skiprows=1)
    power = clean_power(power)
    return merge_weather_power(weather,power)

In [16]:
%%time
merged_hail_2019 = merge(2019,'hail')

Downloading hail file for 2019.
Considering file  hail-2019.csv.gz
Loading formatted geocoded file...
CPU times: user 54.6 s, sys: 1.12 s, total: 55.7 s
Wall time: 56.7 s


In [17]:
merged_hail_2019

Unnamed: 0,index,DATE,WSR_ID,CELL_ID,LAT_mean,LAT_min,LAT_max,LON_mean,LON_min,LON_max,SEVPROB_max,PROB_max,MAXSIZE_max,county,state,power_outage
0,0,2019-01-01,KYUX,A0,32.593070,32.59307,32.59307,-116.090870,-116.09087,-116.09087,20,30,0.50,Tecate,Baja California,False
1,1,2019-01-01,PAIH,E2,60.499460,60.49946,60.49946,-144.926380,-144.92638,-144.92638,10,60,0.50,Valdez-Cordova Census Area,Alaska,False
2,2,2019-01-02,KOKC,C2,35.041930,35.04193,35.04193,-97.930750,-97.93075,-97.93075,10,40,0.50,Grady County,Oklahoma,False
3,3,2019-01-02,KOKC,F1,35.050460,35.03207,35.07137,-98.002725,-98.04260,-97.96720,30,90,0.50,Grady County,Oklahoma,False
4,4,2019-01-02,KOKC,G1,34.957015,34.83192,35.09178,-98.064268,-98.22154,-97.89702,40,90,0.50,Grady County,Oklahoma,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386963,367509,2019-12-31,KBOS,B3,42.469027,42.45360,42.49608,-71.071363,-71.08632,-71.05547,40,80,0.50,Middlesex County,Massachusetts,False
386964,367510,2019-12-31,KBOS,N3,42.439045,42.43170,42.44639,-70.827625,-70.83396,-70.82129,20,70,0.50,Essex County,Massachusetts,False
386965,367511,2019-12-31,KBOS,S3,42.478973,42.45435,42.52469,-70.955967,-70.99641,-70.92434,40,80,0.50,Essex County,Massachusetts,False
386966,367512,2019-12-31,KEMX,B0,31.629724,31.60418,31.65083,-111.287990,-111.29512,-111.27922,70,90,0.75,Santa Cruz County,Arizona,False


In [18]:
merged_hail_2019[merged_hail_2019['power_outage']]

Unnamed: 0,index,DATE,WSR_ID,CELL_ID,LAT_mean,LAT_min,LAT_max,LON_mean,LON_min,LON_max,SEVPROB_max,PROB_max,MAXSIZE_max,county,state,power_outage
608,560,2019-01-12,KPAH,B3,36.070730,36.07073,36.07073,-89.872550,-89.87255,-89.87255,10,30,0.5,Pemiscot County,Missouri,True
616,565,2019-01-13,KLTX,I3,32.987080,32.98708,32.98708,-76.404000,-76.40400,-76.40400,10,60,0.5,Carteret County,North Carolina,True
617,566,2019-01-13,KLTX,M3,32.633133,32.60073,32.65841,-77.030145,-77.15377,-76.92061,40,80,0.5,New Hanover County,North Carolina,True
618,567,2019-01-13,KLTX,O2,33.183120,33.18312,33.18312,-76.324310,-76.32431,-76.32431,10,70,0.5,Carteret County,North Carolina,True
619,568,2019-01-13,KLTX,O4,32.913970,32.89106,32.93115,-76.443790,-76.52548,-76.37509,30,80,0.5,Carteret County,North Carolina,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382675,363165,2019-10-26,KLIX,R1,29.232130,29.23213,29.23213,-90.878030,-90.87803,-90.87803,10,70,0.5,Terrebonne Parish,Louisiana,True
383831,364341,2019-11-27,KAPX,I1,43.862565,43.80087,43.92494,-83.385905,-83.44780,-83.32645,40,60,0.5,Huron County,Michigan,True
383879,364389,2019-11-27,KDTW,M8,42.630632,42.48599,42.77500,-83.356415,-83.62273,-83.09415,50,60,0.5,Oakland County,Michigan,True
383880,364390,2019-11-27,KDTW,P0,42.681590,42.68159,42.68159,-83.350550,-83.35055,-83.35055,10,10,0.5,Oakland County,Michigan,True


In [19]:
%%time
merged_storm_2019 = merge(2019,'storm_structure')

Downloading storm_structure file for 2019.
Considering file  structure-2019.csv.gz
CPU times: user 7min 38s, sys: 9.63 s, total: 7min 48s
Wall time: 7min 55s


In [20]:
merged_storm_2019

Unnamed: 0,index,DATE,WSR_ID,CELL_ID,LAT_mean,LAT_min,LAT_max,LON_mean,LON_min,LON_max,MAX_REFLECT_max,VIL_max,HEIGHT_max,county,state,power_outage
0,0,2019-01-01,KABX,A0,35.625237,34.76802,35.82839,-106.501728,-107.18525,-106.34230,38,1,5.4,Sandoval County,New Mexico,False
1,1,2019-01-01,KABX,A1,35.385530,35.38553,35.38553,-107.268540,-107.26854,-107.26854,32,0,4.0,Cibola County,New Mexico,False
2,2,2019-01-01,KABX,B0,34.944938,34.93090,34.95795,-106.137023,-106.15708,-106.10007,34,0,4.5,Torrance County,New Mexico,False
3,3,2019-01-01,KABX,B1,35.469160,35.46916,35.46916,-106.943330,-106.94333,-106.94333,32,0,2.1,Sandoval County,New Mexico,False
4,4,2019-01-01,KABX,C0,34.870822,34.82503,34.95707,-106.253751,-106.32265,-106.18486,39,1,5.6,Bernalillo County,New Mexico,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4523498,4388821,2019-12-31,PAKC,C0,58.628520,58.62852,58.62852,-156.465020,-156.46502,-156.46502,37,0,0.4,Dillingham Census Area,Alaska,False
4523499,4388822,2019-12-31,PAKC,D0,58.562250,58.56225,58.56225,-156.403970,-156.40397,-156.40397,38,0,1.9,Dillingham Census Area,Alaska,False
4523500,4388823,2019-12-31,PAKC,E0,58.413900,58.41390,58.41390,-156.247570,-156.24757,-156.24757,38,0,1.4,Dillingham Census Area,Alaska,False
4523501,4388824,2019-12-31,PHWA,A0,19.098876,19.01842,19.12380,-155.044696,-155.11135,-154.97125,50,4,4.1,Hawaii County,Hawaii,False


In [21]:
merged_storm_2019[merged_storm_2019['power_outage']]

Unnamed: 0,index,DATE,WSR_ID,CELL_ID,LAT_mean,LAT_min,LAT_max,LON_mean,LON_min,LON_max,MAX_REFLECT_max,VIL_max,HEIGHT_max,county,state,power_outage
42347,43124,2019-01-06,KATX,A0,48.100321,47.55283,48.89729,-123.322800,-124.06727,-122.84107,44,3,8.6,Clallam County,Washington,True
42350,43125,2019-01-06,KATX,A1,47.858592,47.79396,47.90099,-122.805993,-122.86225,-122.76327,50,2,2.3,Jefferson County,Washington,True
42353,43126,2019-01-06,KATX,A2,48.303560,48.30356,48.30356,-122.747280,-122.74728,-122.74728,53,1,1.0,Island County,Washington,True
42356,43127,2019-01-06,KATX,A3,47.925410,47.92541,47.92541,-122.300430,-122.30043,-122.30043,47,1,1.9,Snohomish County,Washington,True
42359,43128,2019-01-06,KATX,B0,48.146537,47.66395,48.77077,-122.798483,-123.12295,-122.55201,47,2,6.7,Jefferson County,Washington,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4361635,4216855,2019-11-27,KDTX,Y8,43.167782,42.71577,43.68760,-83.571591,-84.45681,-82.97610,41,1,6.9,Tuscola County,Michigan,True
4361637,4216857,2019-11-27,KDTX,Z0,42.192998,41.94747,42.48265,-84.007693,-85.15707,-83.22583,48,3,8.2,Washtenaw County,Michigan,True
4361639,4216859,2019-11-27,KDTX,Z2,42.773535,42.40975,43.11771,-84.031800,-84.83346,-82.70990,45,2,7.6,Livingston County,Michigan,True
4361640,4216860,2019-11-27,KDTX,Z3,42.729662,42.54794,42.97113,-84.017572,-84.70968,-83.81833,42,2,7.4,Livingston County,Michigan,True


In [22]:
%%time
merged_tornados_2019 = merge(2019,'tornados')

Downloading tornados file for 2019.
Considering file  tvs-2019.csv.gz
CPU times: user 7.73 s, sys: 186 ms, total: 7.92 s
Wall time: 8.4 s


In [23]:
merged_tornados_2019

Unnamed: 0,DATE,WSR_ID,CELL_ID,LAT_mean,LAT_min,LAT_max,LON_mean,LON_min,LON_max,AVGDV_max,LLDV_max,MXDV_max,MXDV_HEIGHT_max,DEPTH_max,MAX_SHEAR_max,MAX_SHEAR_HEIGHT_max,county,state,power_outage
0,2019-01-01,KBGM,E6,42.191970,42.19197,42.19197,-75.78346,-75.78346,-75.78346,38,62,62,11,6.8,112,11.2,Broome County,New York,False
1,2019-01-01,KBGM,I1,42.193790,42.19379,42.19379,-76.14176,-76.14176,-76.14176,35,60,60,9,5.1,128,9.4,Tioga County,New York,False
2,2019-01-01,KBGM,U0,42.026670,42.02667,42.02667,-76.11920,-76.11920,-76.11920,31,49,52,10,6.3,68,10.3,Tioga County,New York,False
3,2019-01-01,KBUF,D3,42.784605,42.74885,42.82036,-78.87106,-78.90013,-78.84199,48,55,62,2,5.9,112,2.3,Erie County,New York,False
4,2019-01-01,KBUF,O5,42.897760,42.89776,42.89776,-78.87968,-78.87968,-78.87968,42,49,65,9,8.1,146,9.0,Erie County,New York,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58470,2019-12-31,KLGX,W8,47.799510,47.79951,47.79951,-123.71715,-123.71715,-123.71715,39,72,72,3,12.2,26,3.3,Clallam County,Washington,False
58471,2019-12-31,KLGX,X4,47.286800,47.28680,47.28680,-123.68724,-123.68724,-123.68724,39,54,57,7,6.1,45,6.9,Grays Harbor County,Washington,False
58472,2019-12-31,KMIA,??,25.669440,25.66944,25.66944,-80.43903,-80.43903,-80.43903,32,69,69,0,5.9,187,0.1,Miami-Dade County,Florida,False
58473,2019-12-31,KRTX,W5,45.798710,45.79871,45.79871,-122.78723,-122.78723,-122.78723,33,24,80,6,6.8,148,5.9,Clark County,Washington,False


In [24]:
merged_tornados_2019[merged_tornados_2019['power_outage']]

Unnamed: 0,DATE,WSR_ID,CELL_ID,LAT_mean,LAT_min,LAT_max,LON_mean,LON_min,LON_max,AVGDV_max,LLDV_max,MXDV_max,MXDV_HEIGHT_max,DEPTH_max,MAX_SHEAR_max,MAX_SHEAR_HEIGHT_max,county,state,power_outage
276,2019-01-06,KLGX,D9,46.996280,46.99628,46.99628,-123.147680,-123.14768,-123.14768,35,58,58,2,7.0,23,1.9,Grays Harbor County,Washington,True
279,2019-01-06,KLGX,F8,46.980780,46.96219,46.99937,-123.165030,-123.17161,-123.15845,39,57,72,9,7.0,29,8.8,Grays Harbor County,Washington,True
282,2019-01-06,KLGX,K8,47.295940,47.29594,47.29594,-123.370580,-123.37058,-123.37058,41,40,92,4,5.6,46,3.7,Grays Harbor County,Washington,True
285,2019-01-06,KLGX,R8,46.825550,46.82555,46.82555,-123.483120,-123.48312,-123.48312,42,42,86,7,5.4,44,6.7,Grays Harbor County,Washington,True
288,2019-01-06,KPDT,E0,45.946230,45.93699,45.95547,-118.143255,-118.16340,-118.12311,37,78,78,4,7.2,37,3.9,Walla Walla County,Washington,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56227,2019-11-01,KJFK,W3,41.201525,41.19650,41.20655,-73.592350,-73.73914,-73.44556,37,67,87,13,35.0,37,13.1,Westchester County,New York,True
56233,2019-11-01,KJFK,Y6,40.950240,40.95024,40.95024,-73.796960,-73.79696,-73.79696,55,94,94,2,5.2,68,1.5,Westchester County,New York,True
56239,2019-11-01,KJFK,Z4,41.152150,41.15215,41.15215,-74.213070,-74.21307,-74.21307,39,36,97,5,8.3,42,4.7,Rockland County,New York,True
56266,2019-11-01,KOKX,X3,40.312590,40.31259,40.31259,-72.413670,-72.41367,-72.41367,37,92,92,3,5.2,37,3.1,Suffolk County,New York,True


In [25]:
%%time
merged_meso_2019 = merge(2019,'mesocyclone')

Downloading mesocyclone file for 2019.
Considering file  mda-2019.csv.gz
CPU times: user 2min 40s, sys: 1.11 s, total: 2min 41s
Wall time: 2min 43s


In [26]:
merged_meso_2019

Unnamed: 0,index,DATE,WSR_ID,CELL_ID,LAT_mean,LAT_min,LAT_max,LON_mean,LON_min,LON_max,...,LL_BASE_max,DEPTH_KFT_max,DPTH_STMRL_max,MAX_RV_KFT_max,MAX_RV_KTS_max,TVS_max,MSI_max,county,state,power_outage
0,0,2019-01-01,KABX,84,35.30391,35.30391,35.30391,-106.70199,-106.70199,-106.70199,...,14,12,100,24,63,N,5559,Sandoval County,New Mexico,False
1,1,2019-01-01,KABX,112,35.79095,35.79095,35.79095,-106.68525,-106.68525,-106.68525,...,7,12,100,11,46,N,3419,Sandoval County,New Mexico,False
2,2,2019-01-01,KABX,184,35.35228,35.35228,35.35228,-106.68135,-106.68135,-106.68135,...,9,8,100,16,62,N,4740,Sandoval County,New Mexico,False
3,3,2019-01-01,KABX,272,34.96357,34.96357,34.96357,-107.08421,-107.08421,-107.08421,...,9,4,100,12,46,N,3480,Cibola County,New Mexico,False
4,4,2019-01-01,KABX,328,35.44015,35.44015,35.44015,-106.72896,-106.72896,-106.72896,...,10,3,100,10,40,N,2431,Sandoval County,New Mexico,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
894881,843108,2019-12-31,KRTX,687,46.12340,46.12340,46.12340,-122.15070,-122.15070,-122.15070,...,7,9,100,9,42,N,3130,Clark County,Washington,False
894882,843109,2019-12-31,KRTX,914,45.79511,45.79511,45.79511,-123.11721,-123.11721,-123.11721,...,5,4,59,5,45,N,4107,Columbia County,Oregon,False
894883,843110,2019-12-31,KRTX,977,45.97730,45.97730,45.97730,-122.21961,-122.21961,-122.21961,...,6,5,100,6,45,N,3221,Clark County,Washington,False
894884,843111,2019-12-31,PAHG,64,60.75298,60.75298,60.75298,-151.22687,-151.22687,-151.22687,...,0,1,100,0,57,N,5629,Kenai Peninsula Borough,Alaska,False


In [27]:
merged_meso_2019[merged_meso_2019['power_outage']]

Unnamed: 0,index,DATE,WSR_ID,CELL_ID,LAT_mean,LAT_min,LAT_max,LON_mean,LON_min,LON_max,...,LL_BASE_max,DEPTH_KFT_max,DPTH_STMRL_max,MAX_RV_KFT_max,MAX_RV_KTS_max,TVS_max,MSI_max,county,state,power_outage
5426,5500,2019-01-06,KATX,28,48.254860,48.25486,48.25486,-122.582640,-122.58264,-122.58264,...,0,1,56,1,39,N,5179,Island County,Washington,True
5429,5501,2019-01-06,KATX,61,48.295170,48.29517,48.29517,-122.662970,-122.66297,-122.66297,...,0,2,80,0,39,N,4769,Island County,Washington,True
5432,5502,2019-01-06,KATX,302,48.104050,48.10405,48.10405,-122.641520,-122.64152,-122.64152,...,2,4,73,5,48,N,3618,Jefferson County,Washington,True
5435,5503,2019-01-06,KATX,659,48.293235,48.21511,48.37136,-122.480515,-122.63685,-122.32418,...,1,4,100,2,40,N,4127,Snohomish County,Washington,True
5438,5504,2019-01-06,KATX,816,48.249000,48.24416,48.25384,-122.619205,-122.65422,-122.58419,...,1,4,54,1,49,N,4884,Island County,Washington,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
851383,797533,2019-11-27,KIWX,564,42.225295,42.18953,42.26106,-83.680455,-83.72015,-83.64076,...,13,21,100,18,34,N,2626,Washtenaw County,Michigan,True
851384,797534,2019-11-27,KIWX,568,42.192870,42.19287,42.19287,-83.874400,-83.87440,-83.87440,...,15,15,100,15,30,N,2035,Washtenaw County,Michigan,True
851386,797536,2019-11-27,KIWX,781,42.044620,42.04462,42.04462,-83.769580,-83.76958,-83.76958,...,12,14,100,12,31,N,2649,Washtenaw County,Michigan,True
851387,797537,2019-11-27,KIWX,782,42.267450,42.26745,42.26745,-83.857420,-83.85742,-83.85742,...,12,15,100,12,37,N,2297,Washtenaw County,Michigan,True


In [34]:
%%time
merged_lightning_2019 = merge(2019,'lightning')

Downloading lightning file for 2019.
Considering file  nldn-tiles-2019.csv.gz
CPU times: user 3min 46s, sys: 1.57 s, total: 3min 48s
Wall time: 3min 52s


In [35]:
merged_lightning_2019

Unnamed: 0,#ZDAY,CENTERLON,CENTERLAT,TOTAL_COUNT,DATE,county,state,power_outage
0,2019-01-01,-83.4,36.3,1,2019-01-01,Grainger County,Tennessee,False
1,2019-01-01,-88.5,29.0,2,2019-01-01,Plaquemines Parish,Louisiana,False
2,2019-01-01,-87.2,32.0,1,2019-01-01,Wilcox County,Alabama,False
3,2019-01-01,-84.2,35.0,1,2019-01-01,Fannin County,Georgia,False
4,2019-01-01,-83.6,35.0,1,2019-01-01,Towns County,Georgia,False
...,...,...,...,...,...,...,...,...
2493776,2019-12-31,-76.9,31.4,2,2019-12-31,Brunswick County,North Carolina,False
2493777,2019-12-31,-77.0,31.5,1,2019-12-31,Brunswick County,North Carolina,False
2493778,2019-12-31,-77.6,31.5,1,2019-12-31,Brunswick County,North Carolina,False
2493779,2019-12-31,-77.4,31.5,1,2019-12-31,Brunswick County,North Carolina,False


In [37]:
merged_lightning_2019[merged_lightning_2019['power_outage']]

Unnamed: 0,#ZDAY,CENTERLON,CENTERLAT,TOTAL_COUNT,DATE,county,state,power_outage
2510,2019-01-06,-122.2,48.2,1,2019-01-06,Snohomish County,Washington,True
2513,2019-01-06,-122.0,48.3,1,2019-01-06,Snohomish County,Washington,True
2516,2019-01-06,-122.6,48.3,2,2019-01-06,Island County,Washington,True
2519,2019-01-06,-122.2,48.3,1,2019-01-06,Snohomish County,Washington,True
2522,2019-01-06,-122.1,48.4,1,2019-01-06,Skagit County,Washington,True
...,...,...,...,...,...,...,...,...
2446767,2019-11-27,-82.5,43.2,1,2019-11-27,Sanilac County,Michigan,True
2446794,2019-11-27,-82.9,42.9,2,2019-11-27,Macomb County,Michigan,True
2446846,2019-11-27,-83.8,43.5,1,2019-11-27,Tuscola County,Michigan,True
2446852,2019-11-27,-82.4,43.3,2,2019-11-27,Sanilac County,Michigan,True


In [43]:
merged_hail_2019.to_csv('../merged/merged_hail_2019.csv')

In [44]:
merged_storm_2019.to_csv('../merged/merged_storm_2019.csv')

In [45]:
merged_tornados_2019.to_csv('../merged/merged_tornados_2019.csv')

In [46]:
merged_meso_2019.to_csv('../merged/merged_meso_2019.csv')

In [47]:
merged_lightning_2019.to_csv('../merged/merged_lightning_2019.csv')