In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

In [2]:
# Coordinates for tornado disasters in Columbus, 1950-2010: http://www.usa.com/columbus-oh-natural-disasters-extremes.htm
tornado_df = pd.read_csv("Data/HistoricalTornadoEvents.csv")
tornado_df.dropna(axis=0, inplace=True)

In [3]:
def deg_to_decimal(d, m, s, direction):
    res = int(d) + float(m)/60 + float(s)/3600
    if direction in ('S','W'):
        res *= -1
    return res

def tornado_df_coords(str_col):
    tornado_arr = np.array(tornado_df[str_col])
    coords = [coord.split(" / ") for coord in tornado_arr]
    print(coords)

    p = re.compile('(\d+)°(\d+)\'([N|S|E|W])')
    coords_new = []
    for lat_long in coords:
        lat = p.match(lat_long[0])
        lat_dd = deg_to_decimal(lat.group(1), lat.group(2), 0, lat.group(3))

        long = p.match(lat_long[1])
        long_dd = deg_to_decimal(long.group(1), long.group(2), 0, long.group(3))
        
        coords_new.append([lat_dd, long_dd])

    return coords_new

starts = tornado_df_coords('Start Lat/Log')
ends = tornado_df_coords('End Lat/Log')

[["39°55'N", "83°15'W"], ["39°58'N", "82°56'W"], ["40°05'N", "82°51'W"], ["40°05'N", "82°47'W"], ["40°06'N", "83°07'W"], ["40°08'N", "82°50'W"], ["39°55'N", "82°48'W"], ["39°43'N", "83°12'W"], ["39°55'N", "83°31'W"], ["39°39'N", "82°58'W"], ["39°43'N", "83°16'W"], ["39°50'N", "83°34'W"], ["40°11'N", "82°44'W"], ["39°36'N", "82°59'W"], ["39°35'N", "82°58'W"], ["40°23'N", "83°15'W"], ["40°01'N", "83°36'W"], ["39°55'N", "82°30'W"], ["40°02'N", "82°27'W"], ["40°25'N", "83°01'W"], ["40°10'N", "82°43'W"], ["40°05'N", "82°25'W"], ["40°14'N", "82°29'W"], ["39°29'N", "83°34'W"], ["40°30'N", "83°11'W"], ["39°47'N", "83°43'W"], ["40°30'N", "82°54'W"], ["39°45'N", "82°24'W"], ["40°26'N", "82°56'W"], ["40°21'N", "83°33'W"], ["40°30'N", "82°45'W"], ["40°31'N", "83°12'W"], ["40°23'N", "83°32'W"], ["40°14'N", "83°50'W"], ["39°30'N", "83°29'W"], ["39°29'N", "83°34'W"], ["40°06'N", "83°50'W"], ["40°04'N", "82°15'W"], ["40°36'N", "82°40'W"], ["40°14'N", "82°11'W"], ["40°40'N", "82°54'W"], ["40°36'N", "82

In [4]:
def convert(coords):
    lat = [coords[i][0] for i in range(len(coords))]
    long = [coords[i][1] for i in range(len(coords))]
    dist = 2*list(tornado_df["Distance (miles)"])
    magnitude = 2*list(tornado_df["Magnitude"])
    
    p = re.compile('(\d+.\d+)\sMile[s]*')
    length = [float(p.match(l).group(1)) for l in 2*list(tornado_df["Length"])]
    
    d = {"Lat": lat, "Long": long, "Distance": dist, "Magnitude": magnitude, "Length": length}
    df = pd.DataFrame(data=d)
    df.to_excel("Disaster_Locations_Coords.xlsx", index=False)
    return df

dist_mag_tornado_df = convert(starts+ends)

In [5]:
# !pip3 install LatLon
# import LatLon
# from LatLon import Latitude, Longitude

In [7]:
traintest = pd.read_csv('Data/train_test.csv') # traintest has 158 rows

# find matching coordinates with smallest distance
def calc_smallest_dist(comp):
    d = []
    vals = {"Distance": [], "Magnitude": [], "Length": []}
    km_per_mi = 1.60934
    for outer in range(len(traintest)):
        smallest = np.inf
        tornado_dist = 0
        tornado_mag = 0
        tornado_length = 0
#         traintest_coord = LatLon.LatLon(Latitude(traintest.loc[outer, 'Latitude']), Longitude(traintest.loc[outer, 'Longitude']))
#         comp_coord = LatLon(Latitude(comp.loc[ctr, 'Lat']), Longitude(comp.loc[ctr, 'Long']))
        for ctr in range(len(comp)):
            dist = np.sqrt(((traintest.loc[outer, 'Latitude'] - comp.loc[ctr, 'Lat']) ** 2) + ((traintest.loc[outer, 'Longitude'] - comp.loc[ctr, 'Long']) ** 2))
#             dist = traintest_coord.distance(comp_coord) * km_per_mi
            if dist < smallest:
                smallest = dist
                tornado_dist = comp.loc[ctr, 'Distance']
                tornado_mag = int(comp.loc[ctr, 'Magnitude'])
                tornado_length = comp.loc[ctr, 'Length']
        d.append(smallest)
        vals["Distance"].append(tornado_dist)
        vals["Magnitude"].append(tornado_mag)
        vals["Length"].append(tornado_length)
    return (d, vals)

distances, vals = calc_smallest_dist(dist_mag_tornado_df)

print(distances)
print(vals)

[0.04417460876014546, 0.09568101263166083, 0.06923586310254776, 0.07394245632397294, 0.029955315360344487, 0.029010678597306376, 0.1604411404097678, 0.0963994661306607, 0.05758441059484488, 0.09049209660027101, 0.08425019325068489, 0.0601598680071722, 0.07190369687374819, 0.08993710673377936, 0.08665746126188932, 0.07720404456969795, 0.10381749605965464, 0.10384181737890204, 0.06944552325783881, 0.08246686830401509, 0.02851616150926451, 0.06097284753250846, 0.07272689004838864, 0.08213507590819522, 0.07095275516066374, 0.12648019483873194, 0.029505824273332204, 0.029351434800913435, 0.07073798364362387, 0.1168224377382608, 0.07728718048476901, 0.10841640864944338, 0.08670473535870697, 0.09151692037869447, 0.0718738527174124, 0.05445536564216673, 0.019215117107228303, 0.06653947708774635, 0.13494940169147768, 0.07354174725207553, 0.11259834614795004, 0.1044015850218019, 0.12230926228374185, 0.12253986296842324, 0.06197324850118666, 0.07855761368588468, 0.07900901704957965, 0.05619314516

In [9]:
final_vals = {"Distance": [], "Magnitude": []}

# if dist < tornado_length/69, leave as is, or it will be None (further than 1 mile)
miles_per_deg = 69
for ctr in range(len(distances)):
    miles_within = vals["Length"][ctr]
    if distances[ctr] < miles_within/miles_per_deg:
        final_vals["Distance"].append(vals["Distance"][ctr])
        final_vals["Magnitude"].append(vals["Magnitude"][ctr])
    else:
        final_vals["Distance"].append(0)
        final_vals["Magnitude"].append(0)
        
d = {"Lat": traintest['Latitude'], "Long": traintest['Longitude'], "Distance": final_vals["Distance"], "Magnitude": final_vals["Magnitude"]}
df = pd.DataFrame(data=d)
df.to_excel("Train_Test_Tornado_Values.xlsx", index=False)
df
    
# print(final_vals)

Unnamed: 0,Lat,Long,Distance,Magnitude
0,40.057068,-82.885518,12.6,2
1,39.974166,-83.028720,5.5,3
2,39.963572,-83.002500,5.5,3
3,39.975215,-83.006780,5.5,3
4,40.073967,-83.131485,13.1,2
5,39.972424,-82.904900,5.5,3
6,39.928299,-83.089981,5.2,3
7,40.020989,-83.061440,13.1,2
8,39.954532,-82.877042,5.5,3
9,39.888206,-82.885900,18.3,3
