In [1]:
# Make the notebook fill the whole browser window.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` through `IPython.core.display` is a deprecated path.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import seaborn as sns; sns.set()
import csv

from PIL import Image, ImageDraw
import os
import shutil

from mpl_toolkits.basemap import Basemap

#there is currently a bug in panda's scatter_matrix that produces a warning that has no affect on what I'm doin :)
import warnings
warnings.filterwarnings("ignore")

In [2]:
#load in the data
# Neither file has a header row (header=None), so the column names are supplied explicitly.
lightning_columns = ["Year","Month","Day","Hour","Min","Sec","Lat","Long","Dist_East_West","Dist_North_South"]
track_columns = ["Year","Month","Day","Hour","Lat","Long","Min_Pressure","Max_Winds","Unused"]

# Lightning stroke locations. sep=r'\s+' splits on any run of whitespace; it is the
# documented replacement for the deprecated delim_whitespace=True flag.
df = pd.read_csv('./Irma Storm centered/ATL_17_11_Irma_WWLLN_Locations.txt',header=None,names=lightning_columns,low_memory=False,sep=r'\s+')

# Storm track file, tab-separated.
df_center = pd.read_csv('./Irma Storm centered/ATL_17_11_Irma_Reduced_Trackfile.txt',header=None,names=track_columns,low_memory=False,sep='\t')

Unnamed: 0,Year,Month,Day,Hour,Min,Sec,Lat,Long,Dist_East_West,Dist_North_South
0,2017,8,27,17,56,9.7207,12.7452,-13.496,271.571,138.46
1,2017,8,27,17,56,19.4011,12.0782,-16.2206,-23.9866,64.2929
2,2017,8,27,17,56,36.891,9.6958,-8.5119,820.729,-200.618
3,2017,8,27,17,56,42.7955,10.879,-10.62,587.47,-69.052
4,2017,8,27,17,56,46.1723,9.9119,-12.6214,370.074,-176.589


Unnamed: 0,Year,Month,Day,Hour,Lat,Long,Min_Pressure,Max_Winds,Unused
0,2017,8,27,18,11.5,-16.0,0,25,0
1,2017,8,28,0,11.5,-17.8,0,25,0
2,2017,8,28,6,11.6,-19.3,1009,25,0
3,2017,8,28,12,11.9,-20.5,1009,25,0
4,2017,8,28,18,12.3,-21.0,1008,25,0


In [None]:
# Render one map per half-hour window (minutes 0-29 and 30-59) of the selected hour.
# NOTE(review): `month`, `day`, `hour`, `ln`, `minLong_`, `minLat_`, `maxLong_`, `maxLat_`
# and `buffer` are not assigned anywhere above this cell in the visible notebook (`ln`, the
# bounds and `buffer` are assigned in a later cell; `month`/`day`/`hour` nowhere) -- on a
# fresh kernel they must be defined before this cell runs. TODO confirm intended source.
frame_dir = "./data/Irma/"
os.makedirs(frame_dir, exist_ok=True)  # savefig does not create missing directories

for minute in range(0,60,30):
    #Make big map
    fig = plt.figure(figsize=(30,15))
    m = Basemap(llcrnrlon=minLong_-buffer, llcrnrlat=minLat_-buffer,urcrnrlon=maxLong_+buffer,urcrnrlat=maxLat_+buffer,lon_0=0,lat_0=0)
    m.drawmapboundary(fill_color='#A6CAE0', linewidth=0)
    m.fillcontinents(color='grey', alpha=0.7, lake_color='grey')
    m.drawcoastlines(linewidth=0.1, color="white")
    #Plot ticks for lat/long
    plt.xticks(np.arange(minLong_-buffer,maxLong_+buffer,step=5))
    plt.yticks(np.arange(minLat_-buffer,maxLat_+buffer,step=5))
    #Title the map 2017:month:day::hour:minute
    plt.title("2017:" + '{:2d}'.format(int(month)) + ":" + '{:2d}'.format(int(day)) + "::" + '{:2d}'.format(int(hour)) + ":" + '{:2d}'.format(int(minute)))
    #Plot the center of Irma: the position comes from the track file (df_center),
    #not from the lightning strokes held in df
    center_now = df_center[(df_center['Month'] == month) & (df_center['Day'] == day) & (df_center['Hour'] == hour)]
    m.plot(center_now['Long'], center_now['Lat'], linestyle='none', marker="o", markersize=30, alpha=1, c="red", markeredgecolor="black", markeredgewidth=1)
    #Plot all of the lightning in the half-open window [minute, minute + 30), so a stroke
    #at minute 30 is drawn in exactly one frame
    strokes_now = ln[(ln['Month'] == month) & (ln['Day'] == day) & (ln['Hour'] == hour) & (ln['Min'] >= minute) & (ln['Min'] < minute + 30)]
    m.plot(strokes_now['Long'], strokes_now['Lat'], linestyle='none', marker="X", markersize=8, alpha=.7, c="yellow", markeredgecolor="black", markeredgewidth=1)
    #Save and close the figure (closing keeps figures from piling up in memory)
    plt.savefig(frame_dir + "2017_" + str(int(month)) + "_" + str(int(day)) + "_" + str(int(hour)) + "_" + str(int(minute)),bbox_inches='tight')
    plt.close(fig)

In [None]:
# Discard every row whose latitude or longitude is missing, then preview the result.
df = df.dropna(subset=['Lat', 'Long'])
df.head()

In [None]:
# Keep only the two coordinate columns as the clustering input and preview them.
coordinate_columns = ['Lat', 'Long']
coord_df = df.loc[:, coordinate_columns]
coord_df.head()

In [None]:
#elbow graph for visual estimation of ideal cluster count
clusters = range(1,16)

# Fit on latitude and longitude together: this is the feature space the clustering
# cells below work in, so the elbow has to be read off the same data.
features = df[['Lat', 'Long']]

# A fixed random_state makes the curve reproducible from run to run.
kmeans = [KMeans(n_clusters=i, random_state=0) for i in clusters]

# KMeans.score is the opposite (negative) of the k-means objective, so the curve
# rises towards 0 as the number of clusters grows.
score = [model.fit(features).score(features) for model in kmeans]

plt.plot(clusters, score)
plt.xlabel("Number of clusters")
plt.ylabel("KMeans score (negative inertia)")

plt.show()

In [None]:
# Print the k-means inertia for every cluster count from 1 to 15.
# cluster_df is another name for coord_df here (no copy is made).
cluster_df = coord_df
for n_clusters in range(1, 16):
    kmeans = KMeans(init='k-means++', n_clusters=n_clusters).fit(cluster_df)
    inertia = kmeans.inertia_
    print("Cluster count: ", n_clusters, "\tInertia: ", inertia)

In [None]:
# Scatter of the points in cluster_df, latitude on the x axis and longitude on the y axis.
cluster_df.plot(kind='scatter', x='Lat', y='Long', c='pink', s=50, cmap='spring')
plt.show()

In [None]:
#kmeans using the set cluster count
feature_cols = ['Lat', 'Long']
# random_state pins the initialisation so the labels are reproducible.
kmeans = KMeans(n_clusters=4, init='k-means++', random_state=0)
# cluster_df is the same object as coord_df (no copy), so the 'cluster' column written
# below also shows up on coord_df.
cluster_df = coord_df
# Fit once, on the coordinate columns only: a 'cluster' column left over from an earlier
# run of this cell must never be used as a feature, otherwise the cell cannot be re-run.
labels = kmeans.fit_predict(cluster_df[feature_cols])
cluster_df['cluster'] = labels
inertia = kmeans.inertia_
# One row per cluster, columns in feature_cols order (Lat, Long).
centers = kmeans.cluster_centers_

ax = cluster_df.plot.scatter(x='Lat',y='Long', c=labels, s=50, cmap='spring', figsize=(20,20))
# Overlay the cluster centres on the same axes (x = Lat, y = Long, as above).
ax.scatter(centers[:, 0], centers[:, 1], c='black', s= 200, alpha=.5)

plt.show()

In [None]:
#kmeans using the set cluster count
kmeans = KMeans(n_clusters=14, init='k-means++')
cluster_df = coord_df
kmeans.fit(cluster_df)
cluster_df['cluster'] = kmeans.fit_predict(cluster_df)
inertia = kmeans.inertia_
labels = kmeans.predict(cluster_df[cluster_df.columns[:]])
centers = kmeans.cluster_centers_

cluster_df.plot.scatter(x='Lat',y='Long', c=labels, s=50, cmap='spring', figsize=(40,40))
plt.scatter(centers[:, 0], centers[:, 1], c='black', s= 200, alpha=.5)

plt.show()

In [None]:
centers_df = pd.DataFrame(data=centers, columns=["Lat", "Long", "delete"])
centers_df = centers_df.iloc[:,:2]
# Reload the lightning strokes into `ln`; the extremes of its Lat/Long columns give the map extent.
# sep=r'\s+' splits on any run of whitespace, the same way this file is parsed into `df`
# at the top of the notebook, so both frames see identical columns.
ln = pd.read_csv('./Irma Storm centered/ATL_17_11_Irma_WWLLN_Locations.txt',header=None,names=["Year","Month","Day","Hour","Min","Sec","Lat","Long","Dist_East_West","Dist_North_South"],low_memory=False,sep=r'\s+')
minLong_, minLat_, maxLong_, maxLat_ = ln['Long'].min(), ln['Lat'].min(), ln['Long'].max(), ln['Lat'].max()
# Margin subtracted from / added to the Lat/Long bounds when the map corners are computed.
buffer = 10

In [None]:
# Draw the cluster centres on a map whose corners are the lightning bounds padded by `buffer`.
plt.figure(figsize=(20,20))
map_bounds = dict(
    llcrnrlon=minLong_-buffer,
    llcrnrlat=minLat_-buffer,
    urcrnrlon=maxLong_+buffer,
    urcrnrlat=maxLat_+buffer,
)
m = Basemap(lon_0=0, lat_0=0, **map_bounds)
m.drawmapboundary(fill_color='#A6CAE0', linewidth=0)
m.fillcontinents(color='grey', alpha=0.7, lake_color='grey')
m.drawcoastlines(linewidth=0.1, color="white")

# Plot the cluster centres (rows of centers_df) as large orange markers
center_style = dict(linestyle='none', marker="o", markersize=75, alpha=0.7, c="orange", markeredgecolor="black", markeredgewidth=1)
m.plot(centers_df['Long'], centers_df['Lat'], **center_style)
plt.title("0 minutes")
plt.savefig("./data/test",bbox_inches='tight')