In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import scipy.stats as stats
from scipy.stats import linregress
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from scipy.optimize import curve_fit
import scipy.stats as st


import datetime
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Location of the storms dataset, given relative to the working directory.
filepath = 'Resources/storms_updated.csv'

# Load the CSV into a DataFrame.
storms_df = pd.read_csv(filepath_or_buffer=filepath)

# Show the first five rows as a quick sanity check of the load.
storms_df.head(n=5)

Unnamed: 0,name,year,month,day,hour,lat,long,status,category,wind,pressure,tropicalstorm_force_diameter,hurricane_force_diameter
0,AL011852,1852,8,26,6,30.2,-88.6,hurricane,3,100,961,,
1,AL031853,1853,9,3,12,19.7,-56.2,hurricane,4,130,924,,
2,AL031854,1854,9,7,12,28.0,-78.6,hurricane,3,110,938,,
3,AL031854,1854,9,8,18,31.6,-81.1,hurricane,3,100,950,,
4,AL031854,1854,9,8,20,31.7,-81.1,hurricane,3,100,950,,


In [3]:
# Summarize each column's dtype and non-null count to spot missing data.
storms_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17786 entries, 0 to 17785
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   name                          17786 non-null  object 
 1   year                          17786 non-null  int64  
 2   month                         17786 non-null  int64  
 3   day                           17786 non-null  int64  
 4   hour                          17786 non-null  int64  
 5   lat                           17786 non-null  float64
 6   long                          17786 non-null  float64
 7   status                        17786 non-null  object 
 8   category                      17786 non-null  int64  
 9   wind                          17786 non-null  int64  
 10  pressure                      17786 non-null  int64  
 11  tropicalstorm_force_diameter  6744 non-null   float64
 12  hurricane_force_diameter      6744 non-null   float64
dtypes

In [4]:
# Build a calendar date from the year/month/day columns.
# Passing the three columns to pd.to_datetime as a frame assembles every date
# in one vectorized call, instead of formatting and parsing a string per row
# through apply(axis=1).
# The 'hour' column is not included, so each resulting timestamp is at midnight.
storms_df['combined_date'] = pd.to_datetime(storms_df[['year', 'month', 'day']])

In [5]:
# Preview the first rows to check the newly added combined_date column.
storms_df.head()

Unnamed: 0,name,year,month,day,hour,lat,long,status,category,wind,pressure,tropicalstorm_force_diameter,hurricane_force_diameter,combined_date
0,AL011852,1852,8,26,6,30.2,-88.6,hurricane,3,100,961,,,1852-08-26
1,AL031853,1853,9,3,12,19.7,-56.2,hurricane,4,130,924,,,1853-09-03
2,AL031854,1854,9,7,12,28.0,-78.6,hurricane,3,110,938,,,1854-09-07
3,AL031854,1854,9,8,18,31.6,-81.1,hurricane,3,100,950,,,1854-09-08
4,AL031854,1854,9,8,20,31.7,-81.1,hurricane,3,100,950,,,1854-09-08


In [6]:
# Remove the two storm-diameter columns (info() reports them non-null in only
# 6,744 of 17,786 rows).
# The result is rebound instead of using inplace=True, and errors='ignore'
# skips columns that are already gone, so re-running this cell does not raise
# KeyError.
storms_df = storms_df.drop(
    columns=['tropicalstorm_force_diameter', 'hurricane_force_diameter'],
    errors='ignore',
)

In [7]:
# Preview the first rows to confirm the two diameter columns are gone.
storms_df.head()

Unnamed: 0,name,year,month,day,hour,lat,long,status,category,wind,pressure,combined_date
0,AL011852,1852,8,26,6,30.2,-88.6,hurricane,3,100,961,1852-08-26
1,AL031853,1853,9,3,12,19.7,-56.2,hurricane,4,130,924,1853-09-03
2,AL031854,1854,9,7,12,28.0,-78.6,hurricane,3,110,938,1854-09-07
3,AL031854,1854,9,8,18,31.6,-81.1,hurricane,3,100,950,1854-09-08
4,AL031854,1854,9,8,20,31.7,-81.1,hurricane,3,100,950,1854-09-08


In [10]:
# Desired column order: storm name and the combined date first, followed by
# the individual date/time parts, the position, and the status/intensity fields.
new_columns = [
    'name', 'combined_date',
    'year', 'month', 'day', 'hour',
    'lat', 'long',
    'status', 'category', 'wind', 'pressure',
]

# Select the columns in that order and preview the result.
storms_df = storms_df[new_columns]
storms_df.head()

Unnamed: 0,name,combined_date,year,month,day,hour,lat,long,status,category,wind,pressure
0,AL011852,1852-08-26,1852,8,26,6,30.2,-88.6,hurricane,3,100,961
1,AL031853,1853-09-03,1853,9,3,12,19.7,-56.2,hurricane,4,130,924
2,AL031854,1854-09-07,1854,9,7,12,28.0,-78.6,hurricane,3,110,938
3,AL031854,1854-09-08,1854,9,8,18,31.6,-81.1,hurricane,3,100,950
4,AL031854,1854-09-08,1854,9,8,20,31.7,-81.1,hurricane,3,100,950
