In [2]:
# imports to process data using DASK
#from dask import delayed
import dask.dataframe as ddf
from dask.diagnostics import ProgressBar

# imports for data analysis and visualization
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

# imports for downloading data from FTP site
import os
from ftplib import FTP


# imports to perform spatial aggregation using ArcGIS GeoAnalytics server
#from arcgis.gis import GIS
#from arcgis.geoanalytics import get_datastores
#from arcgis.geoanalytics.summarize_data import reconstruct_tracks
#import arcgis

# miscellaneous imports
from pprint import pprint
from copy import deepcopy

### IBTrACS Data

In [3]:
csv_path = r'IBTrACS v04/ibtracs.ALL.list.v04r00.csv'

df = pd.read_csv(csv_path)
df.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,SID,SEASON,NUMBER,BASIN,SUBBASIN,NAME,ISO_TIME,NATURE,LAT,LON,...,BOM_GUST_PER,REUNION_GUST,REUNION_GUST_PER,USA_SEAHGT,USA_SEARAD_NE,USA_SEARAD_SE,USA_SEARAD_SW,USA_SEARAD_NW,STORM_SPEED,STORM_DIR
0,,Year,,,,,,,degrees_north,degrees_east,...,second,kts,second,ft,nmile,nmile,nmile,nmile,kts,degrees
1,1842298N11080,1842,1.0,NI,BB,NOT_NAMED,1842-10-25 03:00:00,NR,10.9000,80.3000,...,,,,,,,,,9,266
2,1842298N11080,1842,1.0,NI,BB,NOT_NAMED,1842-10-25 06:00:00,NR,10.8709,79.8265,...,,,,,,,,,9,267
3,1842298N11080,1842,1.0,NI,BB,NOT_NAMED,1842-10-25 09:00:00,NR,10.8431,79.3524,...,,,,,,,,,9,267
4,1842298N11080,1842,1.0,NI,BB,NOT_NAMED,1842-10-25 12:00:00,NR,10.8188,78.8772,...,,,,,,,,,9,267


In [4]:
#Skip row 2 - a header
df = pd.read_csv(csv_path, skiprows=[1])
df.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,SID,SEASON,NUMBER,BASIN,SUBBASIN,NAME,ISO_TIME,NATURE,LAT,LON,...,BOM_GUST_PER,REUNION_GUST,REUNION_GUST_PER,USA_SEAHGT,USA_SEARAD_NE,USA_SEARAD_SE,USA_SEARAD_SW,USA_SEARAD_NW,STORM_SPEED,STORM_DIR
0,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 03:00:00,NR,10.9,80.3,...,,,,,,,,,9,266
1,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 06:00:00,NR,10.8709,79.8265,...,,,,,,,,,9,267
2,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 09:00:00,NR,10.8431,79.3524,...,,,,,,,,,9,267
3,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 12:00:00,NR,10.8188,78.8772,...,,,,,,,,,9,267
4,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 15:00:00,NR,10.8,78.4,...,,,,,,,,,9,268


In [5]:
df.shape

(701349, 163)

In [6]:
df.to_csv('IBTrACS v04/IBTrACS_v04_clean01.csv')

In [7]:
df = pd.read_csv('IBTrACS v04/IBTrACS_v04_clean01.csv')
df.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0.1,Unnamed: 0,SID,SEASON,NUMBER,BASIN,SUBBASIN,NAME,ISO_TIME,NATURE,LAT,...,BOM_GUST_PER,REUNION_GUST,REUNION_GUST_PER,USA_SEAHGT,USA_SEARAD_NE,USA_SEARAD_SE,USA_SEARAD_SW,USA_SEARAD_NW,STORM_SPEED,STORM_DIR
0,0,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 03:00:00,NR,10.9,...,,,,,,,,,9,266
1,1,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 06:00:00,NR,10.8709,...,,,,,,,,,9,267
2,2,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 09:00:00,NR,10.8431,...,,,,,,,,,9,267
3,3,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 12:00:00,NR,10.8188,...,,,,,,,,,9,267
4,4,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 15:00:00,NR,10.8,...,,,,,,,,,9,268


### Cleaning hurricane observations with Dask

In [8]:
#DASK allows you to work with larger than memory datasets. 
#These datasets can reside as one large file or as multiple files in a folder. 
#For the latter, DASK allows you to just specify the folder containing the datasets as input. 
#In turn, it provides you a single DataFrame object that represents all your datasets combined together. 
#The operations you perform on this DataFrame get queued and executed only when necessary.

In [9]:
df.dtypes

Unnamed: 0        int64
SID              object
SEASON            int64
NUMBER            int64
BASIN            object
                  ...  
USA_SEARAD_SE    object
USA_SEARAD_SW    object
USA_SEARAD_NW    object
STORM_SPEED      object
STORM_DIR        object
Length: 164, dtype: object

## Preemptively, specify the assortment of values that should be treated as null values.

In [10]:
table_na_values=['-999.','-999','-999.000', '-1', '-1.0','0','0.0']
full_df = ddf.read_csv('IBTrACS v04/IBTrACS_v04_clean01.csv', na_values=table_na_values, dtype={'LANDFALL': 'object',
       'STORM_DIR': 'object',
       'STORM_SPEED': 'object',
       'TD9636_LAT': 'object',
       'TD9636_LON': 'object',
       'TD9636_STAGE': 'object', 'USA_SSHS' : 'object'})

In [11]:
full_df.head()

Unnamed: 0.1,Unnamed: 0,SID,SEASON,NUMBER,BASIN,SUBBASIN,NAME,ISO_TIME,NATURE,LAT,...,BOM_GUST_PER,REUNION_GUST,REUNION_GUST_PER,USA_SEAHGT,USA_SEARAD_NE,USA_SEARAD_SE,USA_SEARAD_SW,USA_SEARAD_NW,STORM_SPEED,STORM_DIR
0,,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 03:00:00,NR,10.9,...,,,,,,,,,9,266
1,1.0,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 06:00:00,NR,10.8709,...,,,,,,,,,9,267
2,2.0,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 09:00:00,NR,10.8431,...,,,,,,,,,9,267
3,3.0,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 12:00:00,NR,10.8188,...,,,,,,,,,9,267
4,4.0,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 15:00:00,NR,10.8,...,,,,,,,,,9,268


In [12]:
# Drop the first duplicate index column.
full_df = full_df.drop(labels=['Unnamed: 0'], axis=1)

In [13]:
all_columns=list(full_df.columns)
len(all_columns)

163

In [14]:
#This dataset has 200 columns. Not all are unique, as you can see from the print out below:
pprint(all_columns, compact=True, width=100)

['SID', 'SEASON', 'NUMBER', 'BASIN', 'SUBBASIN', 'NAME', 'ISO_TIME', 'NATURE', 'LAT', 'LON',
 'WMO_WIND', 'WMO_PRES', 'WMO_AGENCY', 'TRACK_TYPE', 'DIST2LAND', 'LANDFALL', 'IFLAG', 'USA_AGENCY',
 'USA_ATCF_ID', 'USA_LAT', 'USA_LON', 'USA_RECORD', 'USA_STATUS', 'USA_WIND', 'USA_PRES',
 'USA_SSHS', 'USA_R34_NE', 'USA_R34_SE', 'USA_R34_SW', 'USA_R34_NW', 'USA_R50_NE', 'USA_R50_SE',
 'USA_R50_SW', 'USA_R50_NW', 'USA_R64_NE', 'USA_R64_SE', 'USA_R64_SW', 'USA_R64_NW', 'USA_POCI',
 'USA_ROCI', 'USA_RMW', 'USA_EYE', 'TOKYO_LAT', 'TOKYO_LON', 'TOKYO_GRADE', 'TOKYO_WIND',
 'TOKYO_PRES', 'TOKYO_R50_DIR', 'TOKYO_R50_LONG', 'TOKYO_R50_SHORT', 'TOKYO_R30_DIR',
 'TOKYO_R30_LONG', 'TOKYO_R30_SHORT', 'TOKYO_LAND', 'CMA_LAT', 'CMA_LON', 'CMA_CAT', 'CMA_WIND',
 'CMA_PRES', 'HKO_LAT', 'HKO_LON', 'HKO_CAT', 'HKO_WIND', 'HKO_PRES', 'NEWDELHI_LAT',
 'NEWDELHI_LON', 'NEWDELHI_GRADE', 'NEWDELHI_WIND', 'NEWDELHI_PRES', 'NEWDELHI_CI', 'NEWDELHI_DP',
 'NEWDELHI_POCI', 'REUNION_LAT', 'REUNION_LON', 'REUNION_TYPE', 

In [15]:
label = [
 'WMO_WIND', 'WMO_PRES', 'WMO_AGENCY', 'TRACK_TYPE', 'DIST2LAND', 'IFLAG', 'USA_AGENCY',
 'USA_ATCF_ID', 'USA_LAT', 'USA_LON', 'USA_RECORD', 'USA_PRES',
 'USA_R34_NE', 'USA_R34_SE', 'USA_R34_SW', 'USA_R34_NW', 'USA_R50_NE', 'USA_R50_SE',
 'USA_R50_SW', 'USA_R50_NW', 'USA_R64_NE', 'USA_R64_SE', 'USA_R64_SW', 'USA_R64_NW', 'USA_POCI',
 'USA_ROCI', 'USA_RMW', 'USA_EYE', 'TOKYO_LAT', 'TOKYO_LON', 'TOKYO_GRADE', 'TOKYO_WIND',
 'TOKYO_PRES', 'TOKYO_R50_DIR', 'TOKYO_R50_LONG', 'TOKYO_R50_SHORT', 'TOKYO_R30_DIR',
 'TOKYO_R30_LONG', 'TOKYO_R30_SHORT', 'TOKYO_LAND', 'CMA_LAT', 'CMA_LON', 'CMA_CAT', 'CMA_WIND',
 'CMA_PRES', 'HKO_LAT', 'HKO_LON', 'HKO_CAT', 'HKO_WIND', 'HKO_PRES', 'NEWDELHI_LAT',
 'NEWDELHI_LON', 'NEWDELHI_GRADE', 'NEWDELHI_WIND', 'NEWDELHI_PRES', 'NEWDELHI_CI', 'NEWDELHI_DP',
 'NEWDELHI_POCI', 'REUNION_LAT', 'REUNION_LON', 'REUNION_TYPE', 'REUNION_WIND', 'REUNION_PRES',
 'REUNION_TNUM', 'REUNION_CI', 'REUNION_RMW', 'REUNION_R34_NE', 'REUNION_R34_SE', 'REUNION_R34_SW',
 'REUNION_R34_NW', 'REUNION_R50_NE', 'REUNION_R50_SE', 'REUNION_R50_SW', 'REUNION_R50_NW',
 'REUNION_R64_NE', 'REUNION_R64_SE', 'REUNION_R64_SW', 'REUNION_R64_NW', 'BOM_LAT', 'BOM_LON',
 'BOM_TYPE', 'BOM_WIND', 'BOM_PRES', 'BOM_TNUM', 'BOM_CI', 'BOM_RMW', 'BOM_R34_NE', 'BOM_R34_SE',
 'BOM_R34_SW', 'BOM_R34_NW', 'BOM_R50_NE', 'BOM_R50_SE', 'BOM_R50_SW', 'BOM_R50_NW', 'BOM_R64_NE',
 'BOM_R64_SE', 'BOM_R64_SW', 'BOM_R64_NW', 'BOM_ROCI', 'BOM_POCI', 'BOM_EYE', 'BOM_POS_METHOD',
 'BOM_PRES_METHOD', 'NADI_LAT', 'NADI_LON', 'NADI_CAT', 'NADI_WIND', 'NADI_PRES', 'WELLINGTON_LAT',
 'WELLINGTON_LON', 'WELLINGTON_WIND', 'WELLINGTON_PRES', 'DS824_LAT', 'DS824_LON', 'DS824_STAGE',
 'DS824_WIND', 'DS824_PRES', 'TD9636_LAT', 'TD9636_LON', 'TD9636_STAGE', 'TD9636_WIND',
 'TD9636_PRES', 'TD9635_LAT', 'TD9635_LON', 'TD9635_WIND', 'TD9635_PRES', 'TD9635_ROCI',
 'NEUMANN_LAT', 'NEUMANN_LON', 'NEUMANN_CLASS', 'NEUMANN_WIND', 'NEUMANN_PRES', 'MLC_LAT',
 'MLC_LON', 'MLC_CLASS', 'MLC_WIND', 'MLC_PRES', 'USA_GUST', 'BOM_GUST', 'BOM_GUST_PER',
 'REUNION_GUST', 'REUNION_GUST_PER', 'USA_SEAHGT', 'USA_SEARAD_NE', 'USA_SEARAD_SE',
 'USA_SEARAD_SW', 'USA_SEARAD_NW', 'STORM_DIR']


In [16]:
select_df = full_df.drop(labels=label, axis=1)

In [17]:
select_columns=list(select_df.columns)
pprint(select_columns, compact=True, width=100)

['SID', 'SEASON', 'NUMBER', 'BASIN', 'SUBBASIN', 'NAME', 'ISO_TIME', 'NATURE', 'LAT', 'LON',
 'LANDFALL', 'USA_STATUS', 'USA_WIND', 'USA_SSHS', 'STORM_SPEED']


In [18]:
select_df.head()

Unnamed: 0,SID,SEASON,NUMBER,BASIN,SUBBASIN,NAME,ISO_TIME,NATURE,LAT,LON,LANDFALL,USA_STATUS,USA_WIND,USA_SSHS,STORM_SPEED
0,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 03:00:00,NR,10.9,80.3,,,,,9
1,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 06:00:00,NR,10.8709,79.8265,,,,,9
2,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 09:00:00,NR,10.8431,79.3524,,,,,9
3,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 12:00:00,NR,10.8188,78.8772,,,,,9
4,1842298N11080,1842,1,NI,BB,NOT_NAMED,1842-10-25 15:00:00,NR,10.8,78.4,,,,,9


In [19]:
select_df.shape

(Delayed('int-458c7e04-a0be-4af1-ba60-4043d02dd3d4'), 15)

### Filter data for hurricane that cause landfall with with category greater than or equal to zero

In [20]:
# for LANDFALL* Minimum distance to land over next 3 hours ( = 0 means landfall)
# USA_STATUS  (HU,HR - hurricane)
# USA_SSHS >=0  (Saffir-Simpson Hurricane Scale information based on the wind speed provided by the US agency wind speed (US agencies provide 1-minute wind speeds))


In [21]:

select_df.dropna(how='all') 
#select_df = select_df.replace(' ',np.NaN)

Unnamed: 0_level_0,SID,SEASON,NUMBER,BASIN,SUBBASIN,NAME,ISO_TIME,NATURE,LAT,LON,LANDFALL,USA_STATUS,USA_WIND,USA_SSHS,STORM_SPEED
npartitions=10,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
,object,int64,int64,object,object,object,object,object,float64,float64,object,object,object,object,object
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [22]:
print(select_df[select_df['USA_SSHS'].isnull()])

Dask DataFrame Structure:
                   SID SEASON NUMBER   BASIN SUBBASIN    NAME ISO_TIME  NATURE      LAT      LON LANDFALL USA_STATUS USA_WIND USA_SSHS STORM_SPEED
npartitions=10                                                                                                                                    
                object  int64  int64  object   object  object   object  object  float64  float64   object     object   object   object      object
                   ...    ...    ...     ...      ...     ...      ...     ...      ...      ...      ...        ...      ...      ...         ...
...                ...    ...    ...     ...      ...     ...      ...     ...      ...      ...      ...        ...      ...      ...         ...
                   ...    ...    ...     ...      ...     ...      ...     ...      ...      ...      ...        ...      ...      ...         ...
                   ...    ...    ...     ...      ...     ...      ...     ...      ...     

In [23]:

with ProgressBar():
    count_of_USA_SSHS = select_df['USA_SSHS'].value_counts().compute()

select_df_2 = select_df.fillna({'USA_SSHS': '-10'})


[                                        ] | 0% Completed |  9.9s

  return func(*(_execute_task(a, cache) for a in args))


[########################################] | 100% Completed | 21.7s


In [24]:
with ProgressBar():
    count_of_LANDFALL = select_df['LANDFALL'].value_counts().compute()

select_df_2 = select_df.fillna({'LANDFALL': '-5'})


[                                        ] | 0% Completed |  8.2s

  return func(*(_execute_task(a, cache) for a in args))


[########################################] | 100% Completed | 19.8s


In [25]:
print(select_df_2[select_df_2['USA_SSHS'].isnull()])

Dask DataFrame Structure:
                   SID SEASON NUMBER   BASIN SUBBASIN    NAME ISO_TIME  NATURE      LAT      LON LANDFALL USA_STATUS USA_WIND USA_SSHS STORM_SPEED
npartitions=10                                                                                                                                    
                object  int64  int64  object   object  object   object  object  float64  float64   object     object   object   object      object
                   ...    ...    ...     ...      ...     ...      ...     ...      ...      ...      ...        ...      ...      ...         ...
...                ...    ...    ...     ...      ...     ...      ...     ...      ...      ...      ...        ...      ...      ...         ...
                   ...    ...    ...     ...      ...     ...      ...     ...      ...      ...      ...        ...      ...      ...         ...
                   ...    ...    ...     ...      ...     ...      ...     ...      ...     

In [26]:
print(select_df_2[select_df_2['LANDFALL'].isnull()])

Dask DataFrame Structure:
                   SID SEASON NUMBER   BASIN SUBBASIN    NAME ISO_TIME  NATURE      LAT      LON LANDFALL USA_STATUS USA_WIND USA_SSHS STORM_SPEED
npartitions=10                                                                                                                                    
                object  int64  int64  object   object  object   object  object  float64  float64   object     object   object   object      object
                   ...    ...    ...     ...      ...     ...      ...     ...      ...      ...      ...        ...      ...      ...         ...
...                ...    ...    ...     ...      ...     ...      ...     ...      ...      ...      ...        ...      ...      ...         ...
                   ...    ...    ...     ...      ...     ...      ...     ...      ...      ...      ...        ...      ...      ...         ...
                   ...    ...    ...     ...      ...     ...      ...     ...      ...     

In [27]:
#select_df_2["USA_SSHS"].gt(0)
cat_list = list(range(0,6))  
#cat_list
select_df_2["USA_SSHS"].isin(list(range(0,6)) )
#pprint(select_df_2["USA_SSHS"].isin(list(range(0,6)) ), compact=True, width=100)
#print(select_df_2["USA_SSHS"])
#USA_SSHS = select_df_2.astype({'USA_SSHS': 'int'})

Dask Series Structure:
npartitions=10
    bool
     ...
    ... 
     ...
     ...
Name: USA_SSHS, dtype: bool
Dask Name: isin, 82 tasks

In [39]:
dfFilter = select_df_2[(select_df_2["USA_STATUS"]=="HU") | (select_df_2["USA_STATUS"]=="HR")\
                       & (select_df_2["USA_SSHS"].isin(list(range(0,6)) ))]
dfFilter.head()



Unnamed: 0,SID,SEASON,NUMBER,BASIN,SUBBASIN,NAME,ISO_TIME,NATURE,LAT,LON,LANDFALL,USA_STATUS,USA_WIND,USA_SSHS,STORM_SPEED
574,1851175N26270,1851,5,,GM,NOT_NAMED,1851-06-25 00:00:00,TS,27.5333,-94.2667,150,HU,80,1,10
575,1851175N26270,1851,5,,GM,NOT_NAMED,1851-06-25 03:00:00,TS,27.7013,-94.6988,125,HU,80,1,8
576,1851175N26270,1851,5,,GM,NOT_NAMED,1851-06-25 06:00:00,TS,27.8,-95.08,97,HU,80,1,7
577,1851175N26270,1851,5,,GM,NOT_NAMED,1851-06-25 09:00:00,TS,27.8616,-95.4384,82,HU,80,1,6
578,1851175N26270,1851,5,,GM,NOT_NAMED,1851-06-25 12:00:00,TS,27.9,-95.7333,59,HU,80,1,5


In [43]:
dfFilter.to_csv('IBTrACS v04/IBTrACS_v04_AllLF.csv', index=False, single_file=True)

  return func(*(_execute_task(a, cache) for a in args))


['/Users/uli/Documents/Documents - Uli’s MacBook Pro/APPLIED ANALYTICS SPS COLUMBIA/TERM 3 FALL/APANPS5900 SOLVING REAL WORLD PROBLEMS W: ANALYTICS/DATA & ANALYSIS/IBTrACS v04/IBTrACS_v04_AllLF.csv']

In [44]:
df_AllLF = pd.read_csv('IBTrACS v04/IBTrACS_v04_AllLF.csv')
df_AllLF.head()

Unnamed: 0,SID,SEASON,NUMBER,BASIN,SUBBASIN,NAME,ISO_TIME,NATURE,LAT,LON,LANDFALL,USA_STATUS,USA_WIND,USA_SSHS,STORM_SPEED
0,1851175N26270,1851,5,,GM,NOT_NAMED,1851-06-25 00:00:00,TS,27.5333,-94.2667,150,HU,80,1.0,10
1,1851175N26270,1851,5,,GM,NOT_NAMED,1851-06-25 03:00:00,TS,27.7013,-94.6988,125,HU,80,1.0,8
2,1851175N26270,1851,5,,GM,NOT_NAMED,1851-06-25 06:00:00,TS,27.8,-95.08,97,HU,80,1.0,7
3,1851175N26270,1851,5,,GM,NOT_NAMED,1851-06-25 09:00:00,TS,27.8616,-95.4384,82,HU,80,1.0,6
4,1851175N26270,1851,5,,GM,NOT_NAMED,1851-06-25 12:00:00,TS,27.9,-95.7333,59,HU,80,1.0,5


In [50]:
dfFilter.shape

(Delayed('int-1245c506-dd89-466e-b2c8-261e153cdaee'), 15)

In [46]:
dfFilter.to_csv('IBTrACS v04/IBTrACS_v04_clean02.csv', index=False, single_file=True)

  return func(*(_execute_task(a, cache) for a in args))


['/Users/uli/Documents/Documents - Uli’s MacBook Pro/APPLIED ANALYTICS SPS COLUMBIA/TERM 3 FALL/APANPS5900 SOLVING REAL WORLD PROBLEMS W: ANALYTICS/DATA & ANALYSIS/IBTrACS v04/IBTrACS_v04_clean02.csv']

In [47]:
df_clean02 = pd.read_csv('IBTrACS v04/IBTrACS_v04_clean02.csv')
df_clean02.shape

(45043, 15)

In [48]:
dfFilter.dtypes

SID             object
SEASON           int64
NUMBER           int64
BASIN           object
SUBBASIN        object
NAME            object
ISO_TIME        object
NATURE          object
LAT            float64
LON            float64
LANDFALL        object
USA_STATUS      object
USA_WIND        object
USA_SSHS        object
STORM_SPEED     object
dtype: object