In [2]:
# import dependencies
import os
import pandas as pd
import numpy as np
import pymongo
from pymongo import MongoClient


In [3]:
# read in hurricane data
hurricane_data_df = pd.read_csv("static/data/atlantic.csv", encoding='utf-8')
hurricane_data_df.head()

Unnamed: 0,ID,Name,Date,Time,Event,Status,Latitude,Longitude,Maximum Wind,Minimum Pressure,...,Low Wind SW,Low Wind NW,Moderate Wind NE,Moderate Wind SE,Moderate Wind SW,Moderate Wind NW,High Wind NE,High Wind SE,High Wind SW,High Wind NW
0,AL011851,UNNAMED,18510625,0,,HU,28.0N,94.8W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1,AL011851,UNNAMED,18510625,600,,HU,28.0N,95.4W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
2,AL011851,UNNAMED,18510625,1200,,HU,28.0N,96.0W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
3,AL011851,UNNAMED,18510625,1800,,HU,28.1N,96.5W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
4,AL011851,UNNAMED,18510625,2100,L,HU,28.2N,96.8W,80,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999


Ocean Temps data info:
Units: Degrees Celsius
Base Period: 1901-2000

In [4]:
# read in ocean temps data (nothing to do here)
oceantemps_data_df = pd.read_csv("static/data/oceantemps.csv", encoding='utf-8')
oceantemps_data_df.head()

Unnamed: 0,Year,Value
0,1880,-0.11
1,1881,-0.16
2,1882,-0.1
3,1883,-0.18
4,1884,-0.29


In [5]:
# read in annual CO2 data (Justan's - has more data points for his graph)
complete_co2_data_df = pd.read_csv("static/assets/data/c02.csv", encoding='utf-8')
complete_co2_data_df.head()

Unnamed: 0,year,month,decimal_date,monthly_average
0,1958,3,1958.2027,315.7
1,1958,4,1958.2877,317.45
2,1958,5,1958.3699,317.51
3,1958,6,1958.4548,317.24
4,1958,7,1958.537,315.86


In [6]:
# reduce data frame to just have year and mean co2 for each year
complete_co2 = complete_co2_data_df[['year', 'monthly_average']]
meancomplete_co2 = complete_co2.groupby('year').mean()
meancomplete_co2.head()

Unnamed: 0_level_0,monthly_average
year,Unnamed: 1_level_1
1958,315.232
1959,315.980833
1960,316.91
1961,317.644167
1962,318.454167


In [7]:
#  rename dataframe to use for calculating percentage change year/year of co2
perchange_co2 = meancomplete_co2

In [8]:
# round to 2 decimal places and reset index
meancomplete_co2 = meancomplete_co2.round(2)
meancomplete_co2_df = meancomplete_co2.reset_index()
meancomplete_co2_df.head()

Unnamed: 0,year,monthly_average
0,1958,315.23
1,1959,315.98
2,1960,316.91
3,1961,317.64
4,1962,318.45


In [9]:
# rename the columns and this is final dataframe for year/mean co2 levels
meancomplete_co2_df.rename(columns={'year': 'year', 'monthly_average': 'meanco2'}, inplace=True)
meancomplete_co2_df.head()

Unnamed: 0,year,meanco2
0,1958,315.23
1,1959,315.98
2,1960,316.91
3,1961,317.64
4,1962,318.45


In [10]:
# use perchange_co2 = meancomplete_co2 to calculate year/year percentage change of co2 data
perchange_co2.head()

Unnamed: 0_level_0,monthly_average
year,Unnamed: 1_level_1
1958,315.232
1959,315.980833
1960,316.91
1961,317.644167
1962,318.454167


In [11]:
# calculate percentage change year/year of co2 data/round/ and reset index
perchange_co2_df = perchange_co2.pct_change()
perchange_co2_df = perchange_co2_df.round(3)
perchange_co2_df = perchange_co2_df.reset_index()
perchange_co2_df.tail()

Unnamed: 0,year,monthly_average
58,2016,0.008
59,2017,0.006
60,2018,0.005
61,2019,0.007
62,2020,0.007


In [13]:
# rename the columns and this is final dataframe for year/year percentage change co2 levels
perchange_co2_df.rename(columns={'year': 'year', 'monthly_average': 'co2change'}, inplace=True)
perchange_co2_df.tail()

Unnamed: 0,year,co2change
58,2016,0.008
59,2017,0.006
60,2018,0.005
61,2019,0.007
62,2020,0.007


In [15]:
# read in annual co2 data (don't use this csv!!!)
# mean_co2_data_df = pd.read_csv("static/data/co2_annmean_mlo.csv", encoding='utf-8')
# mean_co2_data_df.head(5)


In [16]:
# rename columns - wont' let you use column names - "not in index"
#yearmean_co2 = mean_co2_data_df[['year', 'micromol_mol_mean']]
#yearmean_co2.head()

In [17]:
# reduce columns for hurricane data to what we need 
cropHurr = hurricane_data_df[["ID", "Name", "Date", "Time", "Event", "Status", "Latitude", "Longitude", "Maximum Wind"]]
cropHurr.head()

Unnamed: 0,ID,Name,Date,Time,Event,Status,Latitude,Longitude,Maximum Wind
0,AL011851,UNNAMED,18510625,0,,HU,28.0N,94.8W,80
1,AL011851,UNNAMED,18510625,600,,HU,28.0N,95.4W,80
2,AL011851,UNNAMED,18510625,1200,,HU,28.0N,96.0W,80
3,AL011851,UNNAMED,18510625,1800,,HU,28.1N,96.5W,80
4,AL011851,UNNAMED,18510625,2100,L,HU,28.2N,96.8W,80


In [18]:
# to calculate percentage change for storms year/year
# groupby ID to explore unique ID/status data - some storms never become hurricanes
cropHurrgroup = cropHurr.groupby(['ID', 'Status']).mean()
cropHurrgroup.tail(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,Time,Maximum Wind
ID,Status,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AL292005,EX,20051120.0,978.947368,38.421053
AL292005,SS,20051120.0,800.0,45.0
AL292005,TS,20051130.0,900.0,51.25
AL302005,HU,20051200.0,900.0,67.0
AL302005,LO,20051210.0,1080.0,27.0
AL302005,TD,20051210.0,1200.0,30.0
AL302005,TS,20051170.0,882.352941,50.294118
AL312005,LO,20060110.0,1080.0,25.0
AL312005,TD,20057150.0,600.0,30.0
AL312005,TS,20057880.0,900.0,46.785714


In [19]:
# find the last id's used in the data to isolate unique storms
cropHurrlast = cropHurr.groupby('ID').last()
cropHurrlast.tail()

Unnamed: 0_level_0,Name,Date,Time,Event,Status,Latitude,Longitude,Maximum Wind
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AL282005,GAMMA,20051122,0,,LO,15.4N,83.5W,25
AL291969,MARTHA,19691125,1200,,TD,8.5N,82.0W,25
AL292005,DELTA,20051129,1800,,EX,35.3N,1.0W,30
AL302005,EPSILON,20051209,1800,,LO,24.7N,39.2W,25
AL312005,ZETA,20060107,1800,,LO,26.3N,55.7W,25


In [20]:
# drop last 4 characters for 'Date' to isolate year (not include month/day)
# reset index
cropHurrlast["Date"] = cropHurrlast["Date"].astype(str).str[:-4]
cropHurrlast = cropHurrlast.reset_index()
cropHurrlast.tail()

Unnamed: 0,ID,Name,Date,Time,Event,Status,Latitude,Longitude,Maximum Wind
1809,AL282005,GAMMA,2005,0,,LO,15.4N,83.5W,25
1810,AL291969,MARTHA,1969,1200,,TD,8.5N,82.0W,25
1811,AL292005,DELTA,2005,1800,,EX,35.3N,1.0W,30
1812,AL302005,EPSILON,2005,1800,,LO,24.7N,39.2W,25
1813,AL312005,ZETA,2006,1800,,LO,26.3N,55.7W,25


In [21]:
# group by 'Date' to get counts of storms for each year
# reset index
cropHurrlastgroup = cropHurrlast.groupby('Date').count()
cropHurrlastgroup.tail()

Unnamed: 0_level_0,ID,Name,Time,Event,Status,Latitude,Longitude,Maximum Wind
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2011,20,20,20,20,20,20,20,20
2012,19,19,19,19,19,19,19,19
2013,15,15,15,15,15,15,15,15
2014,9,9,9,9,9,9,9,9
2015,12,12,12,12,12,12,12,12


In [22]:
# reduce columns for just year and ID (storm counts/year)(don't reset index yet - do % change first)
cropHurrlastgroup_df = cropHurrlastgroup[['ID']]
cropHurrlastgroup_df.tail()

Unnamed: 0_level_0,ID
Date,Unnamed: 1_level_1
2011,20
2012,19
2013,15
2014,9
2015,12


In [23]:
# perform percentage change function to calculate changes from year to year (round to 3 decimal places)
# reset index
stormpercent_change_df = cropHurrlastgroup_df.pct_change()
stormpercent_change_df = stormpercent_change_df.round(3)
stormpercent_change_df = stormpercent_change_df.reset_index()
stormpercent_change_df.tail()

Unnamed: 0,Date,ID
160,2011,-0.048
161,2012,-0.05
162,2013,-0.211
163,2014,-0.4
164,2015,0.333


In [24]:
# rename columns for % change of storms for each year
stormpercent_change_df.rename(columns={'Date': 'year', 'ID': 'stormchange'}, inplace=True)
stormpercent_change_df.tail()

Unnamed: 0,year,stormchange
160,2011,-0.048
161,2012,-0.05
162,2013,-0.211
163,2014,-0.4
164,2015,0.333


In [25]:
# check datatypes for hurricane data
print(cropHurr.dtypes)

ID              object
Name            object
Date             int64
Time             int64
Event           object
Status          object
Latitude        object
Longitude       object
Maximum Wind     int64
dtype: object


In [26]:
# remove last characters from latitude 'N' and longitude 'W' columns
cropHurr["Latitude"] = cropHurr["Latitude"].astype(str).str[:-1]
cropHurr["Longitude"] = cropHurr["Longitude"].astype(str).str[:-1]
cropHurr.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,ID,Name,Date,Time,Event,Status,Latitude,Longitude,Maximum Wind
0,AL011851,UNNAMED,18510625,0,,HU,28.0,94.8,80
1,AL011851,UNNAMED,18510625,600,,HU,28.0,95.4,80
2,AL011851,UNNAMED,18510625,1200,,HU,28.0,96.0,80
3,AL011851,UNNAMED,18510625,1800,,HU,28.1,96.5,80
4,AL011851,UNNAMED,18510625,2100,L,HU,28.2,96.8,80


In [28]:
# make W - longitude values negative 
cropHurr.Longitude = cropHurr.Longitude.astype(float)*(-1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [29]:
cleanHurr = cropHurr
cleanHurr.tail(10)

Unnamed: 0,ID,Name,Date,Time,Event,Status,Latitude,Longitude,Maximum Wind
49095,AL122015,KATE,20151111,600,,HU,35.2,-67.6,70
49096,AL122015,KATE,20151111,1200,,HU,36.2,-62.5,75
49097,AL122015,KATE,20151111,1800,,HU,37.6,-58.2,65
49098,AL122015,KATE,20151112,0,,EX,38.9,-55.0,65
49099,AL122015,KATE,20151112,600,,EX,40.0,-52.0,65
49100,AL122015,KATE,20151112,1200,,EX,41.3,-50.4,55
49101,AL122015,KATE,20151112,1800,,EX,41.9,-49.9,55
49102,AL122015,KATE,20151113,0,,EX,41.5,-49.2,50
49103,AL122015,KATE,20151113,600,,EX,40.8,-47.5,45
49104,AL122015,KATE,20151113,1200,,EX,40.7,-45.4,45


In [6]:
#Define connection link an instantiate client
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

#Define hurricane database in Mongo
db = client.hurricaneFT

hurr = db.by_name

In [9]:
#Instantiate for loop for populating hurricane collection
for index, row in cropHurr.iterrows():
    #Populate post dictionary with information to be inserted into business collection
    post = {
        "ID": str(row["ID"]),
        "Name": str(row["Name"]),
        "Date": str(row["Date"]),
        "Time": str(row["Time"]),
        "Event": str(row["Event"]),
        "Status": str(row["Status"]),
        "Latitude": str(row["Latitude"]),
        "Longitude": str(row["Longitude"]),
        "Maximum Wind": str(row["Maximum Wind"])
    }
    #Insert data into business collection, post{} by post{}
    hurr.insert_one(post)