In [15]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np

In [16]:
weather_data = pd.read_csv("Resources/Weather Data/GlobalLandTemperaturesByState.csv")

weather_data.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
0,1855-05-01,25.544,1.171,Acre,Brazil
1,1855-06-01,24.228,1.103,Acre,Brazil
2,1855-07-01,24.371,1.044,Acre,Brazil
3,1855-08-01,25.427,1.073,Acre,Brazil
4,1855-09-01,25.675,1.014,Acre,Brazil


In [17]:
# Drop unnecessary columns
weather_data = weather_data.drop(["AverageTemperatureUncertainty"], axis=1)

In [18]:
# change the dt column to datetime
weather_data["dt"] = pd.to_datetime(weather_data["dt"], format="%Y-%m-%d")
weather_data

Unnamed: 0,dt,AverageTemperature,State,Country
0,1855-05-01,25.544,Acre,Brazil
1,1855-06-01,24.228,Acre,Brazil
2,1855-07-01,24.371,Acre,Brazil
3,1855-08-01,25.427,Acre,Brazil
4,1855-09-01,25.675,Acre,Brazil
...,...,...,...,...
645670,2013-05-01,21.634,Zhejiang,China
645671,2013-06-01,24.679,Zhejiang,China
645672,2013-07-01,29.272,Zhejiang,China
645673,2013-08-01,29.202,Zhejiang,China


In [19]:
#only selecting data from the United States and from 1900 and on
us_weather = weather_data.loc[weather_data["Country"] == "United States"]
us_weather = us_weather.loc[us_weather["dt"] > "1875-01-01"]


#selecting only the data from california, florida, and illinois
ca_weather = us_weather.loc[us_weather["State"] == "California"].copy()
fl_weather = us_weather.loc[us_weather["State"] == "Florida"].copy()
il_weather = us_weather.loc[us_weather["State"] == "Illinois"].copy()

In [20]:
#concatenating the data together into one dataframe
frames = [ca_weather, fl_weather, il_weather]

state_weather = pd.concat(frames)

In [21]:
# adding a column for the Average Temperature in Degrees Fahrenheit
state_weather["Average Temperature (F)"] = (state_weather["AverageTemperature"] * (9 / 5)) + 32

# converting float type columns to integers and rounding to the nearest whole number
state_weather['AverageTemperature'] = state_weather['AverageTemperature'].astype(np.int64)
state_weather['Average Temperature (F)'] = state_weather['Average Temperature (F)'].astype(np.int64)


state_weather

Unnamed: 0,dt,AverageTemperature,State,Country,Average Temperature (F)
71371,1875-02-01,7,California,United States,45
71372,1875-03-01,8,California,United States,47
71373,1875-04-01,13,California,United States,57
71374,1875-05-01,18,California,United States,64
71375,1875-06-01,21,California,United States,70
...,...,...,...,...,...
173516,2013-05-01,18,Illinois,United States,64
173517,2013-06-01,22,Illinois,United States,72
173518,2013-07-01,23,Illinois,United States,74
173519,2013-08-01,23,Illinois,United States,73


In [22]:
#Rename Column Headers
state_weather.rename(columns={"dt": "Date", "AverageTemperature":"Average Temperature (C)"})

Unnamed: 0,Date,Average Temperature (C),State,Country,Average Temperature (F)
71371,1875-02-01,7,California,United States,45
71372,1875-03-01,8,California,United States,47
71373,1875-04-01,13,California,United States,57
71374,1875-05-01,18,California,United States,64
71375,1875-06-01,21,California,United States,70
...,...,...,...,...,...
173516,2013-05-01,18,Illinois,United States,64
173517,2013-06-01,22,Illinois,United States,72
173518,2013-07-01,23,Illinois,United States,74
173519,2013-08-01,23,Illinois,United States,73


In [23]:
state_weather.to_csv("Resources/Cleaned Weather Data/StateWeatherCleaned.csv")