In [4]:
# import
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# set a few plotting defaults
%matplotlib inline
plt.style.use('fivethirtyeight')
plt.rcParams['font.size'] = 18
plt.rcParams['patch.edgecolor'] = 'k'

In [5]:
# Read the November 2018 Central Park weather CSV into a DataFrame.
df_weatherNov2018 = pd.read_csv(
    filepath_or_buffer='nov_2018_centralpark_weather.csv',
)

In [6]:
# Peek at the first five rows to sanity-check the load.
df_weatherNov2018.head(n=5)

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WT01,WT02,WT08
0,USW00094728,"NY CITY CENTRAL PARK, NY US",40.77898,-73.96925,42.7,2018-11-01,0.0,0.0,0.0,,70,57,,,
1,USW00094728,"NY CITY CENTRAL PARK, NY US",40.77898,-73.96925,42.7,2018-11-02,0.22,0.0,0.0,,72,64,1.0,,
2,USW00094728,"NY CITY CENTRAL PARK, NY US",40.77898,-73.96925,42.7,2018-11-03,0.37,0.0,0.0,,65,48,1.0,,
3,USW00094728,"NY CITY CENTRAL PARK, NY US",40.77898,-73.96925,42.7,2018-11-04,0.0,0.0,0.0,,55,43,,,
4,USW00094728,"NY CITY CENTRAL PARK, NY US",40.77898,-73.96925,42.7,2018-11-05,0.3,0.0,0.0,,53,48,1.0,,


In [7]:
# Summarise column dtypes and non-null counts to spot empty or sparse columns.
df_weatherNov2018.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 15 columns):
STATION      30 non-null object
NAME         30 non-null object
LATITUDE     30 non-null float64
LONGITUDE    30 non-null float64
ELEVATION    30 non-null float64
DATE         30 non-null object
PRCP         30 non-null float64
SNOW         30 non-null float64
SNWD         30 non-null float64
TAVG         0 non-null float64
TMAX         30 non-null int64
TMIN         30 non-null int64
WT01         16 non-null float64
WT02         1 non-null float64
WT08         2 non-null float64
dtypes: float64(10), int64(2), object(3)
memory usage: 3.6+ KB


### Background on column labels in weather DataFrame

- STATION, NAME, LATITUDE, and LONGITUDE are identical in every row (all of the data comes from NOAA's Central Park station)
- ELEVATION: the station's height above sea level (also constant, since every observation comes from the same station)
- DATE: day of observation
- PRCP: precipitation
- SNOW: snowfall
- SNWD: snow depth
- TAVG: average temperature (not reported in this file: all 30 values are missing)
- TMAX: maximum temperature
- TMIN: minimum temperature
- WT**: weather-type flags (1.0 when the weather type was recorded, blank otherwise)
    - WT01 = Fog, ice fog, or freezing fog (may include heavy fog)
    - WT02 = Heavy fog or heavy freezing fog (not always distinguished from fog)
    - WT08 = Smoke or haze

In [8]:
# Remove the station-metadata columns (STATION, NAME, LATITUDE, LONGITUDE, ELEVATION).
# The result is assigned back to the same name, so on a second execution of this cell
# the columns are already gone; errors='ignore' makes that re-run a no-op instead of
# raising KeyError.
df_weatherNov2018 = df_weatherNov2018.drop(
    columns=['STATION', 'NAME', 'LATITUDE', 'LONGITUDE', 'ELEVATION'],
    errors='ignore',
)

In [9]:
# Show the first five rows again to confirm the station-metadata columns are gone.
df_weatherNov2018.head(n=5)

Unnamed: 0,DATE,PRCP,SNOW,SNWD,TAVG,TMAX,TMIN,WT01,WT02,WT08
0,2018-11-01,0.0,0.0,0.0,,70,57,,,
1,2018-11-02,0.22,0.0,0.0,,72,64,1.0,,
2,2018-11-03,0.37,0.0,0.0,,65,48,1.0,,
3,2018-11-04,0.0,0.0,0.0,,55,43,,,
4,2018-11-05,0.3,0.0,0.0,,53,48,1.0,,
