### Data cleaning

#### Read the measurement and station CSV files into DataFrames

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Location of the raw measurements file, relative to the notebook's working directory.
MEASUREMENTS_CSV = "Resources/hawaii_measurements.csv"

# Load the raw measurements and preview the first rows to confirm the columns parsed as expected.
measurements = pd.read_csv(MEASUREMENTS_CSV)
measurements.head()

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [3]:
# Location of the raw stations file, relative to the notebook's working directory.
STATIONS_CSV = "Resources/hawaii_stations.csv"

# Load the station metadata and preview the first rows to confirm the columns parsed as expected.
stations = pd.read_csv(STATIONS_CSV)
stations.head()

Unnamed: 0,station,name,latitude,longitude,elevation
0,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
2,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
3,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
4,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6


#### Inspect the data for NaNs and missing values

* `stations` does not have NaNs or missing values
* `measurements` has NaNs only in the `prcp` column (1,447 rows); all other columns are complete

In [4]:
# Per-column count of missing values in `stations` (0 everywhere means nothing to clean).
stations.isna().sum()

station      0
name         0
latitude     0
longitude    0
elevation    0
dtype: int64

In [5]:
# Per-column count of missing values in `measurements`; non-zero columns need cleaning.
measurements.isna().sum()

station       0
date          0
prcp       1447
tobs          0
dtype: int64

#### Remove rows with NaNs from `measurements`

In [6]:
# Drop every row that has at least one missing value. The result is a new frame:
# `measurements` itself is left untouched, and the surviving rows keep their
# original index labels (so gaps in the index mark the removed rows).
measurements_cleaned = measurements.dropna(axis="index", how="any")
measurements_cleaned.head()

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
5,USC00519397,2010-01-07,0.06,70


#### Save the cleaned DataFrames as CSV files

In [7]:
# Write both frames next to the raw inputs. `index=False` keeps the pandas row
# index out of the files, so only the named columns (with a header row) are saved.
measurements_cleaned.to_csv(
    "Resources/clean_hawaii_measurements.csv",
    index=False,
    header=True,
)
stations.to_csv(
    "Resources/clean_hawaii_stations.csv",
    index=False,
    header=True,
)