In [None]:
# Notebook-level IPython setup; each magic is described on the line(s) above it.
# 1. render matplotlib plots inline in the notebook
%matplotlib inline
# 2. load the watermark extension, used further down to print version info
%load_ext watermark
# 3. load autoreload and set it to mode 2 so that the notebook will reload
#    external python modules
%load_ext autoreload
%autoreload 2
# 4. enable retina (high resolution) plots
#    https://gist.github.com/minrk/3301035
%config InlineBackend.figure_format = 'retina'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# change default style figure and font size
plt.rcParams['figure.figsize'] = 8, 6
plt.rcParams['font.size'] = 12

%watermark -a 'Ethen' -d -t -v -p numpy,pandas,sklearn,matplotlib

In [None]:
# Read the csv exactly as stored, without any parsing options, for a first look.
input_path = 'PRSA_data_2010.1.1-2014.12.31.csv'
df = pd.read_csv(filepath_or_buffer=input_path)

# report (rows, columns), then let the notebook render the first five rows
n_rows, n_cols = df.shape
print('dimension: ', (n_rows, n_cols))
df.head(n=5)

In [None]:
from datetime import datetime

def parse(x):
    """Convert a space-separated 'year month day hour' string to a datetime.

    Parameters
    ----------
    x : str
        Timestamp text such as '2010 1 1 0'.

    Returns
    -------
    datetime.datetime
        The parsed timestamp; minutes and seconds are zero.

    Raises
    ------
    ValueError
        Propagated from ``datetime.strptime`` when ``x`` does not match
        the expected format.
    """
    timestamp_format = '%Y %m %d %H'
    parsed = datetime.strptime(x, timestamp_format)
    return parsed

input_path = 'PRSA_data_2010.1.1-2014.12.31.csv'
raw = pd.read_csv(input_path)

# Assemble the timestamp from its four component columns. This replaces
# read_csv's `date_parser` and dict-style `parse_dates` options, both of
# which are deprecated in pandas 2.x.
date_cols = ['year', 'month', 'day', 'hour']
timestamps = pd.to_datetime(raw[date_cols])

# drop the row counter 'No' together with the date parts, which now live in
# the 'date' index
dataset = raw.drop(['No'] + date_cols, axis=1)
dataset.index = pd.DatetimeIndex(timestamps, name='date')

# manually specify column names
dataset.columns = ['pollution', 'dew', 'temp', 'press', 'wnd_dir', 'wnd_spd', 'snow', 'rain']
# replace missing pollution readings with 0 (other columns are left untouched).
# Assign the result back: a chained `dataset['pollution'].fillna(0, inplace=True)`
# acts on a temporary under pandas copy-on-write and leaves the frame unchanged.
dataset['pollution'] = dataset['pollution'].fillna(0)
# drop the first 24 rows, i.e. the first 24 hours
dataset = dataset.iloc[24:]
# summarize first 5 rows
dataset.head(5)

In [None]:
# figure defaults for this section: 8x6 figure size and 12pt font size
plt.rcParams.update({'figure.figsize': (8, 6), 'font.size': 12})

# every column except the wind direction variable gets its own panel
plot_features = ['pollution', 'dew', 'temp', 'press', 'wnd_spd', 'snow', 'rain']
n_features = len(plot_features)

# one row per feature, stacked vertically in a single 12x8 figure
fig, axes = plt.subplots(nrows=n_features, ncols=1, figsize=(12, 8))
for i, col in enumerate(plot_features):
    ax = axes[i]
    ax.plot(dataset[col].values)
    ax.set_title(col, loc='left')

fig.tight_layout()
plt.show()