<a href="https://colab.research.google.com/github/data-analytics-workshop/python/blob/master/005_case_study_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Case Study 2 - Time Series

**Import Libraries**

In [0]:
# Import Library for Data Manipulation
import pandas as pd
import numpy as np

In [0]:
# Import Library for Visualization
import seaborn as sns
import matplotlib.pyplot as plt

**Import Data**

In [0]:
# Set Date Parser
def mydateparser(x):
    """Parse day-month-year text such as "15-Mar-95" into pandas timestamps.

    The `pd.datetime` alias was deprecated in pandas 1.0 and removed in 2.0,
    so parsing goes through `pd.to_datetime` with the same explicit format.

    Parameters
    ----------
    x : str or array-like of str
        Date text in "%d-%b-%y" form (day, abbreviated month name, 2-digit year).

    Returns
    -------
    pandas.Timestamp for a single string, DatetimeIndex for an array-like.
    """
    return pd.to_datetime(x, format="%d-%b-%y")

In [0]:
# Import Dataset
# `squeeze=` was removed from `read_csv` in pandas 2.0 and `date_parser=` was
# deprecated there, so the CSV is read plainly and converted afterwards.
house_index_raw = pd.read_csv('https://raw.githubusercontent.com/dianrdn/data/master/House-index-canberra.csv', sep=',', index_col=0)
# The first column holds the dates; parse them with the explicit
# day-abbreviated month-2-digit year format so the index becomes datetime-based.
house_index_raw.index = pd.to_datetime(house_index_raw.index, format="%d-%b-%y")
# Collapse a single data column into a Series (a wider frame is left unchanged,
# which matches what `squeeze=True` used to do).
df_house = house_index_raw.squeeze("columns")
df_house

In [0]:
# Prints the Dataset Information
# Quick sanity check of df_house right after loading; `.info()` writes its
# summary straight to the cell output rather than returning a value to display.
df_house.info()

**Explore Dataset**

In [0]:
# Explore Dataset
# Notebook-wide plotting defaults: large 16x9 canvas and the ggplot theme.
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('ggplot')

# Draw the house index over time and label the resulting axes directly.
house_ax = df_house.plot()
house_ax.set_xlabel('year')
house_ax.set_ylabel('house index')
house_ax.set_title('Overtime House Index')

**Time Series - ARIMA Modeling**

In [0]:
# Modeling and Show Result Summary
# Fits an ARIMA model with order (5, 1, 0) on the full df_house series and
# prints the fitted model's summary.
# NOTE(review): `ARIMA` is not imported in any visible cell, so this raises
# NameError on a fresh kernel unless an import exists elsewhere — presumably it
# is the statsmodels ARIMA class; confirm and add the import to the import cell.
# NOTE(review): `fit(disp=0)` looks like the legacy statsmodels API; verify that
# the installed version still accepts the `disp` argument.
arima = ARIMA(df_house, order=(5,1,0))
arima_fit = arima.fit(disp=0)
print(arima_fit.summary())

In [0]:
# Visualize Residuals
# Wrap the fitted model's residuals (`arima_fit.resid`) in a DataFrame and plot
# them as a line chart.
# NOTE(review): this reads whatever `arima_fit` currently holds; the validation
# cell further down rebinds that name, so re-running this cell afterwards would
# show residuals of a different fit.
residuals = pd.DataFrame(arima_fit.resid)
residuals.plot()
plt.show()

In [0]:
from sklearn.metrics import mean_squared_error

# Walk-forward validation: fit on the first half of the series, then forecast
# one step at a time, feeding each observed value back into the history
# before the next forecast.
df_house_values = df_house.values
size = int(len(df_house_values) * 0.5)
train, test = df_house_values[:size], df_house_values[size:]
history = list(train)
predictions = []
for t in range(len(test)):
    # Refit on everything observed so far.
    # NOTE(review): `disp=0` is passed through unchanged; verify that the
    # installed ARIMA implementation accepts it.
    arima = ARIMA(history, order=(5,1,0))
    arima_fit = arima.fit(disp=0)
    output = arima_fit.forecast()
    predicted = output[0]
    predictions.append(predicted)
    actual = test[t]
    history.append(actual)
    print('predicted=%f, actual=%f' % (predicted, actual))
# Score against the whole hold-out set. `actual` only holds the last observed
# value after the loop, so it cannot be compared with the full prediction list.
error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)

In [0]:
# plot
# Compare the hold-out observations with the walk-forward forecasts.
# `test` is the full hold-out array; `actual` is only the last scalar value
# left over from the validation loop, so plotting it would draw no line.
# The hold-out values are the tail of df_house, so its trailing index entries
# supply the dates for the x-axis and make the 'year' label accurate.
test_dates = df_house.index[len(df_house) - len(test):]
plt.plot(test_dates, test, color='blue', label='actual')
plt.plot(test_dates, predictions, color='red', label='prediction')
plt.xlabel('year')
plt.ylabel('house index')
plt.title('Overtime House Index')
plt.legend()
plt.show()