# <span style='color:Blue'> TIME SERIES </span>

### Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt
import datetime

import statsmodels.api as sm

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's 'whitegrid' style to the plots that follow.
sns.set_style('whitegrid')
# Request SVG output for inline figures.
# NOTE(review): IPython.display.set_matplotlib_formats appears to be deprecated
# in recent IPython releases — confirm against the installed version.
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg')

### Load Data

In [None]:
# Bitcoin price/volume history in USD, exported as CSV by http://bitcoinity.org
# (the original note describes it as hourly Coinbase price data).
# The 'Time' column is parsed as datetimes and becomes the DataFrame index.
btc_data = pd.read_csv(
    'http://data.bitcoinity.org/export_data.csv?currency=USD&data_type=price_volume&t=lb&timespan=all&vu=curr',
    parse_dates=['Time'],
).set_index('Time')
print(btc_data.head())

# Report the time span covered by the download.
date_min, date_max = btc_data.index.min(), btc_data.index.max()
print('Minimum date from data set: {}'.format(date_min))
print('Maximum date from data set: {}'.format(date_max))

In [None]:
# Task: Resample the data and handle the missing values with a different method

In [None]:
# 'MS' buckets the observations by month start; each bucket is reduced to its
# mean price.
monthly_price = btc_data['price'].resample('MS').mean()

# Months without any observation come out as NaN; they are replaced with the
# overall mean of the monthly series.
monthly_price = monthly_price.fillna(monthly_price.mean())

# Back to a single-column ('price') DataFrame for the cells that follow.
btc_data = monthly_price.to_frame()

In [None]:
# Line plot of the price series over time.
# 'Time' is the DataFrame index rather than a column, so the index is passed
# explicitly (as the later plotting cells do); referring to it by name only
# works on seaborn versions that resolve index names.
plt.figure()
sns.lineplot(x=btc_data.index, y='price', data=btc_data)
plt.title('Bitcoin price', fontsize=12)
plt.xlabel('year', fontsize=10)
plt.ylabel('price', fontsize=10)
plt.show()

In [None]:
from pandas.plotting import lag_plot

# lag_plot is documented for a single Series (y(t) against y(t + 1)), so select
# the price column explicitly. Passing the whole DataFrame would mix every
# column into one scatter as soon as btc_data holds more than one column.
lag_plot(btc_data['price'])
plt.show()

In [None]:
# What can you observe from the lag plot?

In [None]:
# Let's plot ACF & PACF graphs to visualize AR & MA components
import statsmodels.tsa.api as smt

# One row, two panels: autocorrelation on the left, partial autocorrelation on
# the right.
fig, axes = plt.subplots(1, 2, figsize=(7.5, 3))

# alpha is the significance level of the shaded confidence band: 0.05 yields the
# conventional 95% interval. A value of 0.5 would draw only a 50% band and make
# far too many lags look significant.
smt.graphics.plot_acf(btc_data['price'], lags=30, ax=axes[0], alpha=0.05)
smt.graphics.plot_pacf(btc_data['price'], lags=30, ax=axes[1], alpha=0.05)
plt.tight_layout()

#### Moving Average Smoothing


Smoothing is a technique applied to time series to remove the fine-grained variation between time steps. The hope of smoothing is to remove noise and better expose the signal of the underlying causal processes. Moving averages are a simple and common type of smoothing used in time series analysis and time series forecasting. Calculating a moving average involves creating a new series where the values are comprised of the average of raw observations in the original time series.
A moving average can also help us identify trends in a time series. Because we are taking an average, it tends to smooth out noise and seasonality.

In [None]:
# Task: Try different window size

In [None]:
# Rolling statistics: smooth the price with a centred rolling mean over a
# 30-observation window (30 months for a monthly series) and overlay the
# smoothed curve on the raw price to look for insights.
window_size = 30
rolling_price = btc_data['price'].rolling(window=window_size, center=True)
btc_data["Moving_Average"] = rolling_price.mean()

# Draw the raw series first, then the smoothed one, on the same axes.
for column in ("price", "Moving_Average"):
    sns.lineplot(x=btc_data.index, y=column, data=btc_data)
plt.xticks(rotation=15)
plt.show()

#### Seasonal Patterns in Time Series

One way to investigate the seasonal component of a time series is to remove the trend first, so that the seasonality is easier to see. To remove the trend, you can subtract the trend you computed above (rolling mean) from the original signal. This, however, will be dependent on how many data points you averaged over.

In [None]:
# Detrend the series: subtract the rolling-mean trend from the price.
price = btc_data["price"]
trend = btc_data["Moving_Average"]
btc_data["Trend_Corrected"] = price.sub(trend)

# Plot what remains once the trend has been removed.
sns.lineplot(data=btc_data, x=btc_data.index, y="Trend_Corrected")
plt.xticks(rotation=15)
plt.show()

In [None]:
# What can you observe from the plots above?

### Decomposing: Eliminating trend and seasonality

Time series decomposition involves thinking of a series as a combination of trend, seasonality, and noise components. Decomposition provides a useful abstract model for thinking about time series generally and for better understanding problems during time series analysis and forecasting.

In [None]:
#Task: Try different numbers for period

In [None]:
# Additive decomposition of the price series with a seasonal period of
# 12 observations.
# Note from the original author: seasonal_decompose expects the index to be in
# datetime format.
price_series = btc_data["price"]
decomposition_add = sm.tsa.seasonal_decompose(
    price_series,
    model="additive",
    period=12,
)

# Draw the panels produced by the decomposition result.
fig = decomposition_add.plot()
plt.show()

In [None]:
def hist(series):
    """Plot a density-scaled histogram of ``series`` with a KDE curve on top.

    Parameters
    ----------
    series : array-like
        One-dimensional numeric data to plot.

    Returns
    -------
    tuple
        The ``(fig, ax)`` pair that holds the plot.
    """
    fig, ax = plt.subplots()
    # sns.distplot is deprecated; histplot (density-normalised) plus kdeplot
    # reproduce its histogram-with-KDE output using the supported API.
    sns.histplot(x=series, stat='density', ax=ax,
                 alpha=0.8, edgecolor='black', color='blue')
    sns.kdeplot(x=series, ax=ax, color='black', alpha=0.7)
    sns.despine()
    return fig, ax

# Distribution of the residual component of the additive decomposition.
residuals = decomposition_add.resid
hist(residuals)
plt.show()

## Naive Forecast

In [None]:
#Task: Implement a different forecasting method

In [None]:
# Evaluate a naive forecast over several chronological train/test splits:
# every test point is predicted with the last value seen in training.
tscv = TimeSeriesSplit(n_splits=4)

btc_price = btc_data['price']

for train_index, test_index in tscv.split(btc_price):
    train = btc_price.iloc[train_index]
    test = btc_price.iloc[test_index]

    print('Observations: %d' % (len(train) + len(test)))
    print('Training Observations: %d' % (len(train)))
    print('Testing Observations: %d' % (len(test)))

    # The naive forecast repeats the most recent training observation.
    last_value = train.iloc[-1]

    # Actual test prices next to the constant forecast, aligned on the test
    # dates (the scalar forecast is broadcast over the index).
    y_hat = pd.DataFrame(
        {'price': test.values, 'naive': last_value},
        index=test.index,
    )

    # One figure per split: training data, held-out data and the forecast.
    plt.figure()
    sns.lineplot(x=train.index, y=train, label='Train')
    sns.lineplot(x=test.index, y=test, label='Test')
    sns.lineplot(x=y_hat.index, y=y_hat['naive'], label='Naive Forecast')
    plt.legend(loc='best')
    plt.title("Naive Forecast")
    plt.show()

    # Error of the forecast on this split.
    rms = sqrt(mean_squared_error(test, y_hat['naive']))
    mae = mean_absolute_error(test, y_hat['naive'])
    print('RMSE = ' + str(rms))
    print('MAE = ' + str(mae))