# Time series analysis

Notes and exercises based on the [DataCamp course *Time Series Analysis in Python*](https://campus.datacamp.com/courses/time-series-analysis-in-python).



In [22]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats


In [2]:
# Read the stocks/bonds table and use the observation date column as the row index.
stocks_and_bonds = pd.read_csv('../data/stocks_and_bonds.csv').set_index('observation_date')
# Parse the index labels into datetimes so the frame can be handled as a time series.
stocks_and_bonds.index = pd.to_datetime(stocks_and_bonds.index)
stocks_and_bonds.head(5)

In [3]:
# Change of every column relative to the previous row (one-period step).
series = stocks_and_bonds.pct_change(periods=1)
series.head(5)

In [4]:
# Correlation between the changes of the 'SP500' and 'US10Y' columns.
# Series.corr gives the single number that the message below announces;
# calling .corr() on the whole frame would print the full correlation matrix instead.
correlation = series['SP500'].corr(series['US10Y'])
print("Correlation of stocks and interest rates: ", correlation)

In [5]:
# Scatter the 'SP500' changes against the 'US10Y' changes.
plt.scatter(series['SP500'], series['US10Y'])
# Label both axes and title the figure so it can be read without the code:
# the two axes show different variables on the same numeric range.
plt.xlabel('SP500 (pct_change)')
plt.ylabel('US10Y (pct_change)')
plt.title('SP500 vs. US10Y changes')
# Same window on both axes; points outside +/- 0.2 are not shown.
plt.xlim(-0.2, 0.2)
plt.ylim(-0.2, 0.2)
plt.show()

# Linear regression

In [6]:
# Read the example regression data set and preview its first rows.
df = pd.read_csv('../data/regression_example_data.csv')
df.head(5)

In [7]:
# Regressor and response columns used by the three fits below.
x, y = df['x'], df['y']

In [8]:
# Design matrix: the 'x' column as a one-column frame, passed through add_constant.
df1 = sm.add_constant(df[['x']])
# Fit ordinary least squares of y on that design matrix.
mdl = sm.OLS(endog=y, exog=df1).fit()
# Fitted parameters first, then the full regression report.
print(mdl.params)
print(mdl.summary())

In [9]:
# Fit a degree-1 polynomial to the same x/y data with NumPy.
np.polyfit(x, y, 1)

In [10]:
# Linear regression of y on x via SciPy; this rebinds `mdl`, replacing the OLS result.
mdl = stats.linregress(x=x, y=y)
print(mdl)

# Autocorrelation

In [11]:
# Read the stocks/bonds table again, indexed by observation date.
df = pd.read_csv('../data/stocks_and_bonds.csv').set_index('observation_date')
# Parse the index labels into datetimes (needed for resampling by calendar period).
df.index = pd.to_datetime(df.index)
# Keep only the 'SP500' column for the autocorrelation study.
sp500 = df['SP500']
sp500.head(5)

In [12]:
# Take the last observation of each 'M' period, then the change from one period to the next.
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of 'ME' — confirm
# against the pandas version this notebook is pinned to before changing it.
sp500_monthly = sp500.resample('M').last().pct_change()
sp500_monthly.head(5)

In [13]:
# Autocorrelation of the monthly changes at lag 1.
auto_correlation = sp500_monthly.autocorr(lag=1)
auto_correlation

# Autocorrelation function (ACF)

In [18]:
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.stattools import acf

# Read the earnings time series and index it by the 'Date' column.
df = pd.read_csv('../data/earnings_timeseries.csv').set_index('Date')
# Parse the index labels into datetimes.
df.index = pd.to_datetime(df.index)
df.head(5)

In [19]:
# Compute the ACF array of the 'Earnings' column. The column is selected
# explicitly, as the later cells that analyse this frame do, instead of handing
# acf the whole DataFrame.
acf_array = acf(df['Earnings'])
print(acf_array)

# Plot the ACF without a confidence band: alpha=None is the documented way to
# switch the band off (alpha=1 only shrinks it to zero width).
plot_acf(df['Earnings'], alpha=None)
plt.show()

## Confidence intervals

In [25]:
# Lag-1 autocorrelation of the 'Earnings' column.
auto_correlation = df['Earnings'].autocorr(lag=1)
print("The auto-correlation for lag = 1 is %4.2f" % auto_correlation)

# Approximate 95% band around zero: 1.96 standard deviations of a normal curve,
# scaled by one over the square root of the number of rows.
n_obs = len(df.index)
conf = 1.96 / math.sqrt(n_obs)
print("The approximate confidence interval for lag = 1 is +/- %4.2f" % conf)

# ACF plot for the first 20 lags with a 95% confidence band (alpha = 0.05).
plot_acf(df['Earnings'], lags=20, alpha=0.05)
plt.show()

# White noise


* Definition (one common formulation): a series with
    * constant mean,
    * constant variance,
    * zero auto-correlation at all lags.
* If the values are also normally distributed, the series is Gaussian white noise.
* Stock returns are often modeled as white noise.

In [27]:
# Seed NumPy's global random generator so the printed statistics and the plot
# are identical on every Restart & Run All. Cells that draw from np.random
# after this one become reproducible as well when the notebook is run in order.
np.random.seed(42)

# Generate a white noise series: 1000 normal draws with mean 0.02 and standard deviation 0.05.
series = np.random.normal(loc=0.02, scale=0.05, size=1000)
# Sample statistics of the draws, to compare against the parameters above.
mean = np.mean(series)
std = np.std(series)
print("The mean is %5.3f and the standard deviation is %5.3f" % (mean, std))
plt.plot(series)
plt.show()

In [28]:
# ACF of the white noise series for the first 20 lags.
plot_acf(x=series, lags=20)
plt.show()

# Random walk

* Today's price = yesterday's price + noise: $P_t = P_{t-1} + \varepsilon_t$.
* More theory in LyX. 

## Random walk - stock example

In [29]:
# Draw 500 increments from a normal distribution with mean 0 and standard deviation 1.
steps = np.random.normal(0, 1, 500)
# Zero the first increment so the path starts exactly at the starting price.
steps[0] = 0

# Price path P: starting price of 100 plus the running sum of the increments.
P = 100 + steps.cumsum()

# Show the simulated price path.
plt.plot(P)
plt.title("Simulated Random Walk")
plt.show()

## Random walk with drift

In [30]:
# Draw 500 normal returns (mean 0.001, standard deviation 0.01) and turn them
# into growth factors by adding 1.
steps = 1 + np.random.normal(loc=0.001, scale=0.01, size=500)
# A factor of 1 leaves the first price at the starting level.
steps[0] = 1

# Price path P: starting price of 100 times the running product of the factors.
P = 100 * steps.cumprod()

# Show the simulated price path.
plt.plot(P)
plt.title("Simulated Random Walk with Drift")
plt.show()

In [35]:
from statsmodels.tsa.stattools import adfuller  # Augmented Dickey-Fuller (ADF) test for random walk

# Run the ADF test on the 'Earnings' column; element 1 of the result is
# reported as the p-value, shown here as a percentage.
results = adfuller(df['Earnings'])
# The message names the series that is actually tested (earnings, not prices).
print(f'The p-value of the test on earnings is {results[1] * 100:.1f} %')

# Stationarity