In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import yfinance as yfin
import math
import matplotlib_inline.backend_inline 
import statsmodels.api as sm
from patsy import dmatrices
import random

# Emit inline figures in both PDF and PNG form.
matplotlib_inline.backend_inline.set_matplotlib_formats('pdf', 'png')

# Notebook-wide Matplotlib defaults, applied in one update.
plt.rcParams.update({
    # Saved-figure resolution.
    'savefig.dpi': 75,
    # Figure geometry.
    'figure.autolayout': False,
    'figure.figsize': (10, 6),
    # Text sizes.
    'axes.labelsize': 18,
    'axes.titlesize': 20,
    'font.size': 16,
    'legend.fontsize': 14,
    # Line and marker weights.
    'lines.linewidth': 2.0,
    'lines.markersize': 8,
    # Route all figure text through LaTeX with a serif face. With usetex on,
    # characters such as "_" in titles/labels must be escaped or placed in
    # math mode.
    'text.usetex': True,
    'font.family': "serif",
    'font.serif': "cm",
})

In [None]:
# 1.1 SPY vs AAPL CAPM Model
#
# Regress AAPL daily returns on SPY daily returns by OLS. In the fitted
# parameters the intercept plays the role of alpha and the SPY slope of beta.

# auto_adjust=False is requested explicitly so the 'Adj Close' column is
# present regardless of the installed yfinance's default for that option.
data = yfin.download(tickers=['SPY', 'AAPL'], start='2010-01-01', end='2022-01-01',
                     interval='1d', auto_adjust=False)

# pct_change() leaves the first row NaN; it is back-filled with the next day's
# return. .bfill() replaces the deprecated fillna(method='bfill') spelling.
# NOTE(review): back-filling duplicates the first observed return; .dropna()
# would drop the row instead -- confirm which is intended.
data = data['Adj Close'].pct_change().bfill()

# The patsy formula adds the intercept column to X.
y, X = dmatrices("AAPL ~ SPY", data=data, return_type="dataframe")

model = sm.OLS(y, X)
results = model.fit()

results.params

In [None]:
# 1.2 Residuals vs. Fitted
#
# Scatter the OLS residuals against the fitted values to eyeball
# heteroskedasticity or non-linearity.

fittedValues = results.fittedvalues
residuals = y['AAPL'] - fittedValues

plt.scatter(fittedValues, residuals)
plt.title('Residuals vs Fitted')
plt.xlabel('Fitted')
plt.ylabel('Residuals')
# plt.show must be *called*; the bare name is a no-op expression whose repr
# would be echoed as the cell output.
plt.show()

In [None]:
# 1.2 Residual Sum of Squares RSS

# Vectorised reduction: the built-in sum() would walk the Series one element
# at a time in Python.
RSS = np.square(residuals).sum()

RSS

In [None]:
# 1.3 Null Regression Model
#
# Intercept-only OLS: the response is regressed on a column of ones.

# One constant regressor per observation in y.
X = np.ones(y.shape[0], dtype=int)

null_model = sm.OLS(endog=y, exog=X)
null_results = null_model.fit()

null_results.params

In [None]:
# 1.3 Alpha in Null Regression Model
#
# The intercept of an intercept-only OLS fit is the sample mean of the
# response, so compare the value above with the mean of the AAPL returns.
# The column is selected explicitly because np.mean() applied to a whole
# DataFrame returns a Series or a scalar depending on the pandas version.

y['AAPL'].mean()

In [None]:
# 1.4 R Squared Using Formula
#
# R^2 = 1 - RSS / RSS_null, where RSS_null is the residual sum of squares of
# the intercept-only model.

null_fittedValues = null_results.fittedvalues

null_residuals = y['AAPL'] - null_fittedValues

# Vectorised reduction instead of the element-by-element built-in sum().
null_RSS = np.square(null_residuals).sum()

r_squared = 1 - (RSS/null_RSS)

r_squared


In [None]:
# 1.4 R Squared
#
# R^2 as reported by the fitted statsmodels results object, shown so it can
# be compared with the value computed from the formula.

results.rsquared

In [None]:
# 2.3 Simulate Distribution of f_n

def frac_of_red(n, initial_red=1, initial_green=1):
    """Simulate a Polya-style urn and return the final fraction of red balls.

    At every draw a ball is picked with probability proportional to the
    current colour counts, and one more ball of the picked colour is added.

    Parameters
    ----------
    n : int
        Number of draws to perform.
    initial_red, initial_green : int, optional
        Starting counts of each colour (both default to 1).

    Returns
    -------
    float
        ``red / (red + green)`` after ``n`` draws.

    Raises
    ------
    ValueError
        If a starting count is negative or the urn starts empty.
    """
    if initial_red < 0 or initial_green < 0 or initial_red + initial_green <= 0:
        raise ValueError("the urn needs non-negative counts and at least one ball")

    red, green = initial_red, initial_green
    for _ in range(n):
        # One uniform draw on [0, 1) picks the colour: red with probability
        # red / (red + green). This avoids building a label list and a
        # probability vector for np.random.choice on every draw.
        if np.random.random() < red / (red + green):
            red += 1
        else:
            green += 1
    return red / (red + green)

# 1000 independent urns, 10000 draws each.
proportion_red = [frac_of_red(10000) for trial in range(1000)]

plt.hist(proportion_red, bins=25)
# text.usetex is enabled, so the subscript has to be in math mode: a bare "_"
# in text mode makes LaTeX fail when the figure is rendered.
plt.title(r'Simulation of $f_n$')
plt.xlabel('Proportion of Red Balls')
plt.ylabel('Frequency')
plt.show()

In [None]:
# 2.4 (d) Simulate Z_n

def simulations(generations=10000):
    """Simulate a branching process with Poisson(1) offspring.

    The population starts at 1. In each generation every individual is
    replaced by an independent Poisson(1) number of offspring.

    Parameters
    ----------
    generations : int, optional
        Number of generations to simulate (default 10000).

    Returns
    -------
    int
        Population size after ``generations`` generations.
    """
    population = 1
    for _ in range(generations):
        if population == 0:
            # Zero individuals produce zero offspring, so the population
            # stays at 0 for every remaining generation.
            break
        # Draw all offspring counts for this generation in one call.
        population = int(np.random.poisson(1, size=population).sum())
    return population

# 1000 independent runs of the branching process.
Z = [simulations() for trial in range(1000)]

plt.hist(Z, bins=30)
# text.usetex is enabled, so subscripts have to be in math mode: a bare "_"
# in text mode makes LaTeX fail when the figure is rendered.
plt.title(r'Simulation of $Z_n$')
plt.xlabel(r'$Z_n$')
plt.ylabel('Frequency')
plt.show()

In [None]:
# 3.3 Game Simulation
# Per-round win probabilities to compare, from 0.5 down to 0.47.
probabilities = [0.5, 0.495, 0.490, 0.480, 0.470]

def random_walk(p, lower=-100, upper=100):
    """Play unit-stake rounds until the running total hits a barrier.

    The total starts at 0. Each round adds 1 with probability ``p`` and
    subtracts 1 otherwise; play stops as soon as the total reaches ``lower``
    or ``upper``.

    Parameters
    ----------
    p : float
        Probability of winning a single round.
    lower, upper : int, optional
        Integer stopping barriers below and above the starting point
        (default -100 and 100).

    Returns
    -------
    tuple
        ``(money, duration)``: the final total and the number of rounds played.

    Raises
    ------
    ValueError
        If the barriers do not satisfy ``lower < 0 < upper``.
    """
    if not lower < 0 < upper:
        raise ValueError("barriers must satisfy lower < 0 < upper")

    money = 0
    duration = 0
    while lower < money < upper:
        # Scalar uniform draw on [0, 1); no reliance on the truthiness of a
        # one-element array.
        if np.random.random() < p:
            money += 1
        else:
            money -= 1
        duration += 1
    return money, duration

def simulation(p, trials):
    """Estimate the win probability and mean duration of the game.

    Parameters
    ----------
    p : float
        Per-round win probability passed to ``random_walk``.
    trials : int
        Number of independent games to play; must be positive, since both
        results are divided by it.

    Returns
    -------
    tuple
        ``(prob_win, ave_duration)``: the fraction of games that ended at
        +100 and the average number of rounds per game.
    """
    num_wins = 0
    sum_duration = 0
    for _ in range(trials):
        # Play each game exactly once so the outcome and the duration come
        # from the same walk (and the walk is not simulated twice).
        money, duration = random_walk(p)
        if money == 100:
            num_wins += 1
        sum_duration += duration
    prob_win = num_wins / trials
    ave_duration = sum_duration / trials
    return (prob_win, ave_duration)

# One (win frequency, average duration) pair per win probability,
# each estimated from 10000 games.
stats = list(map(lambda win_prob: simulation(win_prob, 10000), probabilities))

stats

In [None]:
# 3.5 Stopping Time Simulation

def stopping_time(p):
    """Return the number of rounds played up to and including the first win.

    Each round is won independently with probability ``p``; play stops at the
    first winning round.

    NOTE(review): a losing round does not reduce the winnings here, so this
    is the waiting time for the first win, not the first time a +/-1 walk
    reaches +1 -- confirm this is the intended stopping rule.

    Parameters
    ----------
    p : float
        Probability of winning a single round; must be greater than 0.

    Returns
    -------
    int
        Number of rounds played.

    Raises
    ------
    ValueError
        If ``p`` is not greater than 0, since the loop could never end.
    """
    if not p > 0:
        raise ValueError("p must be greater than 0, otherwise no round is ever won")

    duration = 1
    # Each failed draw (x >= p) costs one more round.
    while np.random.random() >= p:
        duration += 1
    return duration

# 1000 independent runs at p = 0.5, then the sample mean of the run lengths.
durations = [stopping_time(0.5) for _ in range(1000)]

np.asarray(durations).mean()

