In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, LeaveOneOut, cross_val_score
from sklearn.preprocessing import PolynomialFeatures

# Silences every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation/convergence warnings raised by the
# libraries used below -- consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [None]:
# Leave-one-out cross-validation (LOOCV) of a simple linear regression of
# AAPL daily log returns on IBM daily log returns, written out by hand.

# auto_adjust=False is passed explicitly so that the 'Adj Close' column exists:
# recent yfinance releases default to auto_adjust=True, which omits it.
# NOTE(review): confirm against the installed yfinance version.
# The period code uses the lowercase form that yfinance documents ('1y').
data = yf.download(['AAPL', 'IBM'], period='1y', auto_adjust=False)['Adj Close']
returns = np.log(data).diff().dropna()

n = returns.shape[0]
if n < 2:
    # Each LOOCV fold needs at least one training row in addition to the
    # held-out row; an empty/failed download would otherwise yield a NaN score.
    raise ValueError(f'Need at least 2 return observations for LOOCV, got {n}')

fig, ax = plt.subplots(figsize=(10, 6))
returns.plot(kind='scatter', x='IBM', y='AAPL', ax=ax)
ax.set_title('Daily log returns: AAPL vs IBM')

# One squared prediction error (a plain float) per held-out observation.
MSE_list = list()

for i in range(n):
    # Select rows by position so the split does not depend on index labels.
    held_out = np.arange(n) == i
    train = returns.iloc[~held_out]
    valid = returns.iloc[[i]]

    model = LinearRegression()
    # 1-D target -> predict() returns a 1-D array, so a single [0] gives the scalar.
    model.fit(train[['IBM']], train['AAPL'])
    pred = model.predict(valid[['IBM']])[0]

    MSE_list.append(float((valid['AAPL'].iloc[0] - pred) ** 2))

# LOOCV estimate of the test MSE: the mean of the n held-out squared errors.
CV = np.mean(MSE_list)

print('Cross-validation MSE: ', CV)

## Cross-validation: choosing the polynomial degree

In [None]:
# Compare leave-one-out and 10-fold cross-validated MSE of polynomial
# regressions mpg ~ horsepower for degrees 1..10.

# NOTE(review): some copies of Auto.csv mark missing horsepower with '?'.
# na_values + dropna keep the column numeric in that case and are no-ops on an
# already-clean file -- confirm against the file in use.
df = pd.read_csv('Auto.csv', na_values='?').dropna(subset=['horsepower', 'mpg'])

# Show a preview only; the full frame would flood the cell output.
display(df.head())

fig, (ax, ax2) = plt.subplots(2, figsize=(10, 10))

df.plot(kind='scatter', ax=ax, x='horsepower', y='mpg', c='cylinders', colormap='viridis')
ax.set_title('mpg vs horsepower (coloured by cylinders)')

X = df[['horsepower']]
y = df['mpg']

degrees = range(1, 11)

# The splitters do not depend on the degree, so build them once.
LOO = LeaveOneOut()
# An integer `cv` gives contiguous, unshuffled folds for regressors, which makes
# the estimate depend on the row order of the file. Shuffle with a fixed seed
# so the folds are random but reproducible.
kfold = KFold(n_splits=10, shuffle=True, random_state=0)

MSE_list_LOO = list()
MSE_list_10 = list()

for i in degrees:
    # LinearRegression fits its own intercept, so the constant column that
    # PolynomialFeatures adds by default would be redundant.
    poly = PolynomialFeatures(degree=i, include_bias=False)
    X_poly = poly.fit_transform(X)

    model = LinearRegression()
    # The scorer returns negated MSE (higher is better); flip the sign back.
    MSE_list_LOO.append(-cross_val_score(model, X_poly, y, cv=LOO, scoring='neg_mean_squared_error').mean())
    MSE_list_10.append(-cross_val_score(model, X_poly, y, cv=kfold, scoring='neg_mean_squared_error').mean())

ax2.plot(degrees, MSE_list_LOO, marker='o', color='red', label='LOO')
ax2.plot(degrees, MSE_list_10, marker='o', color='green', label='10-fold')
ax2.set_title('Cross-validated MSE by polynomial degree')
ax2.set_xlabel('Degree')
ax2.set_ylabel('CV MSE')
ax2.grid(alpha=0.3)
ax2.legend()

