In [11]:
import yfinance as yf
import datetime
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models, expected_returns

In [12]:
import warnings

# Disable all warnings
warnings.filterwarnings("ignore")

In [13]:
# Define a pre-list set of stock tickers that we would like to invest in
tickers = ['AAPL', 'AMD', 'AMZN', 'CCJ', 'COST', 'GOOG', 'GS', 'JPM', 'LLY', 'META', 'MSFT', 'NEE', 'PFE', 'SAP', 'WMT']
measurement_days = 10
remaining_days = 30-measurement_days

In [14]:
# Get 5 year adjusted close prices for all stocks as per instruction

stock_prices = {}
short_window_prices = {}
future_prices = {}

for ticker in tickers:
    data = yf.download(ticker,period="5y")
    print(ticker)
    stock_prices[ticker] = data['Adj Close'].tolist()
    stock_data = stock_prices[ticker]
    prices_x = [stock_data[i:i+measurement_days] for i in range(len(stock_data)-remaining_days)]
    prices_y = [stock_data[i+measurement_days] for i in range(measurement_days,len(stock_data)-measurement_days)]

    prices_test = [stock_data[i:i+measurement_days] for i in range(len(stock_data)-remaining_days,len(stock_data)-measurement_days)]

    short_window_prices[ticker] = (prices_x, prices_y, prices_test)

    rf = RandomForestRegressor()
    rf.fit(prices_x,prices_y)
    # Predict stock price for 10 future days
    future_prices[ticker] = rf.predict(prices_test)



[*********************100%***********************]  1 of 1 completed
AAPL
[*********************100%***********************]  1 of 1 completed
AMD
[*********************100%***********************]  1 of 1 completed
AMZN
[*********************100%***********************]  1 of 1 completed
CCJ
[*********************100%***********************]  1 of 1 completed
COST
[*********************100%***********************]  1 of 1 completed
GOOG
[*********************100%***********************]  1 of 1 completed
GS
[*********************100%***********************]  1 of 1 completed
JPM
[*********************100%***********************]  1 of 1 completed
LLY
[*********************100%***********************]  1 of 1 completed
META
[*********************100%***********************]  1 of 1 completed
MSFT
[*********************100%***********************]  1 of 1 completed
NEE
[*********************100%***********************]  1 of 1 completed
PFE
[*********************100%********************

In [15]:
future_prices = pd.DataFrame(future_prices)

# Construct covariance matrix of future stock prices
# cov_matrix = risk_models.sample_cov(future_prices)
S = risk_models.CovarianceShrinkage(future_prices).ledoit_wolf()
# plotting.plot_covariance(S, plot_correlation=True)

# Use capm to find expected returns on future prices
mu = expected_returns.capm_return(future_prices)
print(mu)
# Do mean variance optimization using efficient frontier
rf_ef = EfficientFrontier(mu, S, weight_bounds=(0.05, 0.1))
rf_ef.min_volatility()
weights = rf_ef.clean_weights()
print(weights)

print(rf_ef.portfolio_performance(verbose=True))

AAPL   -0.000861
AMD    -0.664725
AMZN   -1.353119
CCJ    -0.233142
COST    0.121496
GOOG   -1.134059
GS     -0.611966
JPM    -1.198057
LLY     0.014847
META   -0.477784
MSFT    0.122610
NEE    -0.669235
PFE    -0.965398
SAP    -0.425827
WMT    -0.295725
Name: mkt, dtype: float64
OrderedDict([('AAPL', 0.09043), ('AMD', 0.06311), ('AMZN', 0.05), ('CCJ', 0.06853), ('COST', 0.08097), ('GOOG', 0.05), ('GS', 0.0588), ('JPM', 0.05), ('LLY', 0.09084), ('META', 0.06386), ('MSFT', 0.09415), ('NEE', 0.05137), ('PFE', 0.05), ('SAP', 0.06965), ('WMT', 0.06829)])
Expected annual return: -41.9%
Annual volatility: 7.3%
Sharpe Ratio: -5.97
(-0.41853709892559426, 0.0734739410300956, -5.968607274597745)


In [16]:
future_prices = {}

for ticker in tickers:
    # data = yf.download(ticker, period="5y")
    # print(ticker)
    # stock_prices[ticker] = data['Adj Close'].tolist()
    # stock_data = stock_prices[ticker]
    # prices_x = [stock_data[i:i + measurement_days] for i in range(len(stock_data) - remaining_days)]
    # prices_y = [stock_data[i + measurement_days] for i in range(measurement_days, len(stock_data) - measurement_days)]

    # prices_test = [stock_data[i:i + measurement_days] for i in
    #                range(len(stock_data) - remaining_days, len(stock_data) - measurement_days)]

    # short_window_prices[ticker] = (prices_x, prices_y, prices_test)

    gb = GradientBoostingRegressor()
    gb.fit(short_window_prices[ticker][0], short_window_prices[ticker][1])
    # Predict stock price for 10 future days
    future_prices[ticker] = gb.predict(short_window_prices[ticker][2])

future_prices = pd.DataFrame(future_prices)

# Convert future prices DataFrame to numpy array
future_prices_array = future_prices.to_numpy()

# Construct covariance matrix of future stock prices
S = risk_models.sample_cov(future_prices_array)

# Use CAPM to find expected returns on future prices
mu = expected_returns.capm_return(future_prices)
print(mu)

# Do mean-variance optimization using efficient frontier
gb_ef = EfficientFrontier(mu, S, weight_bounds=(0.05, 0.1))
weights = gb_ef.min_volatility()
cleaned_weights = gb_ef.clean_weights()

# Print the optimized weights
print(cleaned_weights)
print(gb_ef.portfolio_performance(verbose=True))


AAPL    0.018616
AMD    -1.475716
AMZN   -0.162857
CCJ    -3.171556
COST   -0.129988
GOOG   -0.553733
GS     -0.253410
JPM    -1.065257
LLY     0.024606
META   -0.035766
MSFT    0.091550
NEE    -0.475177
PFE    -0.524315
SAP    -0.369805
WMT    -0.680163
Name: mkt, dtype: float64
OrderedDict([('AAPL', 0.1), ('AMD', 0.05), ('AMZN', 0.0609), ('CCJ', 0.05), ('COST', 0.05044), ('GOOG', 0.05), ('GS', 0.1), ('JPM', 0.05), ('LLY', 0.1), ('META', 0.1), ('MSFT', 0.08866), ('NEE', 0.05), ('PFE', 0.05), ('SAP', 0.05), ('WMT', 0.05)])
Expected annual return: -44.9%
Annual volatility: 8.3%
Sharpe Ratio: -5.63
(-0.44873914392089226, 0.08323091207505513, -5.631791509123406)


In [17]:
future_prices = {}

for ticker in tickers:

    nn = MLPRegressor(hidden_layer_sizes=(100, 100), max_iter=1000)  # Example configuration, you can adjust this
    nn.fit(short_window_prices[ticker][0], short_window_prices[ticker][1])
    # Predict stock price for 10 future days
    future_prices[ticker] = nn.predict(short_window_prices[ticker][2])

future_prices = pd.DataFrame(future_prices)

# Convert future prices DataFrame to numpy array
future_prices_array = future_prices.to_numpy()

# Construct covariance matrix of future stock prices
S = risk_models.sample_cov(future_prices_array)

# Use CAPM to find expected returns on future prices
mu = expected_returns.capm_return(future_prices)

# Do mean-variance optimization using efficient frontier
nn_ef = EfficientFrontier(mu, S, weight_bounds=(0.05, 0.1))
weights = nn_ef.min_volatility()
cleaned_weights = nn_ef.clean_weights()

# Print the optimized weights
print(cleaned_weights)


OrderedDict([('AAPL', 0.05), ('AMD', 0.05), ('AMZN', 0.1), ('CCJ', 0.05), ('COST', 0.07149), ('GOOG', 0.05), ('GS', 0.05), ('JPM', 0.05), ('LLY', 0.1), ('META', 0.07851), ('MSFT', 0.05), ('NEE', 0.1), ('PFE', 0.05), ('SAP', 0.05), ('WMT', 0.1)])


In [18]:
rf_ef.portfolio_performance(verbose=True)
print("\n")
gb_ef.portfolio_performance(verbose=True)
print("\n")
nn_ef.portfolio_performance(verbose=True)
print("\n")


Expected annual return: -41.9%
Annual volatility: 7.3%
Sharpe Ratio: -5.97


Expected annual return: -44.9%
Annual volatility: 8.3%
Sharpe Ratio: -5.63


Expected annual return: -33.3%
Annual volatility: 3.6%
Sharpe Ratio: -9.79




In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from pypfopt import risk_models, expected_returns
from pypfopt.efficient_frontier import EfficientFrontier

tickers = ['AAPL', 'AMD', 'AMZN', 'CCJ', 'COST', 'GOOG', 'GS', 'JPM', 'LLY', 'META', 'MSFT', 'NEE', 'PFE', 'SAP', 'WMT']
measurement_days = 10
remaining_days = 30 - measurement_days

stock_prices = {}
short_window_prices = {}
future_prices = {}

for ticker in tickers:
    data = yf.download(ticker, period="5y")
    stock_prices[ticker] = data['Adj Close'].tolist()
    stock_data = stock_prices[ticker]
    prices_x = [stock_data[i:i + measurement_days] for i in range(len(stock_data) - remaining_days)]
    prices_y = [stock_data[i + measurement_days] for i in range(measurement_days, len(stock_data) - measurement_days)]

    prices_test = [stock_data[i:i + measurement_days] for i in
                   range(len(stock_data) - remaining_days, len(stock_data) - measurement_days)]

    short_window_prices[ticker] = (prices_x, prices_y, prices_test)

# Define the models to evaluate
models = [
    ('Random Forest', RandomForestRegressor(), {'n_estimators': [50, 100, 200]}),
    ('Gradient Boosting', GradientBoostingRegressor(), {'n_estimators': [50, 100, 200]}),
    ('Neural Network', MLPRegressor(), {'hidden_layer_sizes': [(100,), (100, 100), (200, 100)]})
]

best_model = None
best_model_name = ''
best_score = float('-inf')

# Perform grid search and evaluate each model
for model_name, model, param_grid in models:
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(prices_x, prices_y)
    score = grid_search.best_score_
    print(f"{model_name}: Best Score = {score}, Best Params = {grid_search.best_params_}")
    if score > best_score:
        best_score = score
        best_model = grid_search.best_estimator_
        best_model_name = model_name

print(f"\nBest Model: {best_model_name}")
print(best_model)

# Predict stock price for 10 future days using the best model
future_prices = {}
for ticker, (_, _, prices_test) in short_window_prices.items():
    future_prices[ticker] = best_model.predict(prices_test)

future_prices = pd.DataFrame(future_prices)

# Convert future prices DataFrame to numpy array
future_prices_array = future_prices.to_numpy()

# Construct covariance matrix of future stock prices
S = risk_models.sample_cov(future_prices_array)

# Use CAPM to find expected returns on future prices
mu = expected_returns.capm_return(future_prices)

# Do mean-variance optimization using efficient frontier
ef = EfficientFrontier(mu, S)
weights = ef.min_volatility()
ef.portfolio_performance(verbose=True)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********



Neural Network: Best Score = -30.718828009402245, Best Params = {'hidden_layer_sizes': (100,)}

Best Model: Neural Network
MLPRegressor()
Expected annual return: -0.5%
Annual volatility: 0.9%
Sharpe Ratio: -2.70




(-0.0049648857484028, 0.00924654716517848, -2.699914389926861)