<a href="https://colab.research.google.com/github/anissa762/goldfish/blob/main/goldfish.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
# %%
# Install necessary libraries
!pip install --upgrade optuna ta tensorflow torch torchvision
!pip install keras



In [22]:
# %%
# Import Libraries
import pandas as pd
import numpy as np
import glob
import os
from datetime import datetime, timedelta

# Machine Learning Libraries
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# For Technical Indicators
import ta

# For Optuna Hyperparameter Tuning
import optuna
from optuna.pruners import MedianPruner
from optuna.exceptions import TrialPruned

# TensorFlow Libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

# PyTorch Libraries
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# For Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# For saving/loading data
import pickle

# Suppress warnings for cleaner output
import warnings
warnings.filterwarnings('ignore')

In [23]:
# %%
# Mount Google Drive for persistent storage
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [24]:
# %%
# Define paths
DATA_PATH = '/content/drive/MyDrive/data/'
STUDIES_DIR = os.path.join(DATA_PATH, 'optuna_studies')
FORECASTS_PATH = os.path.join(DATA_PATH, 'stock_forecasts.pkl')
PERFORMANCE_PATH = os.path.join(DATA_PATH, 'model_performance.pkl')
BEST_PARAMS_PATH = os.path.join(DATA_PATH, 'best_params_dict.pkl')
ELECTION_DATA_PATH = '/content/drive/MyDrive/final_stock_rankings.csv'

# Create necessary directories if they don't exist
os.makedirs(STUDIES_DIR, exist_ok=True)

In [25]:
# %%
# Load existing best parameters if available
if os.path.exists(BEST_PARAMS_PATH):
    with open(BEST_PARAMS_PATH, 'rb') as f:
        best_params_dict = pickle.load(f)
    print("Loaded existing best parameters.")
else:
    best_params_dict = {}
    print("No existing best parameters found. Starting fresh.")

Loaded existing best parameters.


In [26]:
# %%
# Load existing forecasts
if os.path.exists(FORECASTS_PATH):
    with open(FORECASTS_PATH, 'rb') as f:
        stock_forecasts = pickle.load(f)
    print("Loaded existing forecasts.")
else:
    stock_forecasts = {}
    print("No existing forecasts found. Starting fresh.")

Loaded existing forecasts.


In [27]:
# %%
# Load existing model performance data
if os.path.exists(PERFORMANCE_PATH):
    with open(PERFORMANCE_PATH, 'rb') as f:
        model_performance = pickle.load(f)
    print("Loaded existing model performance data.")
else:
    model_performance = []
    print("No existing model performance data found. Starting fresh.")

Loaded existing model performance data.


In [28]:
# %%
# List all CSV files in the directory
csv_files = glob.glob(os.path.join(DATA_PATH, '*_data_cleaned.csv'))

print(f"Total CSV files found: {len(csv_files)}")

Total CSV files found: 260


In [29]:
# %%
# List all CSV files in the directory
csv_files = glob.glob(os.path.join(DATA_PATH, '*_data_cleaned.csv'))

print(f"Total CSV files found: {len(csv_files)}")

Total CSV files found: 260


In [30]:
# %%
# Initialize a dictionary to store DataFrames
stock_data = {}

# Feature Engineering Function
def add_technical_indicators(df):
    df = df.copy()

    # Moving Averages
    df['MA10'] = df['Close'].rolling(window=10).mean()
    df['MA50'] = df['Close'].rolling(window=50).mean()

    # Relative Strength Index
    df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=14).rsi()

    # Moving Average Convergence Divergence
    macd = ta.trend.MACD(df['Close'])
    df['MACD'] = macd.macd()
    df['MACD_signal'] = macd.macd_signal()
    df['MACD_diff'] = macd.macd_diff()

    # Bollinger Bands
    bollinger = ta.volatility.BollingerBands(df['Close'], window=20, window_dev=2)
    df['Bollinger_High'] = bollinger.bollinger_hband()
    df['Bollinger_Low'] = bollinger.bollinger_lband()

    # Volume Indicators
    df['Volume_MA20'] = df['Volume'].rolling(window=20).mean()

    # On-Balance Volume
    df['OBV'] = ta.volume.OnBalanceVolumeIndicator(df['Close'], df['Volume']).on_balance_volume()

    # Exponential Moving Averages
    df['EMA10'] = ta.trend.EMAIndicator(df['Close'], window=10).ema_indicator()
    df['EMA50'] = ta.trend.EMAIndicator(df['Close'], window=50).ema_indicator()

    # Drop initial rows with NaN values due to rolling calculations
    df.dropna(inplace=True)

    return df

In [31]:
# %%
# Load and preprocess each CSV file
for file in csv_files:
    stock_name = os.path.basename(file).split('_')[0]
    df = pd.read_csv(file)

    # Convert 'Date' column to datetime
    df['Date'] = pd.to_datetime(df['Date'])

    # Sort by Date
    df = df.sort_values('Date')

    # Drop rows with any NA values
    df.dropna(inplace=True)

    # Reset index
    df.reset_index(drop=True, inplace=True)

    # Add technical indicators
    df = add_technical_indicators(df)

    # Store in dictionary
    stock_data[stock_name] = df

print("Data loading and preprocessing completed.")

Data loading and preprocessing completed.


In [32]:
# %%
# Load Election Impact Data
if os.path.exists(ELECTION_DATA_PATH):
    election_data = pd.read_csv(ELECTION_DATA_PATH)
    print("Loaded election impact data.")
else:
    election_data = pd.DataFrame()
    print("Election impact data not found.")

Loaded election impact data.


In [33]:
# %%
# Display the first few rows and columns of election impact data to understand its structure
if not election_data.empty:
    print("Election Impact Data Columns:", election_data.columns.tolist())
    display(election_data.head())
else:
    print("No election impact data to display.")

Election Impact Data Columns: ['Ticker', 'InverseCorr2016', 'InverseCorr2020', 'FinalScore']


Unnamed: 0,Ticker,InverseCorr2016,InverseCorr2020,FinalScore
0,WFC,0.16752,-0.035576,0.203096
1,EQT,0.068077,-0.114918,0.182995
2,K,0.074987,-0.091078,0.166065
3,HPE,0.126264,-0.036836,0.1631
4,BAC,0.099325,-0.06252,0.161845


In [34]:
# %%
# Merge 'FinalScore' into each stock's DataFrame
# First, identify the correct column name for stock identifiers
# Common possibilities include 'Stock', 'Ticker', or 'Symbol'
stock_identifier_cols = ['Stock', 'Ticker', 'Symbol']

# Find which of these columns exists in election_data
existing_id_cols = [col for col in stock_identifier_cols if col in election_data.columns]

if not existing_id_cols:
    raise KeyError(f"None of the expected stock identifier columns {stock_identifier_cols} found in election_data.")
elif len(existing_id_cols) > 1:
    print(f"Multiple identifier columns found: {existing_id_cols}. Using the first one: '{existing_id_cols[0]}'.")
    id_col = existing_id_cols[0]
else:
    id_col = existing_id_cols[0]
    print(f"Using '{id_col}' as the stock identifier column for merging.")

# Define the default ElectionImpact value
DEFAULT_ELECTION_IMPACT = 0  # Represents 'no impact'

# Proceed to merge 'FinalScore' based on the identified stock identifier column
for stock in stock_data:
    # Retrieve the 'FinalScore' for the current stock
    score_series = election_data.loc[election_data[id_col] == stock, 'FinalScore']

    if not score_series.empty:
        election_impact = score_series.values[0]
        print(f"Assigned ElectionImpact for {stock}: {election_impact}")
    else:
        # If 'FinalScore' is not specified for the stock, assign the default impact value
        election_impact = DEFAULT_ELECTION_IMPACT
        print(f"No ElectionImpact specified for {stock}. Assigned default impact value: {0}")

    # Assign 'ElectionImpact' to the DataFrame, even if it's empty
    # This ensures the column exists for all DataFrames
    stock_data[stock]['ElectionImpact'] = election_impact

Using 'Ticker' as the stock identifier column for merging.
Assigned ElectionImpact for XYL: -0.0020828702201409
Assigned ElectionImpact for WSM: 0.0654784830021114
Assigned ElectionImpact for WMB: -0.0337722429916873
Assigned ElectionImpact for WST: 0.0552518798394123
Assigned ElectionImpact for XOM: -0.0341210206746801
Assigned ElectionImpact for WY: 0.1239767103734074
Assigned ElectionImpact for WFC: 0.2030960160762678
Assigned ElectionImpact for WTW: 0.1126134281430669
Assigned ElectionImpact for WDC: 0.1162450134542883
Assigned ElectionImpact for ZBH: 0.0520346518539104
Assigned ElectionImpact for WBD: 0.0379008483854373
Assigned ElectionImpact for WELL: 0.0077327173983179
Assigned ElectionImpact for WDAY: 0.0792028281862628
Assigned ElectionImpact for VZ: 0.015503175866263
Assigned ElectionImpact for WAT: -0.0389472240845118
Assigned ElectionImpact for V: 0.0850969349459994
Assigned ElectionImpact for VEEV: -0.0049466658406312
Assigned ElectionImpact for URI: 0.1395217100292884
As

In [35]:
# %%
# Function to determine if a study exists
def study_exists(study_name, storage_name):
    from optuna.storages import RDBStorage
    try:
        storage = RDBStorage(url=storage_name)
        storage.get_study_id_from_name(study_name)
        return True
    except KeyError:
        return False

In [36]:
N_TRIALS_GB = 15

In [37]:
# %%
# Loop through each stock and train Gradient Boosting model
for stock in stock_data:
    # Skip if the stock has already been processed
    if stock in stock_forecasts and 'GradientBoosting' in stock_forecasts[stock]:
        print(f"Skipping Gradient Boosting for {stock} as it has already been processed.")
        continue

    print(f"\nProcessing Gradient Boosting model for stock: {stock}")
    df = stock_data[stock]

    # Define features and target
    feature_cols = ['Open', 'Close', 'High', 'Low', 'Volume', 'MA10', 'MA50', 'RSI',
                   'MACD', 'MACD_signal', 'MACD_diff', 'Bollinger_High', 'Bollinger_Low',
                   'Volume_MA20', 'OBV', 'EMA10', 'EMA50', 'ElectionImpact']

    X = df[feature_cols]
    y = df['Close']

    # Define study name and storage
    study_name = f"gb_study_{stock}"
    storage_name = f"sqlite:///{STUDIES_DIR}/gb_study_{stock}.db"

    # Check if the study already exists
    if study_exists(study_name, storage_name):
        study = optuna.load_study(study_name=study_name, storage=storage_name)
        print(f"Loaded existing Gradient Boosting study for {stock}.")
    else:
        study = optuna.create_study(direction='minimize', study_name=study_name, storage=storage_name, load_if_exists=True, pruner=MedianPruner())
        print(f"Created new Gradient Boosting study for {stock}.")

    # Define the objective function within the loop to capture current X and y
    def objective_gb(trial):
        param = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 500),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 20)
        }
        model = GradientBoostingRegressor(**param, random_state=42)
        tscv = TimeSeriesSplit(n_splits=5)
        mse_scores = []
        for i, (train_index, test_index) in enumerate(tscv.split(X)):
            X_train, X_val = X.iloc[train_index], X.iloc[test_index]
            y_train, y_val = y.iloc[train_index], y.iloc[test_index]
            model.fit(X_train, y_train)
            preds = model.predict(X_val)
            mse = mean_squared_error(y_val, preds)
            mse_scores.append(mse)
            # Report intermediate objective value
            trial.report(mse, i+1)
            # Prune trial if not promising
            if trial.should_prune():
                raise TrialPruned()
        return np.mean(mse_scores)

    try:
        # Optimize hyperparameters using Optuna without SklearnPruningCallback
        study.optimize(objective_gb, n_trials=N_TRIALS_GB, timeout=1800)

        best_params = study.best_params
        print(f"Best params for {stock} (Gradient Boosting): {best_params}")

        # Save best parameters
        best_params_dict.setdefault(stock, {})
        best_params_dict[stock]['GradientBoosting'] = best_params

        # Train the best model on the entire dataset
        best_model = GradientBoostingRegressor(**best_params, random_state=42)
        best_model.fit(X, y)

        # Forecasting: Predict the Close price on the next day
        forecast_close = best_model.predict(X.iloc[-1].values.reshape(1, -1))[0]

        # Store the forecast
        stock_forecasts.setdefault(stock, {})
        stock_forecasts[stock]['GradientBoosting'] = forecast_close

        # Store performance
        model_performance.append({'Stock': stock, 'Model': 'GradientBoosting', 'RMSE': study.best_value})

        print(f"Completed Gradient Boosting for {stock}. Forecast: {forecast_close}")

    except TrialPruned:
        print(f"Gradient Boosting trial for {stock} was pruned.")
    except Exception as e:
        print(f"An error occurred while processing Gradient Boosting for {stock}: {e}")

    # Save the best parameters
    with open(BEST_PARAMS_PATH, 'wb') as f:
        pickle.dump(best_params_dict, f)

    # Save the forecasts and performance after each stock
    with open(FORECASTS_PATH, 'wb') as f:
        pickle.dump(stock_forecasts, f)

    with open(PERFORMANCE_PATH, 'wb') as f:
        pickle.dump(model_performance, f)

    print(f"Saved Gradient Boosting parameters, forecasts, and performance data for {stock}.")

print("\nGradient Boosting model training and forecasting completed.")

Skipping Gradient Boosting for XYL as it has already been processed.
Skipping Gradient Boosting for WSM as it has already been processed.
Skipping Gradient Boosting for WMB as it has already been processed.
Skipping Gradient Boosting for WST as it has already been processed.
Skipping Gradient Boosting for XOM as it has already been processed.
Skipping Gradient Boosting for WY as it has already been processed.
Skipping Gradient Boosting for WFC as it has already been processed.
Skipping Gradient Boosting for WTW as it has already been processed.
Skipping Gradient Boosting for WDC as it has already been processed.
Skipping Gradient Boosting for ZBH as it has already been processed.
Skipping Gradient Boosting for WBD as it has already been processed.
Skipping Gradient Boosting for WELL as it has already been processed.
Skipping Gradient Boosting for WDAY as it has already been processed.
Skipping Gradient Boosting for VZ as it has already been processed.
Skipping Gradient Boosting for WAT

[W 2024-12-08 15:22:30,431] Trial 10 failed with parameters: {'n_estimators': 367, 'learning_rate': 0.024996199100290006, 'max_depth': 8, 'subsample': 0.7913202457019239, 'min_samples_split': 16} because of the following error: ValueError('Cannot have number of folds=6 greater than the number of samples=0.').
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-37-1f9010b01bc7>", line 44, in objective_gb
    for i, (train_index, test_index) in enumerate(tscv.split(X)):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_split.py", line 1247, in _split
    raise ValueError(
ValueError: Cannot have number of folds=6 greater than the number of samples=0.
[W 2024-12-08 15:22:30,435] Trial 10 failed with value None.


An error occurred while processing Gradient Boosting for TEAM: Cannot have number of folds=6 greater than the number of samples=0.
Saved Gradient Boosting parameters, forecasts, and performance data for TEAM.
Skipping Gradient Boosting for TRGP as it has already been processed.
Skipping Gradient Boosting for TFC as it has already been processed.
Skipping Gradient Boosting for TSLA as it has already been processed.
Skipping Gradient Boosting for TRMB as it has already been processed.
Skipping Gradient Boosting for UNH as it has already been processed.
Skipping Gradient Boosting for SPGI as it has already been processed.
Skipping Gradient Boosting for SYF as it has already been processed.
Skipping Gradient Boosting for TT as it has already been processed.
Skipping Gradient Boosting for UNP as it has already been processed.
Skipping Gradient Boosting for SYK as it has already been processed.
Skipping Gradient Boosting for UPS as it has already been processed.
Skipping Gradient Boosting fo

[W 2024-12-08 15:22:31,005] Trial 6 failed with parameters: {'n_estimators': 280, 'learning_rate': 0.16526281764853964, 'max_depth': 10, 'subsample': 0.6319601316884346, 'min_samples_split': 12} because of the following error: ValueError('Cannot have number of folds=6 greater than the number of samples=0.').
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-37-1f9010b01bc7>", line 44, in objective_gb
    for i, (train_index, test_index) in enumerate(tscv.split(X)):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_split.py", line 1247, in _split
    raise ValueError(
ValueError: Cannot have number of folds=6 greater than the number of samples=0.
[W 2024-12-08 15:22:31,006] Trial 6 failed with value None.


An error occurred while processing Gradient Boosting for TWLO: Cannot have number of folds=6 greater than the number of samples=0.
Saved Gradient Boosting parameters, forecasts, and performance data for TWLO.
Skipping Gradient Boosting for TSCO as it has already been processed.
Skipping Gradient Boosting for TXN as it has already been processed.
Skipping Gradient Boosting for TRV as it has already been processed.
Skipping Gradient Boosting for T as it has already been processed.
Skipping Gradient Boosting for SBUX as it has already been processed.
Skipping Gradient Boosting for PNC as it has already been processed.
Skipping Gradient Boosting for QCOM as it has already been processed.
Skipping Gradient Boosting for PTC as it has already been processed.
Skipping Gradient Boosting for PPG as it has already been processed.
Skipping Gradient Boosting for PWR as it has already been processed.
Skipping Gradient Boosting for PRU as it has already been processed.
Skipping Gradient Boosting for 

[W 2024-12-08 15:22:32,881] Trial 2 failed with parameters: {'n_estimators': 412, 'learning_rate': 0.03735068905825079, 'max_depth': 9, 'subsample': 0.5929253675374775, 'min_samples_split': 6} because of the following error: ValueError('Cannot have number of folds=6 greater than the number of samples=0.').
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-37-1f9010b01bc7>", line 44, in objective_gb
    for i, (train_index, test_index) in enumerate(tscv.split(X)):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_split.py", line 1247, in _split
    raise ValueError(
ValueError: Cannot have number of folds=6 greater than the number of samples=0.
[W 2024-12-08 15:22:32,886] Trial 2 failed with value None.


An error occurred while processing Gradient Boosting for ALNY: Cannot have number of folds=6 greater than the number of samples=0.
Saved Gradient Boosting parameters, forecasts, and performance data for ALNY.
Skipping Gradient Boosting for AFL as it has already been processed.
Skipping Gradient Boosting for ANET as it has already been processed.
Skipping Gradient Boosting for BBY as it has already been processed.
Skipping Gradient Boosting for ANSS as it has already been processed.
Skipping Gradient Boosting for BALL as it has already been processed.
Skipping Gradient Boosting for ACGL as it has already been processed.
Skipping Gradient Boosting for AXON as it has already been processed.
Skipping Gradient Boosting for BKR as it has already been processed.
Skipping Gradient Boosting for AMD as it has already been processed.
Skipping Gradient Boosting for AIZ as it has already been processed.
Skipping Gradient Boosting for A as it has already been processed.
Skipping Gradient Boosting fo

[W 2024-12-08 15:22:33,626] Trial 1 failed with parameters: {'n_estimators': 291, 'learning_rate': 0.03023815564873973, 'max_depth': 7, 'subsample': 0.5132281455018759, 'min_samples_split': 17} because of the following error: ValueError('Cannot have number of folds=6 greater than the number of samples=0.').
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-37-1f9010b01bc7>", line 44, in objective_gb
    for i, (train_index, test_index) in enumerate(tscv.split(X)):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_split.py", line 1247, in _split
    raise ValueError(
ValueError: Cannot have number of folds=6 greater than the number of samples=0.
[W 2024-12-08 15:22:33,628] Trial 1 failed with value None.


An error occurred while processing Gradient Boosting for HUBS: Cannot have number of folds=6 greater than the number of samples=0.
Saved Gradient Boosting parameters, forecasts, and performance data for HUBS.
Skipping Gradient Boosting for HOLX as it has already been processed.
Skipping Gradient Boosting for FTV as it has already been processed.
Skipping Gradient Boosting for HBAN as it has already been processed.
Skipping Gradient Boosting for HD as it has already been processed.
Skipping Gradient Boosting for HCA as it has already been processed.
Skipping Gradient Boosting for FLUT as it has already been processed.
Skipping Gradient Boosting for HES as it has already been processed.
Skipping Gradient Boosting for GOOGL as it has already been processed.
Skipping Gradient Boosting for JNJ as it has already been processed.
Skipping Gradient Boosting for HON as it has already been processed.
Skipping Gradient Boosting for JCI as it has already been processed.
Skipping Gradient Boosting f

In [43]:
# %%
# Load necessary libraries
import pandas as pd
import numpy as np
import pickle

# Define paths
DATA_PATH = '/content/drive/MyDrive/data/'
STUDIES_DIR = os.path.join(DATA_PATH, 'optuna_studies')
FORECASTS_PATH = os.path.join(DATA_PATH, 'stock_forecasts.pkl')
PERFORMANCE_PATH = os.path.join(DATA_PATH, 'model_performance.pkl')
BEST_PARAMS_PATH = os.path.join(DATA_PATH, 'best_params_dict.pkl')

# Load existing forecasts and performance data
with open(FORECASTS_PATH, 'rb') as f:
    stock_forecasts = pickle.load(f)

with open(PERFORMANCE_PATH, 'rb') as f:
    model_performance = pickle.load(f)

# List of all stocks processed
all_stocks = list(stock_data.keys())

# Identify inadequate stocks based on performance data
inadequate_stocks = []
adequate_stocks = []

for stock in all_stocks:
    # Check if the stock has a forecast and RMSE
    if stock in stock_forecasts and 'GradientBoosting' in stock_forecasts[stock]:
        # Further check if RMSE is not None (indicating successful processing)
        performance = next((item for item in model_performance if item['Stock'] == stock and item['Model'] == 'GradientBoosting'), None)
        if performance and performance['RMSE'] is not None:
            adequate_stocks.append(stock)
        else:
            inadequate_stocks.append(stock)
    else:
        inadequate_stocks.append(stock)

print(f"Total Stocks: {len(all_stocks)}")
print(f"Number of Adequate Stocks: {len(adequate_stocks)}")
print(f"Number of Inadequate Stocks: {len(inadequate_stocks)}")
print("\nList of Inadequate Stocks:")
print(inadequate_stocks)

Total Stocks: 260
Number of Adequate Stocks: 256
Number of Inadequate Stocks: 4

List of Inadequate Stocks:
['TEAM', 'TWLO', 'ALNY', 'HUBS']


In [45]:
# %%
# Initialize a list to store profit calculations
profit_list = []

for stock in adequate_stocks:
    try:
        # Retrieve the forecasted price
        forecast_price = stock_forecasts[stock]['GradientBoosting']

        # Get the current Close price (last available in the DataFrame)
        current_price = stock_data[stock]['Close'].iloc[-1]

        # Calculate the number of shares purchasable with £30
        num_shares = 30 / current_price

        # Calculate forecasted investment value
        forecast_value = num_shares * forecast_price

        # Calculate expected profit
        expected_profit = forecast_value - 30

        # Append a dictionary to the list
        profit_list.append({
            'Stock': stock,
            'Current_Price': round(current_price, 2),
            'Forecasted_Price': round(forecast_price, 2),
            'Expected_Profit': round(expected_profit, 2)
        })
    except Exception as e:
        print(f"Error processing stock {stock}: {e}")
        # Optionally, log the error or mark the stock as inadequate
        inadequate_stocks.append(stock)

# Convert the list of dictionaries to a DataFrame
profit_df = pd.DataFrame(profit_list)

# Sort the DataFrame by Expected Profit in descending order
profit_df.sort_values(by='Expected_Profit', ascending=False, inplace=True)

# Reset index
profit_df.reset_index(drop=True, inplace=True)

# Display the profit DataFrame
profit_df

Unnamed: 0,Stock,Current_Price,Forecasted_Price,Expected_Profit
0,SLB,43.66,43.82,0.11
1,KEYS,168.61,169.00,0.07
2,AKAM,93.86,93.96,0.03
3,ADI,217.17,217.40,0.03
4,ZBH,112.02,112.12,0.03
...,...,...,...,...
251,COO,103.43,103.33,-0.03
252,NXPI,226.53,226.31,-0.03
253,MCO,500.88,500.36,-0.03
254,EQIX,979.10,978.03,-0.03




In [46]:
# %%
# Total Investment
total_investment = 30  # in GBP

# Calculate the total expected profit of all adequate stocks
total_expected_profit = profit_df['Expected_Profit'].sum()

# Calculate the weight (allocation percentage) for each stock based on its expected profit
profit_df['Weight'] = profit_df['Expected_Profit'] / total_expected_profit

# Calculate the investment allocation for each stock
profit_df['Investment'] = profit_df['Weight'] * total_investment

# Calculate the number of shares for each stock
profit_df['Number_of_Shares'] = profit_df['Investment'] / profit_df['Current_Price']

# Calculate the expected profit for the allocated investment
profit_df['Allocated_Profit'] = profit_df['Number_of_Shares'] * (profit_df['Forecasted_Price'] - profit_df['Current_Price'])

# Round the numerical columns for better readability
profit_df[['Investment', 'Number_of_Shares', 'Allocated_Profit']] = profit_df[['Investment', 'Number_of_Shares', 'Allocated_Profit']].round(2)

# Display the updated DataFrame with allocations
profit_df[['Stock', 'Current_Price', 'Forecasted_Price', 'Expected_Profit', 'Investment', 'Number_of_Shares', 'Allocated_Profit']]

Unnamed: 0,Stock,Current_Price,Forecasted_Price,Expected_Profit,Investment,Number_of_Shares,Allocated_Profit
0,SLB,43.66,43.82,0.11,-30.00,-0.69,-0.11
1,KEYS,168.61,169.00,0.07,-19.09,-0.11,-0.04
2,AKAM,93.86,93.96,0.03,-8.18,-0.09,-0.01
3,ADI,217.17,217.40,0.03,-8.18,-0.04,-0.01
4,ZBH,112.02,112.12,0.03,-8.18,-0.07,-0.01
...,...,...,...,...,...,...,...
251,COO,103.43,103.33,-0.03,8.18,0.08,-0.01
252,NXPI,226.53,226.31,-0.03,8.18,0.04,-0.01
253,MCO,500.88,500.36,-0.03,8.18,0.02,-0.01
254,EQIX,979.10,978.03,-0.03,8.18,0.01,-0.01


In [47]:
# %%
# Final Ranked List with Investment Strategy
final_ranking = profit_df[['Stock', 'Current_Price', 'Forecasted_Price', 'Expected_Profit', 'Investment', 'Number_of_Shares', 'Allocated_Profit']].copy()

# Calculate the total allocated investment and total expected profit
total_allocated_investment = final_ranking['Investment'].sum()
total_allocated_profit = final_ranking['Allocated_Profit'].sum()

# Display the final ranking
print("Final Ranked List of Stocks Based on Expected Profit:")
display(final_ranking)

print(f"Total Allocated Investment: £{round(total_allocated_investment, 2)}")
print(f"Total Expected Profit: £{round(total_allocated_profit, 2)}")

Final Ranked List of Stocks Based on Expected Profit:


Unnamed: 0,Stock,Current_Price,Forecasted_Price,Expected_Profit,Investment,Number_of_Shares,Allocated_Profit
0,SLB,43.66,43.82,0.11,-30.00,-0.69,-0.11
1,KEYS,168.61,169.00,0.07,-19.09,-0.11,-0.04
2,AKAM,93.86,93.96,0.03,-8.18,-0.09,-0.01
3,ADI,217.17,217.40,0.03,-8.18,-0.04,-0.01
4,ZBH,112.02,112.12,0.03,-8.18,-0.07,-0.01
...,...,...,...,...,...,...,...
251,COO,103.43,103.33,-0.03,8.18,0.08,-0.01
252,NXPI,226.53,226.31,-0.03,8.18,0.04,-0.01
253,MCO,500.88,500.36,-0.03,8.18,0.02,-0.01
254,EQIX,979.10,978.03,-0.03,8.18,0.01,-0.01


Total Allocated Investment: £30.0
Total Expected Profit: £-0.26


In [48]:
# %%
# Display Inadequate Stocks
if inadequate_stocks:
    inadequate_df = pd.DataFrame({'Inadequate Stocks': inadequate_stocks})
    print("List of Inadequate Stocks (Excluded from Analysis):")
    display(inadequate_df)
else:
    print("No Inadequate Stocks found.")

List of Inadequate Stocks (Excluded from Analysis):


Unnamed: 0,Inadequate Stocks
0,TEAM
1,TWLO
2,ALNY
3,HUBS


In [49]:
# %%
# Summary
print("### Investment Strategy Summary ###\n")

print(f"**Total Investment Available:** £{total_investment}\n")
print(f"**Total Expected Profit:** £{round(total_allocated_profit, 2)}\n")
print("**Portfolio Allocation:**")
display(final_ranking[['Stock', 'Investment', 'Number_of_Shares', 'Allocated_Profit']])

if inadequate_stocks:
    print("\n**Inadequate Stocks (Excluded from Portfolio):**")
    display(inadequate_df)
else:
    print("\nAll stocks were adequate for the analysis.")

### Investment Strategy Summary ###

**Total Investment Available:** £30

**Total Expected Profit:** £-0.26

**Portfolio Allocation:**


Unnamed: 0,Stock,Investment,Number_of_Shares,Allocated_Profit
0,SLB,-30.00,-0.69,-0.11
1,KEYS,-19.09,-0.11,-0.04
2,AKAM,-8.18,-0.09,-0.01
3,ADI,-8.18,-0.04,-0.01
4,ZBH,-8.18,-0.07,-0.01
...,...,...,...,...
251,COO,8.18,0.08,-0.01
252,NXPI,8.18,0.04,-0.01
253,MCO,8.18,0.02,-0.01
254,EQIX,8.18,0.01,-0.01



**Inadequate Stocks (Excluded from Portfolio):**


Unnamed: 0,Inadequate Stocks
0,TEAM
1,TWLO
2,ALNY
3,HUBS


In [50]:
# %%
# Define the allocation function
def allocate_investment(profit_df, total_investment=30):
    """
    Allocates the total investment to the stock(s) with the highest expected profit.

    Parameters:
    - profit_df (pd.DataFrame): DataFrame containing 'Stock', 'Expected_Profit'.
    - total_investment (float): Total amount to invest.

    Returns:
    - allocation (dict): Dictionary with 'Stock', 'Investment', 'Expected_Profit'.
    - total_profit (float): Total expected profit from the investment.
    """
    if profit_df.empty:
        print("No adequate stocks available for investment.")
        return None, 0.0

    # Sort the DataFrame by Expected Profit in descending order
    sorted_df = profit_df.sort_values(by='Expected_Profit', ascending=False).reset_index(drop=True)

    # Select the top stock
    top_stock = sorted_df.iloc[0]

    # Allocate the entire investment to the top stock
    allocation = {
        'Stock': top_stock['Stock'],
        'Investment': round(total_investment, 2),
        'Expected_Profit': round(top_stock['Expected_Profit'], 2)
    }

    # Total profit is the expected profit from the top stock
    total_profit = allocation['Expected_Profit']

    return allocation, total_profit

# %%
# Allocate investment
allocation, total_profit = allocate_investment(profit_df, total_investment=30)

if allocation:
    # Create a DataFrame for allocation
    allocation_df = pd.DataFrame([allocation])

    print("### Investment Allocation ###\n")
    display(allocation_df)

    print(f"**Total Investment:** £{allocation['Investment']}")
    print(f"**Total Expected Profit:** £{total_profit}")
else:
    print("No allocation made due to lack of adequate stocks.")

# %%
# Display Inadequate Stocks
if inadequate_stocks:
    inadequate_df = pd.DataFrame({'Inadequate Stocks': inadequate_stocks})
    print("### Inadequate Stocks (Excluded from Analysis) ###")
    display(inadequate_df)
else:
    print("No Inadequate Stocks found.")

# %%
# Summary of Investment Strategy
if allocation:
    print("### Investment Strategy Summary ###\n")

    print(f"**Total Investment Available:** £{allocation['Investment']}\n")
    print(f"**Total Expected Profit:** £{allocation['Expected_Profit']}\n")
    print("**Portfolio Allocation:**")
    display(allocation_df[['Stock', 'Investment', 'Expected_Profit']])
else:
    print("No investment allocation was made due to lack of adequate stocks.")

### Investment Allocation ###



Unnamed: 0,Stock,Investment,Expected_Profit
0,SLB,30,0.11


**Total Investment:** £30
**Total Expected Profit:** £0.11
### Inadequate Stocks (Excluded from Analysis) ###


Unnamed: 0,Inadequate Stocks
0,TEAM
1,TWLO
2,ALNY
3,HUBS


### Investment Strategy Summary ###

**Total Investment Available:** £30

**Total Expected Profit:** £0.11

**Portfolio Allocation:**


Unnamed: 0,Stock,Investment,Expected_Profit
0,SLB,30,0.11
