In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

portfolio = [
    {
        "stock_name": "Tesla",
        "ticker_symbol": "TSLA",
        "stocks_owned": 0.095238,
        "average_cost": 210,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "Alphabet Class A",
        "ticker_symbol": "GOOGL",
        "stocks_owned": 1.136565,
        "average_cost": 131.98,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "Apple",
        "ticker_symbol": "AAPL",
        "stocks_owned": 0.116965,
        "average_cost": 170.99,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "PayPal",
        "ticker_symbol": "PYPL",
        "stocks_owned": 5.668852,
        "average_cost": 52.92,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "NetFlix",
        "ticker_symbol": "NFLX",
        "stocks_owned": 0.049588,
        "average_cost": 403.32,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "Amazon",
        "ticker_symbol": "AMZN",
        "stocks_owned": 0.160462,
        "average_cost": 124.64,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "Microsoft",
        "ticker_symbol": "MSFT",
        "stocks_owned": 0.122887,
        "average_cost": 325.50,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "Starbucks",
        "ticker_symbol": "SBUX",
        "stocks_owned": 0.049588,
        "average_cost": 403.32,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "NVIDIA",
        "ticker_symbol": "NVDA",
        "stocks_owned": 0.048499,
        "average_cost": 412.38,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "GM",
        "ticker_symbol": "GM",
        "stocks_owned": 2.272666,
        "average_cost": 29.33,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "CAVA",
        "ticker_symbol": "CAVA",
        "stocks_owned": 2.759214,
        "average_cost": 36.24,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "iShares Semiconductor ETF",
        "ticker_symbol": "SOXX",
        "stocks_owned": 0.814155,
        "average_cost": 185.64,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "Coca-Cola Consolidated",
        "ticker_symbol": "COKE",
        "stocks_owned": 0.033374,
        "average_cost": 816.50,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "Walmart",
        "ticker_symbol": "WMT",
        "stocks_owned": 0.41668,
        "average_cost": 60,
        "as_of_date": "2024-04-10"
    },
    {
        "stock_name": "Coca-Cola",
        "ticker_symbol": "KO",
        "stocks_owned": 1,
        "average_cost": 58.09,
        "as_of_date": "2024-04-17"
    }
]

class StockDataFetcher:
    def __init__(self, portfolio):
        """
        Initializes the class with a portfolio.
        :param portfolio: A list of dictionaries, each containing the ticker symbol of a stock
        """
        self.portfolio = portfolio
    
    def get_historical_data(self, period="10y", interval="1d"):
        historical_data = {}
        for stock in self.portfolio:
            ticker = yf.Ticker(stock['ticker_symbol'])
            data = ticker.history(period=period, interval=interval)
            historical_data[stock['ticker_symbol']] = data
        return historical_data
    
    def fetch_current_market_data(self):
        market_data = {}
        for stock in self.portfolio:
            ticker = yf.Ticker(stock['ticker_symbol'])
            market_data[stock['ticker_symbol']] = ticker.info
        return market_data
    
    def fetch_financials(self):
        financial_data = {}
        for stock in self.portfolio:
            ticker_symbol = stock['ticker_symbol']
            ticker = yf.Ticker(ticker=ticker_symbol)
            financial_data[ticker_symbol] = {
                'annual_financials': ticker.financials,
                'quarterly_financials': ticker.quarterly_financials,
            }
        return financial_data

In [2]:
import pandas as pd


class FeatureEngineering:
    def __init__(self, market_dict, historical_data, financial_data):
        self.market_data = market_dict
        self.historical_data = historical_data
        self.financial_data = financial_data
        self.info_fields = {
            'marketCap', 'trailingPE', 'forwardPE', 'priceToSalesTrailing12Months',
            'bookValue', 'pegRatio', 'dividendYield', 'debtToEquity', 'returnOnEquity',
            'beta', 'currentRatio', 'quickRatio', 'freeCashflow', 'operatingMargins', 
            'ebitdaMargins', 'grossMargins', 'payoutRatio', 'priceToBook', 'enterpriseToRevenue',
            'enterpriseToEbitda', 'earningsQuarterlyGrowth', 'revenueGrowth', 
            'returnOnAssets', 'operatingCashflow', 'dividendYield', 'volume', 'currentPrice'
        }
        
    def structure_historical_data(self):
        structured_data = []
        for ticker, data in self.historical_data.items():
            # Assign 'Ticker' to each DataFrame and convert it to a column
            data['Ticker'] = ticker
            # Set 'Date' and 'Ticker' as a multi-level index
            data.set_index(['Ticker'], append=True, inplace=True)
            structured_data.append(data)
        consolidated_history = pd.concat(structured_data, ignore_index=False)
        return consolidated_history
    
    def consolidate_financials(self):
        annual_financials = pd.DataFrame()
        quarterly_fianancials = pd.DataFrame()
        for ticker_symbol, data in self.financial_data.items():
            for report_type in ['annual_financials', 'quarterly_financials']:
                df = data[report_type].copy()
                if not pd.api.types.is_datetime64_any_dtype(df.columns):
                    df = df.transpose()
                df = df.reset_index().melt(
                    id_vars='index', var_name='Date', value_name='Value'
                )
                df.columns = ['Financial_Metric', 'Date', 'Value']
                df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
                df['Ticker'] = ticker_symbol
            if report_type == 'annual_financials':
                annual_financials = pd.concat(
                    [annual_financials, df], ignore_index=True
                )
            elif report_type == 'quarterly_financials':
                quarterly_fianancials = pd.concat(
                    [quarterly_fianancials, df], ignore_index=True
                )
            if not annual_financials.empty:
                annual_financials.set_index('Date', inplace=True)
            if not quarterly_fianancials.empty:
                quarterly_fianancials.set_index('Date', inplace=True)
        return annual_financials, quarterly_fianancials
                
    def consolidate_info_fields(self):
        info_data = []
        for ticker, value in self.market_data.items():
            info_dict = {field: value.get(field, None) for field in self.info_fields}
            info_dict['Ticker'] = ticker
            info_data.append(info_dict)
        info_dataframe = pd.DataFrame(info_data)
        info_dataframe.set_index('Ticker', inplace=True)
        return info_dataframe

In [3]:
import pandas as pd


class ETFDataFiller:
    def __init__(self, market_data, data_fetcher):
        """
        Initializes the ETFDataFiller with market data and a data fetching class that provides ETF data.
        
        :param market_data: DataFrame containing the market data with tickers as indices.
        :param data_fetcher: An instance of a class that can provide ETF data
        """
        self.market_data = market_data
        self.data_fetcher = data_fetcher
        self.mapping = {
            'currentPrice': 'navPrice',
            'beta': 'beta3Year',
            'dividendYield': 'trailingAnnualDividendYield',
            'marketCap': 'totalAssets',
            'returnOnEquity': 'threeYearAverageReturn',
        }
        self.etf_relevant_keys = [
            'trailingAnnualDividendRate', 'trailingAnnualDividendYield', 
            'lastDividendValue', 'lastDividendDate', 'yield', 
            'navPrice', 'category', 'ytdReturn', 'beta3Year', 
            'threeYearAverageReturn', 'totalAssets', 
        ]

    def is_etf(self, ticker):
        """ 
        Determine if the ticker is an ETF by checking if all mapped fields in market_data are NaN. 
        """
        return all(pd.isna(self.market_data.at[ticker, col]) for col in self.mapping)

    def get_etf_data(self, ticker):
        """ 
        Retrieve and filter data for a specific ETF based on relevant keys. 
        """
        etf_data = {}
        full_etf_data = self.data_fetcher.fetch_current_market_data()[ticker] 
        for key in full_etf_data:
            if key in self.etf_relevant_keys:
                etf_data[key] = full_etf_data[key]
        return etf_data

    def fill_data(self, ticker):
        """ 
        Fill NaN values in the market data for a specific 
        ticker using the ETF data provided.
        """
        if self.is_etf(ticker):
            etf_data = self.get_etf_data(ticker)
            for market_col, etf_col in self.mapping.items():
                if pd.isna(self.market_data.at[ticker, market_col]) \
                    and etf_col in etf_data:
                    self.market_data.at[ticker, market_col] = etf_data[etf_col]

    def fill_all_etfs(self):
        """ 
        Identify and fill data for all ETFs in the market_data DataFrame. 
        """
        for ticker in self.market_data.index:
            if self.is_etf(ticker):
                self.fill_data(ticker)

    def display_data(self):
        """ Utility method to display the DataFrame. """
        print(self.market_data)

In [4]:
data_fetcher = StockDataFetcher(portfolio=portfolio)
historical_data = data_fetcher.get_historical_data()
financial_data = data_fetcher.fetch_financials()
market_dict = data_fetcher.fetch_current_market_data()

In [5]:
feature_engineering = FeatureEngineering(market_dict, historical_data, financial_data)
market_data = feature_engineering.consolidate_info_fields()

In [6]:
market_data

Unnamed: 0_level_0,marketCap,pegRatio,ebitdaMargins,trailingPE,currentRatio,enterpriseToRevenue,earningsQuarterlyGrowth,returnOnEquity,dividendYield,grossMargins,...,forwardPE,revenueGrowth,operatingMargins,priceToBook,debtToEquity,enterpriseToEbitda,currentPrice,returnOnAssets,freeCashflow,volume
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TSLA,476572000000.0,3.67,0.1401,34.8,1.726,4.743,1.15,0.27348,,0.18249,...,41.337017,0.035,0.08201,7.609458,15.05,33.851,149.64,0.05882,2230375000.0,9223830
GOOGL,1938139000000.0,1.19,0.32587,26.762068,2.097,6.046,0.518,0.27356,,0.56937,...,19.723,0.135,0.28849,6.824957,10.54,18.554,155.22,0.14366,58657750000.0,2979502
AAPL,2563973000000.0,2.32,0.33733,25.822706,1.073,6.778,0.131,1.54269,0.0057,0.45027,...,23.287516,0.021,0.33764,34.642185,145.803,20.094,166.04,0.21181,86563130000.0,5943263
PYPL,65611060000.0,0.73,0.1829,16.231771,1.291,2.161,0.522,0.20549,,0.39586,...,11.150269,0.087,0.17219,3.17411,56.263,11.816,62.33,0.03774,5118500000.0,828647
NFLX,245933200000.0,1.36,0.23549,47.2394,1.068,7.752,0.787,0.29796,,0.43065,...,26.63027,0.148,0.28094,11.492911,65.601,32.918,568.29,0.10009,19938770000.0,2055330
AMZN,1845512000000.0,1.42,0.14878,61.174824,1.045,3.369,37.216,0.1749,,0.46982,...,33.536293,0.139,0.07525,9.124466,80.037,22.644,177.407,0.04651,45475750000.0,5223834
MSFT,3001303000000.0,2.16,0.52037,36.454876,1.218,13.333,0.332,0.39174,0.0074,0.69815,...,30.233534,0.176,0.43586,12.598877,46.736,25.622,403.92,0.1519,58681000000.0,3003966
SBUX,99216670000.0,1.39,0.19753,23.430536,0.699,3.265,0.198,,0.0262,0.27869,...,18.724403,0.082,0.15167,,,16.53,87.6302,0.12524,3493350000.0,920416
NVDA,2093325000000.0,0.91,0.56597,70.12814,4.171,34.501,7.688,0.91458,0.0002,0.72718,...,27.310177,2.653,0.61593,48.00654,25.725,60.958,837.33,0.38551,19866880000.0,4139513
GM,49247980000.0,0.41,0.09185,5.827869,1.076,0.901,0.051,0.13998,0.0113,0.11217,...,4.587097,-0.003,0.02027,0.796311,179.941,9.805,42.66,0.02239,4468125000.0,550821


In [7]:
etf_filler = ETFDataFiller(market_data, data_fetcher)
etf_filler.fill_all_etfs()

In [8]:
def find_nearest_date(target_date, index, tolerance_days=5):
    # Ensure the target_date is localized to New York timezone
    if target_date.tzinfo is None \
        or target_date.tzinfo.utcoffset(target_date) is None:
        target_date = pd.Timestamp(target_date).tz_localize('America/New_York')
    else:
        target_date = target_date.tz_convert('America/New_York')
    
    # Generate a date range for the target date with a specified tolerance
    date_range = pd.date_range(
        start=target_date - pd.DateOffset(days=tolerance_days), 
        end=target_date + pd.DateOffset(days=tolerance_days),
        tz='America/New_York'  # Ensure the date range is in the New York timezone
    )

    # Convert the index to New York timezone if it isn't already
    if index.tz is None:
        index = index.tz_localize('America/New_York')
    elif index.tz.zone != 'America/New_York':
        index = index.tz_convert('America/New_York')

    # Find intersection of the provided index with the generated date range
    valid_dates = index.intersection(date_range)
    if not valid_dates.empty:
        # Find the nearest date by calculating minimum absolute difference
        nearest_date = valid_dates[np.abs(valid_dates - target_date).argmin()]
        return nearest_date
    
    return np.nan
    
def get_current_price(stock_symbol):
    stock_data = yf.Ticker(stock_symbol)
    current_price = stock_data.history(period='1d')['Close'][0]
    return current_price

In [9]:
class PortfolioAnalysisEngine:
    def __init__(
        self, portfolio_data, market_data, historical_data
    ):
        self.historical_data = historical_data
        self.portfolio_data = pd.DataFrame(portfolio_data)
        self.portfolio_data.set_index('ticker_symbol', inplace=True)
        self.market_data = market_data
        self.metrics = {
            'marketCap': 'high', 'trailingPE': 'low', 'forwardPE': 'low',
            'priceToSalesTrailing12Months': 'low', 'bookValue': 'high',
            'pegRatio': 'low', 'dividendYield': 'high', 'debtToEquity': 'low',
            'returnOnEquity': 'high', 'momentum': 'high', 'portfolioDiversity': 'high',
            'beta': 'low', 'currentRatio':'high', 'quickRatio': 'high', 
            'freeCashflow':'high', 'operatingMargins': 'high', 'ebitdaMargins': 'high',
            'grossMargins': 'high', 'payoutRatio': 'low', 'priceToBook': 'low', 
            'enterpriseToRevenue': 'low', 'enterpriseToEbitda': 'low', 
            'earningsQuarterlyGrowth': 'high', 'revenueGrowth': 'high', 
            'returnOnAssets': 'high', 'operatingCashflow': 'high',
            'averageVolume': 'high', 'volumeChange': 'high', 'sharpe_ratio': 'high'
        }
        self.initialize_weights()

    def initialize_weights(self):
        self.weights = {
            ticker: 1.0 / len(self.market_data) for ticker in self.market_data.index
        }
    
    def normalize_metric(self, series, direction):
        min_val = series.min()
        max_val = series.max()
        if direction == 'high':
            return (series - min_val) / (max_val - min_val)
        return 1 - (series - min_val) / (max_val - min_val)
    
    def normalize_scores(self, dataframe, columns):
        for col in columns:
            min_val = dataframe[col].min()
            max_val = dataframe[col].max()
            dataframe[col] = (dataframe[col] - min_val) / (max_val - min_val)
    
    def calculate_portfolio_diversity(self):

        # Merge 'portfolio_data' with 'market_data' directly on their indices
        self.portfolio_data = self.portfolio_data.merge(
            self.market_data[['currentPrice']],
            left_index=True,
            right_index=True,
            how='left'
        )
        # Calculate total cost and current market value per stock
        self.portfolio_data['total_cost'] = self.portfolio_data['stocks_owned'] * \
            self.portfolio_data['average_cost']
        total_portfolio_value = self.portfolio_data['total_cost'].sum()
        self.portfolio_data['dollar_value'] = self.portfolio_data['stocks_owned'] * \
            self.portfolio_data['currentPrice']
        # Calculate a discount score where larger scores are incentivised
        self.portfolio_data['discount_score'] = (
            self.portfolio_data['average_cost'] - self.portfolio_data['currentPrice']
        ) / self.portfolio_data['average_cost']
        # Balance score where larger scores are deincentivized
        self.portfolio_data['balance_score'] = 1 / (
            self.portfolio_data['dollar_value'] / total_portfolio_value
        )
        # Normalize scores to range between 0 and 1
        self.normalize_scores(
            self.portfolio_data, ['discount_score', 'balance_score']
        )
        # Calculate 'portfolioDiversity' as a weighted sum of 'discount_score' and 'balance_score'
        self.portfolio_data['portfolioDiversity'] = (
            (0.7 * self.portfolio_data['discount_score']) + \
                (0.3 * self.portfolio_data['balance_score'])
        )
        # Merge the portfolio diversity back into the market data
        self.market_data = self.market_data.merge(
            self.portfolio_data[['portfolioDiversity']],
            left_index=True,
            right_index=True,
            how='left'
        )

    def calculate_momentum(self):
        latest_date = pd.Timestamp('today').floor('D') - pd.DateOffset(days=1)
        end_date = latest_date - pd.DateOffset(days=21)
        start_date = end_date - pd.DateOffset(days=230)
        percent_changes = {}
        for ticker, data in self.historical_data.items():
            data = data.sort_index()
            # Find the nearest valid start and end dates
            valid_start_date = find_nearest_date(start_date, data.index)
            valid_end_date = find_nearest_date(end_date, data.index)
            # Calculate percent change if both dates are found
            if pd.notna(valid_start_date) and pd.notna(valid_end_date) \
                and valid_end_date > valid_start_date:
                start_close = data.loc[valid_start_date, 'Close']
                end_close = data.loc[valid_end_date, 'Close']
                percent_change = (end_close - start_close) / start_close
            else:
                percent_change = np.nan
            percent_changes[ticker] = percent_change         
            # Map the percent change (momentum) to the market_data DataFrame
            if ticker in self.market_data.index:
                self.market_data.loc[ticker, 'momentum'] = percent_change

        # Normalize the momentum scores directly within market_data
        self.normalize_scores(self.market_data, ['momentum'])
        
    def calculate_volume_metrics(self):
        for ticker, data in self.historical_data.items():
            if 'Volume' in data.columns:
                data['averageVolume'] = data['Volume'].rolling(window=50).mean()
                data['relativeVolume'] = data['Volume'] / data['averageVolume']
                data['volumeChange'] = data['Volume'].pct_change()
            
            # Update market data
            for metric in ['averageVolume', 'relativeVolume', 'volumeChange']:
                    self.market_data.at[ticker, metric] = data[metric].dropna().iloc[-1] \
                    if not data[metric].dropna().empty else np.nan
    
    def calculate_sharpe_ratio(self, risk_free_rate=0.01):
        # 252 trading days per year
        # formula for daily risk free rate is below
        daily_risk_free_rate = (1 + risk_free_rate) ** (1/252) - 1
        for ticker, data in self.historical_data.items():
            if 'Close' in data.columns:
                # Calculate returns as a pct change
                daily_returns = data['Close'].pct_change()
                excess_daily_returns = daily_returns - daily_risk_free_rate
                # Calculate the mean and standard deviation of excess daily returns
                mean_excess_returns = excess_daily_returns.mean()
                std_excess_returns = excess_daily_returns.std()
                # Calculate Sharpe Ratio and annualize for benchmarch comparison
                if std_excess_returns > 0:
                    sharpe_ratio = mean_excess_returns / std_excess_returns
                    annual_sharpe_ratio = sharpe_ratio * (252 ** 0.5)
                    self.market_data.at[ticker, 'sharpe_ratio'] = annual_sharpe_ratio
                else:
                    self.market_data.at[ticker, 'sharpe_ratio'] = np.nan
            else:
                self.market_data.at[ticker, 'sharpe_ratio'] = np.nan
        # normalize scores
        self.normalize_scores(self.market_data, ['sharpe_ratio'])
                
    def calculate_all_metrics(self):
        self.calculate_volume_metrics()
        for ticker, data in self.historical_data.items():
            # Normalize each metric according to its specified direction
            for metric, direction in self.metrics.items():
                if metric in self.market_data.columns:
                    self.market_data[metric] = self.normalize_metric(
                        self.market_data[metric], direction
                    )
        # Calculate fundamental score as the mean of all metrics
        self.market_data['fundamental_score'] = self.market_data[list(self.metrics.keys())].mean(axis=1)
        
    def apply_strategy(self):
        
        self.calculate_portfolio_diversity()
        self.calculate_momentum()
        self.calculate_sharpe_ratio()
        self.calculate_all_metrics()

        # Normalize the fundamental scores for proportional adjustment
        max_score = self.market_data['fundamental_score'].max()
        min_score = self.market_data['fundamental_score'].min()
        normalized_scores = (
            self.market_data['fundamental_score'] - min_score
        ) / (max_score - min_score)
        
        # Adjust initial weights based on normalized fundamental scores
        self.weights = {
            ticker: (self.weights[ticker] * normalized_scores.loc[ticker])
            for ticker in self.market_data.index
        }
        # Normalize weights to ensure they sum to 1
        total_weight = sum(self.weights.values())
        self.weights = {
            ticker: weight / total_weight for ticker, weight in self.weights.items()
        }                     

In [10]:
class MarkovModel:
    """
    A statistical model for randomly changing systems that assumes
    that future states depend on the current state and not the sequence
    of events preceeding it
    """
    def __init__(self, data, years=3) -> None:
        self.original_data = data
        self.historical_data = self.limit_data_to_recent_years(data, years)
        self.thresholds = {}
        self.states = {}
        self.transition_matrices = {}
    
    def limit_data_to_recent_years(self, data, years):
        """ Limit data to the most recent years specified. """
        current_date = pd.to_datetime('today').tz_localize('America/New_York')
        cutoff_date = current_date - pd.DateOffset(years=years)
        return data[data.index >= cutoff_date]
        
    def calculate_thresholds(self):
        data = self.historical_data
        price_changes = data['Close'].pct_change().dropna()
        mean_change = price_changes.mean()
        std_dev = price_changes.std()
        
        # Define thresholds for significant and minor changes
        self.thresholds = {
            'significant': mean_change + std_dev,
            'minor': mean_change
        }
        
    def define_states(self):
        """
        Let S be a finite set of states in the Markov model, denoted as 
        S = {s1, s2, s3, s4, .. , sn}
        the probability of moving from one probability to another is P = [pij]
        pij = P(Xt+1 = sj | Xt = si)
        """
        self.calculate_thresholds()
        price_changes = self.historical_data['Close'].pct_change().dropna()
        significant = self.thresholds['significant']
        minor = self.thresholds['minor']

        self.states = price_changes.apply(
            lambda change:
                0 if change <= -significant else
                1 if -significant < change <= -minor else
                2 if -minor < change <= minor else
                3)
    
    def markov_chain_transition_matrix(self):
        """
        Shows the probability of each state going to the other side
        """
        self.define_states()
        states = self.states
        transition_matrix = pd.crosstab(states, states.shift(-1), normalize='index')
        self.transition_matrices = transition_matrix
        return self.transition_matrices

    def predict_next_state(self):
        """
        Uses the transition matrix to predict the next state 
        based on the current state’s probabilities.
        """
        transition_matrix = self.markov_chain_transition_matrix()
        current_state = self.states.iloc[-1]
        next_state_probabilities = transition_matrix.loc[current_state]
        next_state = np.random.choice(
            next_state_probabilities.index, p=next_state_probabilities.values
        )
        return next_state

In [11]:
class CandlestickPatterns:
    def __init__(self, historical_data) -> None:
        self.data = historical_data
        
    def identify_doji(self, ticker):
        df = self.data[ticker]
        body = np.abs(df['Close'] - df['Open'])
        price_range = df['High'] - df['Low']
        return body <= (price_range * 0.1)

    def identify_hammer(self, ticker):
        df = self.data[ticker]
        body = np.abs(df['Close'] - df['Open'])
        total_range = df['High'] - df['Low']
        lower_shadow = np.minimum(df['Open'], df['Close']) - df['Low']
        upper_shadow = df['High'] - np.maximum(df['Open'], df['Close'])
        return (body <= total_range * 0.3) & \
            (lower_shadow >= 2 * body) & (upper_shadow <= body * 0.3)

    def identify_inverted_hammer(self, ticker):
        df = self.data[ticker]
        body = np.abs(df['Close'] - df['Open'])
        total_range = df['High'] - df['Low']
        lower_shadow = np.minimum(df['Open'], df['Close']) - df['Low']
        upper_shadow = df['High'] - np.maximum(df['Open'], df['Close'])
        return (body <= total_range * 0.3) & \
            (upper_shadow >= 2 * body) & (lower_shadow <= body * 0.3)

    def identify_shooting_star(self, ticker):
        df = self.data[ticker]
        body = np.abs(df['Close'] - df['Open'])
        total_range = df['High'] - df['Low']
        lower_shadow = np.minimum(df['Open'], df['Close']) - df['Low']
        upper_shadow = df['High'] - np.maximum(df['Open'], df['Close'])
        return (body <= total_range * 0.3) & \
            (upper_shadow >= 2 * body) & (lower_shadow <= body * 0.1)

    def identify_spinning_tops(self, ticker):
        df = self.data[ticker]
        body = np.abs(df['Close'] - df['Open'])
        total_range = df['High'] - df['Low']
        upper_shadow = df['High'] - np.maximum(df['Open'], df['Close'])
        lower_shadow = np.minimum(df['Open'], df['Close']) - df['Low']
        return (body <= total_range * 0.1) & (upper_shadow >= body) & (lower_shadow >= body)

    def identify_engulfing(self, ticker):
        df = self.data[ticker]
        current_body = df['Close'] - df['Open']
        previous_body = df['Close'].shift(1) - df['Open'].shift(1)
        return (np.abs(current_body) > np.abs(previous_body)) & \
            (np.sign(current_body) != np.sign(previous_body))

    def identify_harami(self, ticker):
        df = self.data[ticker]
        current_body = df['Close'] - df['Open']
        previous_body = df['Close'].shift(1) - df['Open'].shift(1)
        return (np.abs(current_body) < np.abs(previous_body)) & \
            (np.sign(current_body) != np.sign(previous_body))

    def identify_piercing_line(self, ticker):
        df = self.data[ticker]
        previous_close = df['Close'].shift(1)
        previous_open = df['Open'].shift(1)
        return (df['Open'] < previous_close) & \
            (
                df['Close'] > previous_open + (previous_close - previous_open) / 2
        )

    def identify_dark_cloud_cover(self, ticker):
        df = self.data[ticker]
        previous_close = df['Close'].shift(1)
        previous_open = df['Open'].shift(1)
        return (df['Open'] > previous_close) & \
            (
                df['Close'] < previous_open + (previous_close - previous_open) / 2
        )

    def identify_morning_star(self, ticker):
        df = self.data[ticker]
        first = df['Close'].shift(2) > df['Open'].shift(2)
        second = np.abs(
            df['Close'].shift(1) - df['Open'].shift(1)
        ) <= (
            df['High'].shift(1) - df['Low'].shift(1)
        ) * 0.1
        third = (df['Open'] < df['Close'].shift(1)) & \
            (df['Close'] > df['Open'].shift(2))
        return first & second & third

    def identify_evening_star(self, ticker):
        df = self.data[ticker]
        first = df['Close'].shift(2) < df['Open'].shift(2)
        second = np.abs(
            df['Close'].shift(1) - df['Open'].shift(1)
            ) <= (
                df['High'].shift(1) - df['Low'].shift(1)
        ) * 0.1
        third = (
            df['Open'] > df['Close'].shift(1)
            ) & (
                df['Close'] < df['Open'].shift(2)
        )
        return first & second & third

    def identify_three_white_soldiers(self, ticker):
        df = self.data[ticker]
        first = df['Close'].shift(2) > df['Open'].shift(2)
        second = df['Close'].shift(1) > df['Open'].shift(1)
        third = df['Close'] > df['Open']
        increasing = first & second & third
        higher_closes = (
            df['Close'].shift(2) < df['Close'].shift(1)
            ) & (
                df['Close'].shift(1) < df['Close']
        )
        return increasing & higher_closes

    def identify_three_black_crows(self, ticker):
        df = self.data[ticker]
        first = df['Close'].shift(2) < df['Open'].shift(2)
        second = df['Close'].shift(1) < df['Open'].shift(1)
        third = df['Close'] < df['Open']
        decreasing = first & second & third
        lower_closes = (
            df['Close'].shift(2) > df['Close'].shift(1)
        ) & (
            df['Close'].shift(1) > df['Close']
        )
        return decreasing & lower_closes
    
    def identify_patterns_for_ticker(self, ticker):
        """
        Identifies all patterns for the historical data of a stock
        """
        df = self.data[ticker]
        patterns = pd.DataFrame(index=df.index)
        patterns['Doji'] = self.identify_doji(ticker)
        patterns['Hammer'] = self.identify_hammer(ticker)
        patterns['Inverted Hammer'] = self.identify_inverted_hammer(ticker)
        patterns['Shooting Star'] = self.identify_shooting_star(ticker)
        patterns['Spinning Tops'] = self.identify_spinning_tops(ticker)
        patterns['Engulfing'] = self.identify_engulfing(ticker)
        patterns['Harami'] = self.identify_harami(ticker)
        patterns['Piercing Line'] = self.identify_piercing_line(ticker)
        patterns['Dark Cloud Cover'] = self.identify_dark_cloud_cover(ticker)
        patterns['Morning Star'] = self.identify_morning_star(ticker)
        patterns['Evening Star'] = self.identify_evening_star(ticker)
        patterns['Three White Soldiers'] = self.identify_three_white_soldiers(ticker)
        patterns['Three Black Crows'] = self.identify_three_black_crows(ticker)
        return patterns
        
    def find_patterns(self):
        
        """Applies pattern identification across all tickers and aggregates results."""
        all_patterns = {}
        for ticker in self.data:
            all_patterns[ticker] = self.identify_patterns_for_ticker(ticker)
        return all_patterns

In [12]:


class PortfolioAnalysisEngine:
    def __init__(
        self, portfolio_data, market_data, historical_data
    ):
        self.historical_data = historical_data
        self.portfolio_data = pd.DataFrame(portfolio_data)
        self.portfolio_data.set_index('ticker_symbol', inplace=True)
        self.market_data = market_data
        self.metrics = {
            'marketCap': 'high', 'trailingPE': 'low', 'forwardPE': 'low',
            'priceToSalesTrailing12Months': 'low', 'bookValue': 'high',
            'pegRatio': 'low', 'dividendYield': 'high', 'debtToEquity': 'low',
            'returnOnEquity': 'high', 'momentum': 'high', 'portfolioDiversity': 'high',
            'beta': 'low', 'currentRatio':'high', 'quickRatio': 'high', 
            'freeCashflow':'high', 'operatingMargins': 'high', 'ebitdaMargins': 'high',
            'grossMargins': 'high', 'payoutRatio': 'low', 'priceToBook': 'low', 
            'enterpriseToRevenue': 'low', 'enterpriseToEbitda': 'low', 
            'earningsQuarterlyGrowth': 'high', 'revenueGrowth': 'high', 
            'returnOnAssets': 'high', 'operatingCashflow': 'high',
            'averageVolume': 'high', 'volumeChange': 'high', 'sharpe_ratio': 'high'
        }
        self.initialize_weights()

    def initialize_weights(self):
        self.weights = {
            ticker: 1.0 / len(self.market_data) for ticker in self.market_data.index
        }
    
    def normalize_metric(self, series, direction):
        min_val = series.min()
        max_val = series.max()
        if direction == 'high':
            return (series - min_val) / (max_val - min_val)
        return 1 - (series - min_val) / (max_val - min_val)
    
    def normalize_scores(self, dataframe, columns):
        for col in columns:
            min_val = dataframe[col].min()
            max_val = dataframe[col].max()
            dataframe[col] = (dataframe[col] - min_val) / (max_val - min_val)
    
    def calculate_portfolio_diversity(self):
        # Merge 'portfolio_data' with 'market_data' directly on their indices
        self.portfolio_data = self.portfolio_data.merge(
            self.market_data[['currentPrice']],
            left_index=True,
            right_index=True,
            how='left'
        )
        # Calculate total cost and current market value per stock
        self.portfolio_data['total_cost'] = self.portfolio_data['stocks_owned'] * \
            self.portfolio_data['average_cost']
        total_portfolio_value = self.portfolio_data['total_cost'].sum()
        self.portfolio_data['dollar_value'] = self.portfolio_data['stocks_owned'] * \
            self.portfolio_data['currentPrice']
        # Calculate a discount score where larger scores are incentivised
        self.portfolio_data['discount_score'] = (
            self.portfolio_data['average_cost'] - self.portfolio_data['currentPrice']
        ) / self.portfolio_data['average_cost']
        # Balance score where larger scores are deincentivized
        self.portfolio_data['balance_score'] = 1 / (
            self.portfolio_data['dollar_value'] / total_portfolio_value
        )
        # Normalize scores to range between 0 and 1
        self.normalize_scores(
            self.portfolio_data, ['discount_score', 'balance_score']
        )
        # Calculate 'portfolioDiversity' as a weighted sum of 'discount_score' and 'balance_score'
        self.portfolio_data['portfolioDiversity'] = (
            (0.7 * self.portfolio_data['discount_score']) + \
                (0.3 * self.portfolio_data['balance_score'])
        )
        # Merge the portfolio diversity back into the market data
        self.market_data = self.market_data.merge(
            self.portfolio_data[['portfolioDiversity']],
            left_index=True,
            right_index=True,
            how='left'
        )

    def calculate_momentum(self):
        latest_date = pd.Timestamp('today').floor('D') - pd.DateOffset(days=1)
        end_date = latest_date - pd.DateOffset(days=21)
        start_date = end_date - pd.DateOffset(days=230)
        percent_changes = {}
        for ticker, data in self.historical_data.items():
            data = data.sort_index()
            # Find the nearest valid start and end dates
            valid_start_date = find_nearest_date(start_date, data.index)
            valid_end_date = find_nearest_date(end_date, data.index)
            # Calculate percent change if both dates are found
            if pd.notna(valid_start_date) and pd.notna(valid_end_date) \
                and valid_end_date > valid_start_date:
                start_close = data.loc[valid_start_date, 'Close']
                end_close = data.loc[valid_end_date, 'Close']
                percent_change = (end_close - start_close) / start_close
            else:
                percent_change = np.nan
            percent_changes[ticker] = percent_change         
            # Map the percent change (momentum) to the market_data DataFrame
            if ticker in self.market_data.index:
                self.market_data.loc[ticker, 'momentum'] = percent_change
        # Normalize the momentum scores directly within market_data
        self.normalize_scores(self.market_data, ['momentum'])
        
    def calculate_volume_metrics(self):
        for ticker, data in self.historical_data.items():
            if 'Volume' in data.columns:
                data['averageVolume'] = data['Volume'].rolling(window=50).mean()
                data['relativeVolume'] = data['Volume'] / data['averageVolume']
                data['volumeChange'] = data['Volume'].pct_change()
            # Update market data
            for metric in ['averageVolume', 'relativeVolume', 'volumeChange']:
                    self.market_data.at[ticker, metric] = data[metric].dropna().iloc[-1] \
                    if not data[metric].dropna().empty else np.nan
        self.normalize_scores(self.market_data, ['averageVolume', 'relativeVolume', 'volumeChange'])
    
    def calculate_sharpe_ratio(self, risk_free_rate=0.01):
        # formula for daily risk free rate is below
        daily_risk_free_rate = (1 + risk_free_rate) ** (1/252) - 1
        for ticker, data in self.historical_data.items():
            if 'Close' in data.columns:
                # Calculate returns as a pct change
                daily_returns = data['Close'].pct_change()
                excess_daily_returns = daily_returns - daily_risk_free_rate
                # Calculate the mean and standard deviation of excess daily returns
                mean_excess_returns = excess_daily_returns.mean()
                std_excess_returns = excess_daily_returns.std()
                # Calculate Sharpe Ratio and annualize for benchmarch comparison
                if std_excess_returns > 0:
                    sharpe_ratio = mean_excess_returns / std_excess_returns
                    annual_sharpe_ratio = sharpe_ratio * (252 ** 0.5)
                    self.market_data.at[ticker, 'sharpe_ratio'] = annual_sharpe_ratio
                else:
                    self.market_data.at[ticker, 'sharpe_ratio'] = np.nan
            else:
                self.market_data.at[ticker, 'sharpe_ratio'] = np.nan
        # normalize scores
        self.normalize_scores(self.market_data, ['sharpe_ratio'])
                
    def calculate_all_metrics(self):
        self.calculate_volume_metrics()
        for metric, direction in self.metrics.items():
            if metric in self.market_data.columns:
                self.market_data[metric] = self.normalize_metric(
                    self.market_data[metric], direction
                )
        # Calculate fundamental score as the mean of all metrics
        self.market_data['fundamental_score'] = self.market_data[
            list(self.metrics.keys())
        ].mean(axis=1)
        
    def apply_strategy(self):
        
        self.calculate_portfolio_diversity()
        self.calculate_momentum()
        self.calculate_sharpe_ratio()
        self.calculate_all_metrics()

        # Normalize the fundamental scores for proportional adjustment
        max_score = self.market_data['fundamental_score'].max()
        min_score = self.market_data['fundamental_score'].min()
        normalized_scores = (
            self.market_data['fundamental_score'] - min_score
        ) / (max_score - min_score)
        
        # Adjust initial weights based on normalized fundamental scores
        self.weights = {
            ticker: (self.weights[ticker] * normalized_scores.loc[ticker])
            for ticker in self.market_data.index
        }
        # Normalize weights to ensure they sum to 1
        total_weight = sum(self.weights.values())
        self.weights = {
            ticker: weight / total_weight for ticker, weight in self.weights.items()
        }                     

In [13]:
from scipy.signal import argrelextrema
class SupportResistance:
    def __init__(self, historical_data, order=10):
        """
        Initializes the class with historical data.
        :param historical_data: A Dictionary of DataFrames containing 'Close' prices for each ticker.
        :param order: How many points on each side to use for the local extrema calculation.
        """
        self.data = historical_data
        self.order = order
    
    def calculate_supports(self, ticker):
        """
        Identifies support levels using local minima.
        """
        prices = self.data[ticker]['Close']
        local_minima = argrelextrema(prices.values, np.less_equal, order=self.order)[0]
        supports = prices.iloc[local_minima]
        return supports.dropna()

    def calculate_resistances(self, ticker):
        """
        Identifies resistance levels using local maxima.
        """
        prices = self.data[ticker]['Close']
        local_maxima = argrelextrema(prices.values, np.greater_equal, order=self.order)[0]
        resistances = prices.iloc[local_maxima]
        return resistances.dropna()

    def find_levels(self):
        """
        Calculate and return support and resistance levels for each ticker in the historical data.
        """
        supports = {}
        resistances = {}
        for ticker in self.data:
            supports[ticker] = self.calculate_supports(ticker)
            resistances[ticker] = self.calculate_resistances(ticker)
        return supports, resistances

In [14]:
import numpy as np
import pandas as pd


class TechnicalAnalysis:
    def __init__(self, historical_data, max_years=3):
        self.historical_data = historical_data
        self.limit_data_to_recent_years(max_years)
        self.windows = self.calculate_volatility_based_window()
        
    def limit_data_to_recent_years(self, years):
        """
        Limits the data to the most recent years
        """
        current_date = pd.to_datetime('today').tz_localize('America/New_York')
        cutoff_date = current_date - pd.DateOffset(years=years)
        limited_data = {}  # Dictionary to store limited data for each ticker
        for ticker, data in self.historical_data.items():
            limited_data[ticker] = data[data.index >= cutoff_date]
        self.historical_data = limited_data 
        
    def calculate_volatility_based_window(self):
        """
        Calculates a dynamic window size based on volatility for each stock in the portfolio
        """
        window_sizes = {}
        for ticker, data in self.historical_data.items():
            recent_volatility = data['Close'].rolling(window=30).std().dropna().iloc[-1]
            overall_volatility = data['Close'].std()
            window_sizes[ticker] = max(10, int(50/2)) if recent_volatility > overall_volatility else 50
        return window_sizes
        
    def calculate_sma(self, ticker, window=None):
        """Calculates the Simple Moving Average percentage change 
        (Primary Trading Signal)
        """
        if window is None:
            window = self.windows[ticker]
        sma_values = self.historical_data[ticker]['Close'].rolling(window=window).mean()
        sma_pct_change = sma_values.pct_change()
        return sma_pct_change
    
    def calculate_ema(self, ticker, window=None):
        """
        Calculates exponetial moving average (Primary Trading Signal)
        """
        if window is None:
            window = self.windows[ticker]
        ema_values = self.historical_data[ticker]['Close'].ewm(span=window, adjust=False).mean()
        return ema_values
    
    def calculate_moving_volatility(self, ticker, window=None):
        """Calculate the moving standard deviation over a window
        """
        if window is None:
            window = self.windows[ticker]
        return self.historical_data[ticker]['Close'].rolling(window=window).std()
    
    def calculate_rsi(self, ticker, window=14):
        """Calculates Relative Strenght Index, calculated over 14 days.
        Gains and Losses are averaged out over 14 days.
        RS = Average Gain/ Average Loss
        RSI = 100 - (100/(1+RS))
        """
        delta = self.historical_data[ticker]['Close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))
    
    def calculate_macd(self, ticker, fast=12, slow=26, signal=9):
        """
        Calculate Moving Average Convergence Divergence (MACD). 
        Based on the difference between short term (fast) and 
        long term (slow) exponential moving averages
        macd_signal is the smoothening factor: \the difference can signal trends
        """
        ema_fast = self.calculate_ema(ticker, window=fast)
        ema_slow = self.calculate_ema(ticker, window=slow)
        macd = ema_fast - ema_slow
        macd_signal = macd.ewm(span=signal, adjust=False).mean()
        # signal line smoothens the MACD line itself
        return macd, macd_signal
    
    def calculate_bollinger_bands(self, ticker, window=20):
        """Calculate Bollinger Bands
        """
        sma = self.calculate_sma(ticker, window=window)
        std = self.historical_data[ticker]['Close'].rolling(window=window).std()
        upper_band = sma + (std * 2)
        lower_band = sma - (std * 2)
        return upper_band, lower_band
    
    def calculate_percent_b(self, ticker, window=20):
        """
        a normalized representation of where the last closing price falls relative to the Bollinger Bands.
        """
        upper_band, lower_band = self.calculate_bollinger_bands(ticker, window)
        current_price = self.historical_data[ticker]['Close'].iloc[-1]
        percent_b = (current_price - lower_band) / (upper_band - lower_band)
        return percent_b

    def calculate_obv(self, ticker):
        """
        Calculate On-Balance Volume (OBV)
        """
        close = self.historical_data[ticker]['Close']
        volume = self.historical_data[ticker]['Volume']
        obv = (np.sign(close.diff()) * volume).fillna(0).cumsum()
        obv_prev = obv.shift(1)
        return obv, obv_prev
    
    def calculate_recent_trend(self, ticker):
        # Based on short and long term moving averages
        short_term_window = 20
        long_term_window = 90
        # Calculate short and long term moving averages
        short_term_ma = self.historical_data[ticker]['Close'].rolling(
            window=short_term_window
        ).mean()
        # Exponential moving average for long term
        long_term_ma = self.historical_data[ticker]['Close'].ewm(
            span=long_term_window, adjust=False
        ).mean()
        # trend signal shows the relationship between short and long term mas
        trend_signal = (short_term_ma > long_term_ma).astype(int) - \
            (short_term_ma < long_term_ma).astype(int)
        trend_description = trend_signal.map(
            {1: 'up', -1: 'down', 0: 'flat'}
        ).fillna('flat')
        return trend_description

In [15]:
class AnalysisImplementor:
    def __init__(self, historical_data, market_data) -> None:
        
        self.historical_data = historical_data
        self.market_data = market_data
        self.technical_analysis = TechnicalAnalysis(self.historical_data)
        self.candlestick_patterns = CandlestickPatterns(self.historical_data)
        self.support_resistance = SupportResistance(self.historical_data)
        self.markov_models = {
            ticker: MarkovModel(data) for ticker, data in self.historical_data.items()
        }
    
    def implement_all_analysis(self):
        self.implement_technical_analysis()
        self.implement_pattern_analysis()
        self.integrate_markov_predictions()
        
    def implement_technical_analysis(self):
        """ 
        Applies most recent technical analysis to market data
        """
        for ticker in self.technical_analysis.historical_data.keys():
            self.market_data.loc[ticker, 'sma'] = \
                self.technical_analysis.calculate_sma(ticker).iloc[-1]
            self.market_data.loc[ticker, 'ema'] = \
                self.technical_analysis.calculate_ema(ticker).iloc[-1]
            self.market_data.loc[ticker, 'volatility'] = \
                self.technical_analysis.calculate_moving_volatility(ticker).iloc[-1]
            self.market_data.loc[ticker, 'rsi'] = \
                self.technical_analysis.calculate_rsi(ticker).iloc[-1]
            macd, macd_signal = self.technical_analysis.calculate_macd(ticker)
            self.market_data.loc[ticker, 'macd'] = macd.iloc[-1]
            self.market_data.loc[ticker, 'macd_signal'] = macd_signal.iloc[-1]
            upper_band, lower_band = \
                self.technical_analysis.calculate_bollinger_bands(ticker)
            self.market_data.loc[ticker, 'upper_bollinger'] = upper_band.iloc[-1]
            self.market_data.loc[ticker, 'lower_bollinger'] = lower_band.iloc[-1]
            obv, obv_prev = self.technical_analysis.calculate_obv(ticker)
            self.market_data.loc[ticker, 'obv'] = obv.iloc[-1]
            self.market_data.loc[ticker, 'obv_previous'] = obv_prev.iloc[-1]
            self.market_data.loc[ticker, 'trend'] = \
                self.technical_analysis.calculate_recent_trend(ticker).iloc[-1]
    
    def implement_pattern_analysis(self):
        """Adds candlestick patterns and support/resistance levels to the market data
        """
        self.market_data['supports'] = None
        self.market_data['resistances'] = None
        
        supports, resistances = self.support_resistance.find_levels()
        for ticker in supports:
            self.market_data.loc[ticker, 'supports'] = \
                supports[ticker].iloc[-1] if not supports[ticker].empty else None
            self.market_data.loc[ticker, 'resistances'] = \
                resistances[ticker].iloc[-1] if not resistances[ticker].empty else None
        all_patterns = self.candlestick_patterns.find_patterns()
        for ticker, patterns in all_patterns.items():
            # Get the last date's data for each ticker's patterns
            last_patterns = patterns.iloc[-1]  # Assuming you want the last row's pattern data

            # Ensure the ticker is already an index in market_data or has an equivalent key
            if ticker in self.market_data.index:
                for pattern_name in patterns.columns:
                    # Safely add pattern columns to market_data if not already present
                    if pattern_name not in self.market_data.columns:
                        self.market_data[pattern_name] = np.nan  # Initialize if not existing

                    # Update the specific pattern entry for the ticker in market_data
                    self.market_data.loc[ticker, pattern_name] = float(last_patterns[pattern_name])


    def integrate_markov_predictions(self):
        """
        Integrate Markov model predictions into market data
        """
        self.market_data['markov_state'] = None
        for ticker, model in self.markov_models.items():
            if ticker in self.market_data.index:
                self.market_data.at[ticker, 'markov_state'] = model.predict_next_state()

In [16]:
class BudgetAllocator:
    def __init__(
        self, budget, market_data, historical_data, 
        portfolio_data, weights
    ) -> None:
        self.budget = budget
        self.market_data = market_data
        self.historical_data = historical_data
        self.portfolio_data = portfolio_data
        self.portfolio_analyzer = PortfolioAnalysisEngine(
            self.portfolio_data, self.market_data, self.historical_data
        )
        self.weights = weights
        self.minimum_allocation = 5
        
    def allocate_budget(self):
        """
        Allocates the budget based on weights
        """
        initial_allocations = {
            ticker: round(self.budget * self.weights[ticker], 2) \
                for ticker in self.weights
        }
        allocations = {
            ticker: value if value >= self.minimum_allocation else 0 \
                for ticker, value in initial_allocations.items()
        }
        # Removing allocations that are less than the minimum threshold
        for ticker, value in list(allocations.items()):
            if 0 == value:
                allocations.pop(ticker)
        
        total_allocated = sum(allocations.values())
        remaining_budget = self.budget - total_allocated
        
        if remaining_budget > 0:
            allocations = self.redistribute_remaining_budget(allocations, remaining_budget)
        # Fix rounding errors
        self.adjust_for_rounding_errors(allocations)
        return allocations
    
    def redistribute_remaining_budget(self, allocations, remaining_budget):
        """
        Redistributes any remaining budget proportionally to stocks 
        above the minimum threshold

        Args:
            allocations (dict): Money allocated for each stock 
            remaining_budget (float): budget remaining after eliminating 
            money alloted to stocks under minimum threshold
        """
        total_weight_allocated = sum(
            self.weights[ticker] for ticker in allocations
        )
        additional_allocations = {
            ticker: (self.weights[ticker] / total_weight_allocated) * \
                remaining_budget for ticker in allocations
        }
        for ticker, additional_amount in additional_allocations.items():
            allocations[ticker] += round(additional_amount, 2)
            
        return allocations
    
    def adjust_for_rounding_errors(self, allocations):
        """
        Adjusts the final allocations to match the budget exactly, ensuring no negative allocations.

        Args:
            allocations (dict): Money allocated for each stock 
        """
        total_allocation = sum(allocations.values())
        rounding_error = self.budget - total_allocation
        if rounding_error != 0:
            # If rounding error is positive, add it to the ticker with the max allocation
            if rounding_error > 0:
                ticker_with_max_allocation = max(allocations, key=allocations.get)
                allocations[ticker_with_max_allocation] += rounding_error
            else:
                
                sorted_tickers = sorted(allocations, key=allocations.get, reverse=True)
                for ticker in sorted_tickers:
                    max_adjustable = allocations[ticker] - self.minimum_allocation
                    adjustment = max(rounding_error, - max_adjustable)
                    allocations[ticker] += adjustment
                    rounding_error -= adjustment
                    if rounding_error == 0:
                        break

            # Ensure all allocations are rounded to 2 decimal places after adjustments
            for ticker in allocations:
                allocations[ticker] = round(allocations[ticker], 2)

    
    def set_minimum_allocation(self, minimum_allocation):
        """
        Allows setting a new minimum allocation amount if needed
        Args:
            minimum_allocation (int): Lowest investment permitted per stock
        """
        self.minimum_allocation = minimum_allocation

In [17]:
class StrategyExecutor:
    def __init__(self, market_data, portfolio_analyzor) -> None:
        self.market_data = market_data
        self.portfolio_analyzor = portfolio_analyzor
        self.portfolio_analyzor.apply_strategy()
        self.weights = self.portfolio_analyzor.weights
    
    def normalize_weights(self):
        total_weight = sum(self.weights.values())
        for key in self.weights:
            self.weights[key] /= total_weight
        
    def adjust_weights(self):
        """
        Adjusts weights based on technical indicators, market patterns
        and Markov model predictions
        """
        for ticker, data in self.market_data.iterrows():
            total_adjustment = self.calculate_adjustments(data, ticker)
            self.weights[ticker] *= (1 + total_adjustment)
        self.normalize_weights()
    
    def calculate_adjustments(self, data, ticker):
        """Calculates adjustment factors based on secondary signals for trading."""
        adjustment_factors = {
            'rsi': self.adjust_rsi(data),
            'macd': self.adjust_macd(data),
            'bollinger': self.adjust_bollinger(data),
            'price_vs_sma': self.adjust_price_vs_sma(data),
            'price_vs_ema': self.adjust_price_vs_ema(data),
            'volatility': self.adjust_volatility(data),
            'markov': self.adjust_markov(data),
            'support_resistance': self.adjust_support_resistance(data),
            'pattern_weight': self.calculate_pattern_weights(ticker)
        }
        return sum(adjustment_factors.values())
    
    def adjust_rsi(self, data):
        """A higher relative strength index indicates that
        a stock is overbought
        Args:
            data (pd.Series): Series representing all the columns for the row
        Returns:
            float: weight adjustment based on value of RSI
        """
        if 'rsi' in data:
            return -0.05 if data['rsi'] > 70 else 0.05 if data['rsi'] < 30 else 0
        return 0
    
    def adjust_macd(self, data):
        """Moving average Convergance Divergance. When the macd line
        crosses above the signal line, the stock is bullish
        when it crosses below the signal line, the stock is bearish

        Args:
            data (pd.Series): Series representing all the columns for the row
        """
        if 'macd' in data and 'macd_signal' in data:
            return 0.05 if (data['macd'] > data['macd_signal']) else -0.05
        return 0
    
    def adjust_bollinger(self, data):
        """
        Adjusts weights based on Bollinger Bands indicators.
        Args:
            data (pd.Series): Series representing all the columns for the row
        Returns:
            float: weight adjustment based on Bollinger Bands analysis
        """
        adjustment = 0
        current_price = data['currentPrice']
        if 'upper_bollinger' in data and 'lower_bollinger' in data \
            and 'sma' in data:
            upper_band = data['upper_bollinger']
            lower_band = data['lower_bollinger']
            sma = data['sma'] 
            # Check if current price is above the upper Bollinger Band
            if current_price > upper_band:
                adjustment += 0.05
            # Check if current price is below the lower Bollinger Band
            elif current_price < lower_band:
                adjustment -= 0.05
            # Calculate bandwidth
            bandwidth = (upper_band - lower_band) / sma
            # Increase adjustment if the bandwidth is very high, indicating high volatility
            if bandwidth > 0.10:  # Threshold for high volatility
                adjustment += 0.03
            elif bandwidth < 0.05:  # Threshold for low volatility
                adjustment -= 0.03
        return adjustment

    
    def adjust_price_vs_sma(self, data):

        if 'sma' in data and 'currentPrice' in data:
            return 0.05 if data['currentPrice'] > data['sma'] else -0.05
        return 0

    def adjust_price_vs_ema(self, data):
        """
        Adjust based on the relationship between the current price and the exponential moving average (EMA).
        Returns 0.05 if the current price is above the EMA, -0.05 if below, or 0 if EMA is not available.
        """
        if 'ema' in data and 'currentPrice' in data:
            return 0.05 if data['currentPrice'] > data['ema'] else -0.05
        return 0
    
    def adjust_volatility(self, data):
        if 'volatility' in data:
            mean_volatility = self.market_data['volatility'].mean()
            return -0.05 if data['volatility'] > mean_volatility else 0.05
        return 0
    
    def adjust_markov(self, data):
        """A downtrend prediction is 0 or 1 while an uptrend prediction is 2 or 3
        """
        if 'markov_state' in data:
            return -0.10 if data['markov_state'] in [0, 1] else 0.10 if data['markov_state'] in [2, 3] else 0
        return 0
    
    def adjust_support_resistance(self, data):
        """ Adjust weights based on proximity of current price to supports and resistances
        """
        if 'currentPrice' in data:
            current_price = data['currentPrice']
            support_level = data.get('supports', current_price)
            resistance_level = data.get('resistances', current_price)
            # Calculate proximity 
            support_distance_proximity = (current_price - support_level) if current_price != 0 else 0
            
            resistance_distance_proximity = (resistance_level - current_price) / current_price if current_price != 0 else 0
            # Adjust weights based on these factors
            support_adjustment = 0.1 * (1 - max(0, support_distance_proximity)) if current_price >= support_level else 0
            resistance_adjustment = -0.1 * (1 - max(0, resistance_distance_proximity)) if current_price <= resistance_level else 0
            return support_adjustment + resistance_adjustment
        return 0
    
    def calculate_pattern_weights(self, ticker):
        """"
        Add an adjustment for current candlestick patterns
        """
        positive_patterns = [
            'Hammer', 'Inverted Hammer', 'Morning Star',
            'Three White Soldiers', 'Piercing Line', 'Engulfing', 'Doji'
        ]
        negative_patterns = [
            'Shooting Star', 'Dark Cloud Cover', 'Evening Star',
            'Three Black Crows', 'Harami'
        ]
        pattern_weights = sum(
            [0.03 if self.market_data.at[ticker, pattern] else 0 for pattern in positive_patterns]
        ) - sum(
            [0.03 if self.market_data.at[ticker, pattern] else 0 for pattern in negative_patterns]
        )
        return pattern_weights

In [18]:
class InvestmentDecisionMaker:
    
    def __init__(self, historical_data, market_data, portfolio_data, budget):
        
        self.historical_data = historical_data
        self.market_data = market_data
        self.portfolio_data = portfolio_data
        self.portfolio_analyzer = PortfolioAnalysisEngine(portfolio_data, market_data, historical_data)
        self.analysis_implementor = AnalysisImplementor(historical_data, market_data)
        self.strategy_exeutor = StrategyExecutor(market_data, self.portfolio_analyzer)
        self.budget_allocator = None  
        self.budget = budget

    def execute_strategy(self):
        # Perform all market and financial analyses
        self.analysis_implementor.implement_all_analysis()
        self.strategy_exeutor.adjust_weights()
        self.budget_allocator = BudgetAllocator(self.budget, self.market_data, self.historical_data, self.portfolio_data, self.strategy_exeutor.weights)
        
        # Allocate budget based on the adjusted weights
        allocations = self.budget_allocator.allocate_budget()

        return allocations

In [19]:
decision = InvestmentDecisionMaker(historical_data, market_data, portfolio, 100)

In [20]:
decision.execute_strategy()

{'TSLA': 5.14,
 'AAPL': 16.95,
 'PYPL': 7.33,
 'NFLX': 7.48,
 'AMZN': 11.59,
 'MSFT': 15.27,
 'SBUX': 5.13,
 'NVDA': 5.0,
 'GM': 6.14,
 'COKE': 7.83,
 'WMT': 6.33,
 'KO': 5.81}

In [21]:
candlestick = CandlestickPatterns(historical_data)

In [22]:
historical_data

{'TSLA':                                  Open        High         Low       Close  \
 Date                                                                        
 2014-04-21 00:00:00-04:00   13.138667   13.746667   12.933333   13.625333   
 2014-04-22 00:00:00-04:00   13.757333   14.622000   13.667333   14.576000   
 2014-04-23 00:00:00-04:00   14.422000   14.449333   13.800000   13.866000   
 2014-04-24 00:00:00-04:00   14.054000   14.186667   13.546667   13.857333   
 2014-04-25 00:00:00-04:00   13.466667   13.780000   13.176667   13.323333   
 ...                               ...         ...         ...         ...   
 2024-04-15 00:00:00-04:00  170.240005  170.690002  161.380005  161.479996   
 2024-04-16 00:00:00-04:00  156.740005  158.190002  153.750000  157.110001   
 2024-04-17 00:00:00-04:00  157.639999  158.330002  153.779999  155.449997   
 2024-04-18 00:00:00-04:00  151.250000  152.199997  148.699997  149.929993   
 2024-04-19 00:00:00-04:00  148.940002  150.929993  148.

In [23]:
portfolio_analyzer = PortfolioAnalysisEngine(portfolio, market_data, historical_data)
strategy_exeutor = StrategyExecutor(market_data, portfolio_analyzer)

In [24]:
strategy_exeutor.weights

{'TSLA': 0.03918718137502091,
 'GOOGL': 0.1213065950367527,
 'AAPL': 0.10981234813001667,
 'PYPL': 0.047333918899013755,
 'NFLX': 0.06462817501964876,
 'AMZN': 0.09724998071089376,
 'MSFT': 0.11718553059080042,
 'SBUX': 0.045699388507702075,
 'NVDA': 0.13273007705455867,
 'GM': 0.03722644609401309,
 'CAVA': 0.0,
 'SOXX': 0.02810504982189212,
 'COKE': 0.07139627828362156,
 'WMT': 0.04239351735417405,
 'KO': 0.045745513121891575}