In [21]:
#Logic Behind Strat
#Re-run the K-Means clustering after each earnings update to reassign clusters.
#Assign buy/sell/hold signals based on changes in cluster membership.
#Buy: When a stock moves into a high-growth, undervalued cluster.
#Sell: When a stock moves into a low-growth, overvalued cluster.
#Hold: When the stock remains in a cluster indicating balanced growth and valuation.
#Allocate capital dynamically based on cluster signals.
#Rebalance the portfolio quarterly.

#FIX AND MAKE INTO A BIG DATAFRAME LIKE NORMAL

import yfinance as yf
import pandas as pd
import numpy as np

from sklearn.cluster import KMeans
# Define the stock tickers
tickers = ["AAPL", "MSFT", "GOOGL"]
prices = yf.download(tickers, start="2018-01-01")['Adj Close']
# Initialize a dictionary to store EPS data for each stock
eps_dict = {ticker: [] for ticker in tickers}

# Iterate through each ticker to get quarterly financials and EPS data
for ticker in tickers:
    stock = yf.Ticker(ticker)
    
    # Get quarterly financials (Net Income)
    quarterly_financials = stock.quarterly_financials
    
    # Get current shares outstanding
    shares_outstanding = stock.info.get("sharesOutstanding")
    
    # Collect EPS for each quarter
    if shares_outstanding:  # Ensure shares outstanding is available
        for date, net_income in quarterly_financials.loc["Net Income"].items():
            if net_income:  # Ensure net income is not None
                eps = net_income / shares_outstanding
                eps_dict[ticker].append({"Quarter End": date, "EPS": eps})

# Now, we need to create a DataFrame from this dictionary
# We will align the dates for each stock and fill missing values with NaN if necessary

# Create an empty DataFrame
eps_df = pd.DataFrame()

# Iterate over the dictionary and create columns for each stock
for ticker, data in eps_dict.items():
    # Convert data for the current stock into a DataFrame
    temp_df = pd.DataFrame(data)
    temp_df.set_index("Quarter End", inplace=True)
    
    # Rename the column to the ticker symbol
    temp_df.rename(columns={"EPS": ticker}, inplace=True)
    
    # Join with the main DataFrame
    eps_df = eps_df.join(temp_df, how="outer")



eps_df.rename_axis("Date", inplace=True)
# Convert the index to datetime
eps_df.index = pd.to_datetime(eps_df.index)

# Calculate Earnings Growth
earnings_growth = eps_df.pct_change().shift(-1)

# Convert the index to datetime
prices.index = pd.to_datetime(prices.index)
# Convert the index of the DataFrame with tz-aware index to tz-naive
prices.index = prices.index.tz_localize(None)
pe_ratio = prices/eps_df
data = pd.concat([eps_df, earnings_growth, prices, pe_ratio], axis=1, join='outer')
data.fillna(method='ffill', inplace=True)
data = data.fillna(0)



original_columns = data.columns.tolist()

[*********************100%***********************]  3 of 3 completed
  data.fillna(method='ffill', inplace=True)


In [22]:
data

Unnamed: 0_level_0,AAPL,MSFT,GOOGL,AAPL,MSFT,GOOGL,AAPL,GOOGL,MSFT,AAPL,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-01-02,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,40.524345,53.466679,79.633507,0.000000,0.000000,0.000000
2018-01-03,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,40.517284,54.378876,80.004112,0.000000,0.000000,0.000000
2018-01-04,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,40.705498,54.590103,80.708267,0.000000,0.000000,0.000000
2018-01-05,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,41.168934,55.313984,81.708885,0.000000,0.000000,0.000000
2018-01-08,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,41.016033,55.509274,81.792274,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-01-06,0.974874,3.31774,4.501284,-0.312943,0.119396,0.113553,245.000000,196.869995,427.850006,238.742588,36.802866,129.437623
2025-01-07,0.974874,3.31774,4.501284,-0.312943,0.119396,0.113553,242.210007,195.490005,422.369995,238.742588,36.802866,129.437623
2025-01-08,0.974874,3.31774,4.501284,-0.312943,0.119396,0.113553,242.699997,193.949997,424.559998,238.742588,36.802866,129.437623
2025-01-10,0.974874,3.31774,4.501284,-0.312943,0.119396,0.113553,236.850006,192.039993,418.950012,238.742588,36.802866,129.437623


In [23]:
# Define the suffixes for each group of 10 columns
suffixes = ["_EPS", "_Growth", "_Price", "_PE"]
# Create a new list for the updated column names
new_columns = original_columns.copy()

# Loop through each group of 10 columns and apply the suffix
for i, suffix in enumerate(suffixes):
    start = i * 3
    end = (i + 1) * 3
    for idx in range(start, end):
        if idx < len(new_columns):  # Ensure index is within bounds
            new_columns[idx] = f"{original_columns[idx]}{suffix}"

# Update the DataFrame column names
data.columns = new_columns
data

Unnamed: 0_level_0,AAPL_EPS,MSFT_EPS,GOOGL_EPS,AAPL_Growth,MSFT_Growth,GOOGL_Growth,AAPL_Price,GOOGL_Price,MSFT_Price,AAPL_PE,GOOGL_PE,MSFT_PE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-01-02,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,40.524345,53.466679,79.633507,0.000000,0.000000,0.000000
2018-01-03,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,40.517284,54.378876,80.004112,0.000000,0.000000,0.000000
2018-01-04,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,40.705498,54.590103,80.708267,0.000000,0.000000,0.000000
2018-01-05,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,41.168934,55.313984,81.708885,0.000000,0.000000,0.000000
2018-01-08,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,41.016033,55.509274,81.792274,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-01-06,0.974874,3.31774,4.501284,-0.312943,0.119396,0.113553,245.000000,196.869995,427.850006,238.742588,36.802866,129.437623
2025-01-07,0.974874,3.31774,4.501284,-0.312943,0.119396,0.113553,242.210007,195.490005,422.369995,238.742588,36.802866,129.437623
2025-01-08,0.974874,3.31774,4.501284,-0.312943,0.119396,0.113553,242.699997,193.949997,424.559998,238.742588,36.802866,129.437623
2025-01-10,0.974874,3.31774,4.501284,-0.312943,0.119396,0.113553,236.850006,192.039993,418.950012,238.742588,36.802866,129.437623


In [34]:
df = data
import matplotlib.pyplot as plt

# Create a new DataFrame with the relevant features
features_df = pd.DataFrame({
    'AAPL_PE': df['AAPL_PE'],
    'AAPL_Growth': df['AAPL_Growth'],
    'MSFT_PE': df['MSFT_PE'],
    'MSFT_Growth': df['MSFT_Growth'],
    'GOOGL_PE': df['GOOGL_PE'],
    'GOOGL_Growth': df['GOOGL_Growth']
})

# Apply K-means clustering
kmeans = KMeans(n_clusters=2, random_state=42)
kmeans.fit(features_df[['AAPL_PE', 'AAPL_Growth']])
aapl_cluster_assignments = kmeans.labels_

kmeans.fit(features_df[['MSFT_PE', 'MSFT_Growth']])
msft_cluster_assignments = kmeans.labels_

kmeans.fit(features_df[['GOOGL_PE', 'GOOGL_Growth']])
googl_cluster_assignments = kmeans.labels_

# Create new columns in the original DataFrame with the cluster assignments
df['AAPL_Cluster'] = aapl_cluster_assignments
df['MSFT_Cluster'] = msft_cluster_assignments
df['GOOGL_Cluster'] = googl_cluster_assignments

# Define a function to implement the strategy
def implement_strategy(df):
    buy_signals_aapl = []
    sell_signals_aapl = []
    buy_signals_msft = []
    sell_signals_msft = []
    buy_signals_googl = []
    sell_signals_googl = []

    aapl_holding = True
    msft_holding = True
    googl_holding = True
    
    for i in range(len(df)):
        if df['AAPL_Cluster'].iloc[i] == 0 and not aapl_holding:
            buy_signals_aapl.append(df.index[i])
            sell_signals_aapl.append(np.nan)
            aapl_holding = True
        elif df['AAPL_Cluster'].iloc[i] == 1 and aapl_holding:
            buy_signals_aapl.append(np.nan)
            sell_signals_aapl.append(df.index[i])
            aapl_holding = False
        else:
            buy_signals_aapl.append(np.nan)
            sell_signals_aapl.append(np.nan)
            
        if df['MSFT_Cluster'].iloc[i] == 0 and not msft_holding:
            buy_signals_msft.append(df.index[i])
            sell_signals_msft.append(np.nan)
            msft_holding = True
        elif df['MSFT_Cluster'].iloc[i] == 1 and msft_holding:
            buy_signals_msft.append(np.nan)
            sell_signals_msft.append(df.index[i])
            msft_holding = False
        else:
            buy_signals_msft.append(np.nan)
            sell_signals_msft.append(np.nan)
            
        if df['GOOGL_Cluster'].iloc[i] == 0 and not googl_holding:
            buy_signals_googl.append(df.index[i])
            sell_signals_googl.append(np.nan)
            googl_holding = True
        elif df['GOOGL_Cluster'].iloc[i] == 1 and googl_holding:
            buy_signals_googl.append(np.nan)
            sell_signals_googl.append(df.index[i])
            googl_holding = False
        else:
            buy_signals_googl.append(np.nan)
            sell_signals_googl.append(np.nan)
    
    return buy_signals_aapl, sell_signals_aapl, buy_signals_msft, sell_signals_msft, buy_signals_googl, sell_signals_googl

# Implement the strategy
buy_signals_aapl, sell_signals_aapl, buy_signals_msft, sell_signals_msft, buy_signals_googl, sell_signals_googl = implement_strategy(df)
# Implement the strategy
buy_signals_aapl, sell_signals_aapl, buy_signals_msft, sell_signals_msft, buy_signals_googl, sell_signals_googl = implement_strategy(df)
df

Unnamed: 0_level_0,AAPL_EPS,MSFT_EPS,GOOGL_EPS,AAPL_Growth,MSFT_Growth,GOOGL_Growth,AAPL_Price,GOOGL_Price,MSFT_Price,AAPL_PE,GOOGL_PE,MSFT_PE,AAPL_Cluster,MSFT_Cluster,GOOGL_Cluster
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2018-01-02,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,40.524345,53.466679,79.633507,0.000000,0.000000,0.000000,0,0,0
2018-01-03,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,40.517284,54.378876,80.004112,0.000000,0.000000,0.000000,0,0,0
2018-01-04,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,40.705498,54.590103,80.708267,0.000000,0.000000,0.000000,0,0,0
2018-01-05,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,41.168934,55.313984,81.708885,0.000000,0.000000,0.000000,0,0,0
2018-01-08,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,41.016033,55.509274,81.792274,0.000000,0.000000,0.000000,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-01-06,0.974874,3.31774,4.501284,-0.312943,0.119396,0.113553,245.000000,196.869995,427.850006,238.742588,36.802866,129.437623,1,1,1
2025-01-07,0.974874,3.31774,4.501284,-0.312943,0.119396,0.113553,242.210007,195.490005,422.369995,238.742588,36.802866,129.437623,1,1,1
2025-01-08,0.974874,3.31774,4.501284,-0.312943,0.119396,0.113553,242.699997,193.949997,424.559998,238.742588,36.802866,129.437623,1,1,1
2025-01-10,0.974874,3.31774,4.501284,-0.312943,0.119396,0.113553,236.850006,192.039993,418.950012,238.742588,36.802866,129.437623,1,1,1
