In [2]:
from openai import OpenAI
import anthropic
from google import genai
from google.genai import types
import os

import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px

from collections import Counter
from datetime import datetime, timedelta

%matplotlib inline

In [3]:
# gpt models
# Read the OpenAI API key from a local file (so it never appears in the
# notebook itself), expose it through the environment, and build the client
# that the chat-completion requests use.
with open('api_key/openai_api_key.txt', 'r', encoding='utf-8') as f:
    # readline() keeps the trailing newline; strip it so the key is not stored
    # and sent with stray whitespace attached.
    API_KEY = f.readline().strip()
if not API_KEY:
    raise ValueError("api_key/openai_api_key.txt is empty; expected the OpenAI API key on its first line")
os.environ['OPENAI_API_KEY'] = API_KEY
client = OpenAI(
    api_key=os.environ['OPENAI_API_KEY']
)

In [4]:
# OpenAI API parameters
# Upper bound on tokens per completion; forwarded to the API via default_params.
max_tokens = 1024
# Number of completions requested per call (passed as n=); the notebook only
# reads choices[0] of each response.
n = 1
# No custom stop sequence (passed as stop=).
stop = None
# Sampling temperature; forwarded to the API via default_params.
temperature = 0.5

In [5]:
# Keyword arguments spread into each chat-completion request. They are
# omitted entirely when model == "o4-mini"
# (presumably because that model rejects these parameters — TODO confirm).
default_params = {
    "max_tokens": max_tokens,
    "temperature": temperature,
}

In [6]:
# Sector configuration for the Industrials run.
# Lower-case label interpolated into the prompt and the progress message.
sector_name = 'industrials'
# Value matched against the 'GICS Sector' column of SP500.csv.
sector_name_full = 'Industrials'
# NOTE(review): not used in the visible cells; presumably the index symbol
# used for price downloads later on — confirm.
sector_ticker = '^SP500-20'
# Index code interpolated into the prompt.
full_ticker = 'SP500-20'

In [9]:
# Load the S&P 500 constituent table and keep the ticker symbols of the rows
# whose GICS sector matches the currently configured sector.
sp500_df = pd.read_csv('SP500.csv')
in_sector = sp500_df['GICS Sector'] == sector_name_full
companies = sp500_df.loc[in_sector, 'Symbol']
companies

10      ADP
23     ALLE
27      AME
35      AOS
47     AXON
       ... 
469    VLTO
471    VRSK
478     WAB
487      WM
498     XYL
Name: Symbol, Length: 78, dtype: object

In [7]:
# Chat model used for every completion request. The value "o4-mini" is
# special-cased where requests are built: default_params are not sent for it.
model = "gpt-4.1"

In [11]:
def _ask_model(prompt):
    """Send one user prompt to the chat model and return the reply text.

    Relies on the notebook-level ``client``, ``model``, ``n``, ``stop`` and
    ``default_params``. Returns an empty string when the API reports no
    message content, so callers can always treat the result as text.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful  assistant."},
            {"role": "user", "content": prompt},
        ],
        n=n,
        stop=stop,
        # default_params are skipped for "o4-mini"
        # (presumably that model rejects max_tokens/temperature — TODO confirm).
        **(default_params if model != "o4-mini" else {})
    )
    return response.choices[0].message.content or ""


def _parse_tickers(reply):
    """Turn the model's ticker-extraction reply into a list of ticker symbols.

    The model is asked for space-separated symbols but may still answer with
    commas or other decoration, so commas are treated as separators and
    punctuation surrounding each token is dropped. Characters inside a token
    (e.g. the dot in a share-class symbol) are left alone.
    """
    surrounding = ",;:.()[]{}\"'`*$-"
    tokens = (raw.strip(surrounding) for raw in reply.replace(",", " ").split())
    # The model may still use Meta's former symbol; map it to the current one.
    return ["META" if token == "FB" else token for token in tokens if token]


def generate_stocks_by_sector(sector_name, full_ticker, n_count, valid_tickers, recount=False, top_k=15):
    """Ask the chat model for sector funds repeatedly and keep the most frequent valid picks.

    For each of ``n_count`` rounds the model is asked to propose a fund for the
    sector, then asked again to extract only the ticker symbols from its own
    answer. Tickers are tallied across rounds; the ``top_k`` most frequently
    proposed tickers that also appear in ``valid_tickers`` are selected.
    Progress (tickers per round, running counts, discarded tickers and the
    final selection) is printed.

    Relies on the notebook-level ``client``, ``model``, ``n``, ``stop`` and
    ``default_params``.

    Parameters
    ----------
    sector_name : str
        Sector label interpolated into the prompt.
    full_ticker : str
        Sector index code interpolated into the prompt.
    n_count : int
        Number of propose/extract rounds to run.
    valid_tickers : iterable of str
        Tickers accepted for this sector; anything else is discarded.
    recount : bool, optional
        Currently unused; kept so existing callers keep working.
    top_k : int, optional
        Maximum number of tickers to select (default 15).

    Returns
    -------
    list of str
        Selected tickers, most frequently proposed first. May hold fewer than
        ``top_k`` entries when not enough valid tickers were proposed.
    """
    # NOTE(review): the literal "X" is sent to the model as-is, leaving the
    # fund size up to the model — confirm this is intended.
    prompt1 = f"Using a range of investing principles taken from leading funds, create a theoretical fund comprising of at least X stocks (mention their tickers) from the {full_ticker} {sector_name} sector with the goal to outperform the {full_ticker} {sector_name} sector"

    stock_counter = Counter()
    num_stocks_per_iter = []

    for _ in range(n_count):
        # Round 1: free-form fund proposal.
        coutput1 = _ask_model(prompt1)

        # Round 2: have the model reduce its own answer to bare ticker symbols.
        prompt2 = 'Extract only the ticker symbols of the stocks comprising the fund from the previous response:- "{input}". In your response to this prompt, list only the ticker symbols separated by spaces.'.format(input=coutput1)
        coutput2 = _ask_model(prompt2)

        stock_tickers = _parse_tickers(coutput2)
        print(stock_tickers)

        num_stocks_per_iter.append(len(stock_tickers))
        print(num_stocks_per_iter)

        stock_counter.update(stock_tickers)
        print(stock_counter)

    valid_tickers = set(valid_tickers)
    valid_most_common_stocks = []

    # Walk the tickers from most to least frequently proposed. The walk goes
    # on after top_k is reached so every invalid ticker is still reported.
    for stock, _ in stock_counter.most_common():
        if stock not in valid_tickers:
            print(f"The stock {stock} is not in the valid list of this S&P 500 sector and hence is being discarded.")
        elif len(valid_most_common_stocks) < top_k:
            valid_most_common_stocks.append(stock)

    if len(valid_most_common_stocks) < top_k:
        print(f"There are not enough valid stocks to reach the desired number of {top_k} stocks.")

    print("The valid most common stocks are: ", valid_most_common_stocks)

    return valid_most_common_stocks



In [12]:
# Run 10 propose/extract rounds for the configured sector, validating the
# model's picks against the sector's constituent list.
generate_stocks_by_sector(
    sector_name=sector_name,
    full_ticker=full_ticker,
    n_count=10,
    valid_tickers=companies,
)
print(f'Processed  sector {sector_name}')

['HON', 'UNP', 'CAT', 'DE', 'WM', 'LMT', 'ITW', 'ETN', 'CSX', 'PH']
[10]
Counter({'HON': 1, 'UNP': 1, 'CAT': 1, 'DE': 1, 'WM': 1, 'LMT': 1, 'ITW': 1, 'ETN': 1, 'CSX': 1, 'PH': 1})
['HON', 'UNP', 'LMT', 'CAT', 'DE', 'WM', 'PH', 'CSX', 'UPS', 'ITW', 'CTAS', 'FDX', 'EMR', 'NOC', 'CARR']
[10, 15]
Counter({'HON': 2, 'UNP': 2, 'CAT': 2, 'DE': 2, 'WM': 2, 'LMT': 2, 'ITW': 2, 'CSX': 2, 'PH': 2, 'ETN': 1, 'UPS': 1, 'CTAS': 1, 'FDX': 1, 'EMR': 1, 'NOC': 1, 'CARR': 1})
['UNP', 'ETN', 'LMT', 'DE', 'ITW', 'FAST', 'WM', 'RTX', 'PH', 'CARR']
[10, 15, 10]
Counter({'UNP': 3, 'DE': 3, 'WM': 3, 'LMT': 3, 'ITW': 3, 'PH': 3, 'HON': 2, 'CAT': 2, 'ETN': 2, 'CSX': 2, 'CARR': 2, 'UPS': 1, 'CTAS': 1, 'FDX': 1, 'EMR': 1, 'NOC': 1, 'FAST': 1, 'RTX': 1})
['HON', 'UNP', 'LMT', 'CARR', 'ETN', 'UPS', 'ROP', 'PH']
[10, 15, 10, 8]
Counter({'UNP': 4, 'LMT': 4, 'PH': 4, 'HON': 3, 'DE': 3, 'WM': 3, 'ITW': 3, 'ETN': 3, 'CARR': 3, 'CAT': 2, 'CSX': 2, 'UPS': 2, 'CTAS': 1, 'FDX': 1, 'EMR': 1, 'NOC': 1, 'FAST': 1, 'RTX': 1, 'R

In [13]:
# Sector configuration for the Energy run (overwrites the Industrials values).
# NOTE(review): `companies` was derived from the previous sector_name_full and
# is not refreshed by this cell.
# Lower-case label interpolated into the prompt and the progress message.
sector_name = 'energy'
# Value to match against the 'GICS Sector' column of SP500.csv.
sector_name_full = 'Energy'
# NOTE(review): not used in the visible cells; presumably the index symbol
# used for price downloads later on — confirm.
sector_ticker = '^GSPE'
# Index code interpolated into the prompt.
full_ticker = 'SP500-10'

In [14]:
# Build the constituent list for the sector that is configured right now.
# `companies` was derived while sector_name_full was 'Industrials', so passing
# it here would validate the energy picks against the wrong sector and
# discard all of them.
sector_companies = sp500_df.loc[sp500_df['GICS Sector'] == sector_name_full, 'Symbol']
generate_stocks_by_sector(sector_name, full_ticker, 10, sector_companies)
print(f'Processed  sector {sector_name}')

['XOM', 'CVX', 'COP', 'EOG', 'SLB', 'MPC', 'PSX', 'VLO', 'OXY', 'HAL']
[10]
Counter({'XOM': 1, 'CVX': 1, 'COP': 1, 'EOG': 1, 'SLB': 1, 'MPC': 1, 'PSX': 1, 'VLO': 1, 'OXY': 1, 'HAL': 1})
['XOM', 'CVX', 'COP', 'EOG', 'SLB', 'WMB', 'MPC']
[10, 7]
Counter({'XOM': 2, 'CVX': 2, 'COP': 2, 'EOG': 2, 'SLB': 2, 'MPC': 2, 'PSX': 1, 'VLO': 1, 'OXY': 1, 'HAL': 1, 'WMB': 1})
['XOM', 'CVX', 'COP', 'EOG', 'SLB', 'MPC', 'WMB', 'PSX', 'PXD']
[10, 7, 9]
Counter({'XOM': 3, 'CVX': 3, 'COP': 3, 'EOG': 3, 'SLB': 3, 'MPC': 3, 'PSX': 2, 'WMB': 2, 'VLO': 1, 'OXY': 1, 'HAL': 1, 'PXD': 1})
['XOM', 'CVX', 'SLB', 'EOG', 'COP', 'PSX', 'HAL', 'OXY']
[10, 7, 9, 8]
Counter({'XOM': 4, 'CVX': 4, 'COP': 4, 'EOG': 4, 'SLB': 4, 'MPC': 3, 'PSX': 3, 'OXY': 2, 'HAL': 2, 'WMB': 2, 'VLO': 1, 'PXD': 1})
['XOM', 'COP', 'EOG', 'MPC', 'WMB', 'SLB']
[10, 7, 9, 8, 6]
Counter({'XOM': 5, 'COP': 5, 'EOG': 5, 'SLB': 5, 'CVX': 4, 'MPC': 4, 'PSX': 3, 'WMB': 3, 'OXY': 2, 'HAL': 2, 'VLO': 1, 'PXD': 1})
['XOM', 'EOG', 'MPC', 'SLB', 'PSX', 'COP