In [51]:
import pandas as pd
import numpy as np
from itertools import combinations
import os
import matplotlib.pyplot as plt

In [4]:
company_col = 'Index Constituents'
sharpe_col = 'Sharpe Ratio'
indicator_cols = [
    'Price/Forward EPS',
    'PEG Ratio',
    'Trailing TEV/Total Revenue',
    'Trailing Price/EPS',
    'Short Interest/Shares Outstanding',
    'Three Year R-Squared',
    'Estimated Annual EPS Growth - 1 YR %',
    'Transcript Score ' 
]

indicator_direction = {
    'Price/Forward EPS': False,
    'PEG Ratio': False,
    'Trailing TEV/Total Revenue': False,
    'Trailing Price/EPS': False,
    'Short Interest/Shares Outstanding': False,
    'Three Year R-Squared': True,     # false if seeking alpha, but keep in mind mroe risk
    'Estimated Annual EPS Growth - 1 YR %': True,
    'Transcript Score ': True
}

What we have so far:
We have the ranks for each of the indicators.
We also have the scores for each of the indicators.
We also have the equal weighted (average) combinations for each of the indicators.

# Goal now
Take Rank Combos and Score Combos based on performance strengths, and based on congintive diversity
For diversity strength weighting:
1. Find the RSC functions of each indicator. So we will have 8 RSC functions.
2. Find the cognitive diversity. 
3. Plug in to find the diversity strength of an indicator into the rank/score formulas.
4. Find the combinations
5. When taking combinations of k>1, take a look at the combination formulas, take the sum of ... (the weight of one model * the score/rank of the same model) and the weight of the other models * the score of the other models. I.e. if k=2, then take the weight*the score/rank of one model + the weight * the score/rank of the other model. That is your combos.

For weighted combinations:
1. Less steps because we mainly look at just the sharpe ratio, and that is our weight for each indicator. To calculate this sharpe ratio/performance, we need to take the average of the top 10% of indicators based on score/rank individually, then we combine. So the top 10% of scores of an indicator will be the weight for that indicator.
2. We plug in this weight into our score/rank formulas.


In [5]:
def grabdata(name):
    rankpth = os.path.join("Rank_Combos","1",name)
    scorepth=os.path.join("Score_Combos","1",name)
    dfr = pd.read_csv(rankpth)
    dfs=pd.read_csv(scorepth)
    rank = dfr["Rank"].to_list()
    score = dfs['Score'].to_list()
    return score,rank

#test
score,rank = grabdata('Estimated_Annual_EPS_Growth_-_1_YR_%.csv') #automate this for every indicator
print(score)

[1.0, 0.958142806894126, 0.3385508265916285, 0.2826239887442842, 0.281568765388674, 0.2590573338023215, 0.2562434048540274, 0.2541329581428069, 0.2449876890608512, 0.236545902215969, 0.2230038691523039, 0.2224762574744987, 0.2017235314808301, 0.1931058740766795, 0.1855434400281392, 0.1843123461132606, 0.1832571227576503, 0.1823777699613084, 0.1804431938093563, 0.1785086176574041, 0.1774533943017938, 0.1767499120647203, 0.1734083714386211, 0.1732325008793528, 0.167780513542033, 0.167780513542033, 0.1669011607456911, 0.166373549067886, 0.1651424551530073, 0.1647907140344706, 0.1644389729159338, 0.1644389729159338, 0.1633837495603236, 0.1619767850861765, 0.1618009145269082, 0.1616250439676398, 0.1614491734083714, 0.161273302849103, 0.1602180794934927, 0.159866338374956, 0.1589869855786141, 0.157052409426662, 0.1568765388673935, 0.1563489271895884, 0.1559971860710517, 0.1556454449525149, 0.1554695743932465, 0.1552937038339782, 0.1549419627154414, 0.1545902215969047, 0.1540626099190995, 0.1

In [6]:
#Let's store all of the scores in an list of lists
pth = os.path.join("Score_Combos","1")
files = os.listdir(pth)
scores=[]
ranks=[]
for i in range(8):
    score,rank=grabdata(files[i])
    scores.append(score)
    ranks.append(rank)

#test
#print(scores[0])
#had a small problem, understand that python lists needs to be allocated, I cannot just access a point that hasn't been allocated before.

In [8]:
#It would be easier to give each indicator a sort of key, like 1,2,3,...,8
indicators = {}
for i in range(8):
    indicators[files[i]]=i

print(indicators)

{'Three_Year_R-Squared.csv': 0, 'PEG_Ratio.csv': 1, 'Short_Interest_Shares_Outstanding.csv': 2, 'Price_Forward_EPS.csv': 3, 'Estimated_Annual_EPS_Growth_-_1_YR_%.csv': 4, 'Trailing_Price_EPS.csv': 5, 'Transcript_Score_.csv': 6, 'Trailing_TEV_Total_Revenue.csv': 7}


We can say that the RSC is pretty much just our scores. 

However, we will need the ranks...

In [None]:
n=322 #should grab this data in a different way incase our data changes.
t=8

In [10]:
#takes two rsc functions and returns the cognitive diversity of those two functions.
def calcDiv(rsc1,rsc2):
    sum = 0
    for i in range(n):
        sum+=np.square(rsc1[i]+rsc2[i])
    sum/=n
    return np.sqrt(sum)

#test
score1,rank1=grabdata('Three_Year_R-Squared.csv')
cd = calcDiv(score,score1)
print(cd) #this is the cognitive diversity between financial indicators: Estimated annual eps growth and 3 year R sqaured

1.320238406820596


Nigga says Nigga
Weight for diversity str is complete.

In [11]:
#calculates the diversity strength weight of the indicator.
def calcDivStr(indicate):
    sum = 0
    for i in range(8):
        if(i==indicate):
            continue
        sum+=calcDiv(scores[indicate],scores[i])
    sum/=(t-1)
    return sum

#test
print(calcDivStr(indicators['Estimated_Annual_EPS_Growth_-_1_YR_%.csv']))

0.9254198062505682


In [13]:
divstrs = [] #this array divstr will be our weights for ds
for i in range(8):
    divstrs.append(calcDivStr(i))

totalDS=np.array(divstrs).sum()
#I store all the diversity strengths in an array
#I also grab the total diversity used to calculate the weights

In [14]:
print(totalDS)

11.127085221352184


Calculate the performance strength now. (Doing this on 9-16-25)

We defined performance strength to be what? The performance of each indicator (This is taking the top 15% of stocks with the highest scores and deciding how good of a metric this shit is.)


In [25]:
#take the top 15% stocks based on their sharpe ratio. And then take the average of their score, that will be weight for that indicator
def calcPerf(indicate):
    
    indicator_name = files[indicate]
    filepath = os.path.join("Score_Combos", "1", indicator_name)

    df = pd.read_csv(filepath)

    '''
    
    # Ensure the DataFrame has 'Sharpe Ratio' and 'Score' columns
    if sharpe_col not in df.columns or 'Score' not in df.columns:
        print(f"Error: '{sharpe_col}' or 'Score' column not found in {indicator_name}")
        return None
    '''
    # Sort by Sharpe Ratio in descending order and take the top 15%
    top_15_percent = df.sort_values(by=sharpe_col, ascending=False).head(int(len(df) * 0.15))

    # Calculate the average score for the top 15%
    average_score = top_15_percent['Score'].mean()

    return average_score

In [31]:
print(calcPerf(indicators['Transcript_Score_.csv']))

0.5251053370786517


In [33]:
perfstrs = [] # This array perfstrs will be our weights for performance strength
for i in range(8):
    perfstrs.append(calcPerf(i))

#totalPS = np.array(perfstrs).sum()

print(perfstrs)

print(indicators)

[np.float64(0.4538194444444444), np.float64(0.9501429199648198), np.float64(0.9040570175438596), np.float64(0.7986338053740014), np.float64(0.1504609274240825), np.float64(0.8783069750275073), np.float64(0.5251053370786517), np.float64(0.7896999378109454)]
{'Three_Year_R-Squared.csv': 0, 'PEG_Ratio.csv': 1, 'Short_Interest_Shares_Outstanding.csv': 2, 'Price_Forward_EPS.csv': 3, 'Estimated_Annual_EPS_Growth_-_1_YR_%.csv': 4, 'Trailing_Price_EPS.csv': 5, 'Transcript_Score_.csv': 6, 'Trailing_TEV_Total_Revenue.csv': 7}


In [11]:
#function takes in a list of diversity strengths belonging to the indicators chosen.
'''def weightdiv(divstr):
    
    return np.sum(divstr)/totalDS

#test, weight for estimated annual eps growth 1yr
print(weightdiv(divstrs[0]))

#if weight is just divstr, which it should be, then 
print(divstrs)
'''

'def weightdiv(divstr):\n\n    return np.sum(divstr)/totalDS\n\n#test, weight for estimated annual eps growth 1yr\nprint(weightdiv(divstrs[0]))\n\n#if weight is just divstr, which it should be, then \nprint(divstrs)\n'

Keep in mind, this formula will change if when we go k's of 2,3, and onward. We will have to add multiple divstrs to our formula.
-Update: Expanded weightdiv to take in a list.



Weight for performance strength.

# Jabed: GOOD

Let's calculate the performance strength like this:
We take the top 15% of rsc for the sharpe ratio

In [12]:
#so based on the top 15% of stocks based on their score, we take the average of their sharpe ratio to determine that indicator's performance.
# We need to take the top 15%


Score Combination
For 1 stock d, lets say d_i
S_sc(d_i) = (sum all the score systems: weight of scoring system * the score that stock recieved in that scoring system  )/sum of all the weights in the scoring system.

In [15]:
#takes in a list of weights, and list of scores
#the weights list should be composed of all the weights for each indicator
#the score list should be composed of all the scores of that 1 stock for each indicator
def CalcSC(w,s):
    w=np.array(w)
    #s=np.array(s)
    #print(s)
    sc=w.dot(s)
    sc/=w.sum()
    return sc



Rank Combination

In [16]:
#Rank combination function.
#fundamentally similar to SC, wiht minute differences.
def CalcRC(w,r):
    w=np.array(w)
    #print(w)
    w=1/w
    #print(w)
    rc = w.dot(r)
    rc/=w.sum()
    return rc
    

In [17]:
#to test, I need to construct the list of scores/ranks first,
#for each stock, (332 stocks) we need to make a list of size 8 for each of their score values.

#this function takes a path to either rank or score combos.
def dataMatrix(path):
    theFiles = os.listdir(path)
    dfs=[] # a list of dataframes of company+score
    for f in files: #loop through each file and add to overall dfs
        p = os.path.join(path,f)
        d = pd.read_csv(p)
        if 'Score' in d.columns:
            d = d[['Company', 'Score']]
            d.rename(columns={'Score':f},inplace=True) #rename this column so not all columns are named score
            dfs.append(d)
        elif 'Rank' in d.columns:
            d=d[['Company', 'Rank']]
            d.rename(columns={'Rank':f},inplace=True) #rename this column so not all columns are named score
            dfs.append(d)
        else:
            print('Dead')
            continue
        #print(f)
    
    merging = dfs[0] 
    for df in dfs[1:]:
        merging = pd.merge(merging, df, on='Company', how='inner')
    print(type(merging))
    return merging

#print(pth)
#print(dataMatrix(pth))

OK lets get data of our scores and ranks

In [18]:
pth = os.path.join("Score_Combos","1")
scoreMatrix = dataMatrix(pth)
pth = os.path.join("Rank_Combos","1")

rankMatrix = dataMatrix(pth)

#these are our dataframes for the companies, and the scores each company got for each indicator.
#each row of the dataframe is 1 company. There are 8 columns, for the indicators.
#We need to loop through each of the rows to calculate the score/rank combinations.

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [19]:
#to iterate through the rows, I can use the iterrows() function. However a faster impelementation is using itertuples function
#this returns the rows as named tuples
ScoreCombination = [] 
CompaniesScore = []
for i, row in scoreMatrix.iterrows():
    #print(row)
    comp = row['Company']
    r = row.drop('Company').values
    #print(r[7])
    CompaniesScore.append(comp)
    ScoreCombination.append(CalcSC(divstrs,r))
print(CompaniesScore)
print(ScoreCombination)

['NasdaqGS:AAPL', 'NasdaqGS:TROW', 'NasdaqGS:MSFT', 'NYSE:MCO', 'NYSE:MA', 'NYSE:APH', 'NYSE:AMP', 'NYSE:PH', 'NasdaqGS:AMZN', 'NYSE:FTV', 'NYSE:DOV', 'NasdaqGS:KLAC', 'NasdaqGS:LRCX', 'NYSE:IVZ', 'NYSE:IR', 'NasdaqGS:QCOM', 'NasdaqGS:TXN', 'NYSE:SPGI', 'NasdaqGS:TER', 'NYSE:WAB', 'NasdaqGS:CDNS', 'NasdaqGS:TRMB', 'NYSE:V', 'NYSE:ITW', 'NasdaqGS:ROP', 'NasdaqGS:PTC', 'NYSE:EMR', 'NYSE:HLT', 'NasdaqGS:AMD', 'NasdaqGS:MPWR', 'NYSE:PNR', 'NYSE:ETN', 'NYSE:XYL', 'NYSE:CPAY', 'NasdaqGS:FFIV', 'NasdaqGS:MAR', 'NYSE:PPG', 'NasdaqGS:ISRG', 'NYSE:PRU', 'NYSE:AME', 'NYSE:TT', 'NYSE:TXT', 'NYSE:GLW', 'NasdaqGS:ON', 'NYSE:HWM', 'NYSE:TDG', 'NYSE:EMN', 'NYSE:CARR', 'NasdaqGS:CDW', 'NYSE:NOW', 'NYSE:BEN', 'NYSE:DD', 'NYSE:IQV', 'NasdaqGS:ADP', 'NYSE:JCI', 'NYSE:MSCI', 'NYSE:VMC', 'NasdaqGS:HON', 'NasdaqGS:LIN', 'NYSE:IEX', 'NYSE:ROK', 'NasdaqGS:BKNG', 'NYSE:EFX', 'NYSE:MGM', 'NYSE:TDY', 'NasdaqGS:CTSH', 'NYSE:ECL', 'NasdaqGS:META', 'NYSE:MAS', 'NYSE:LII', 'NasdaqGS:NDAQ', 'NYSE:PLD', 'NYSE:MET', 'NY

In [20]:
RankCombination = []
CompaniesRank = []
for i, row in rankMatrix.iterrows():
    #print(row)
    comp = row['Company']
    r = row.drop('Company').values
    CompaniesRank.append(comp)
    RankCombination.append(CalcRC(divstrs,r))
print(CompaniesRank)
print(RankCombination)

['NasdaqGS:AAPL', 'NasdaqGS:TROW', 'NasdaqGS:MSFT', 'NYSE:MCO', 'NYSE:APH', 'NYSE:AMP', 'NYSE:MA', 'NYSE:PH', 'NYSE:FTV', 'NasdaqGS:AMZN', 'NYSE:DOV', 'NasdaqGS:KLAC', 'NasdaqGS:LRCX', 'NYSE:IR', 'NasdaqGS:QCOM', 'NasdaqGS:TXN', 'NYSE:IVZ', 'NasdaqGS:CDNS', 'NYSE:SPGI', 'NasdaqGS:TER', 'NasdaqGS:TRMB', 'NYSE:WAB', 'NYSE:V', 'NYSE:ITW', 'NasdaqGS:ROP', 'NYSE:HLT', 'NasdaqGS:AMD', 'NYSE:EMR', 'NasdaqGS:PTC', 'NYSE:ETN', 'NYSE:PNR', 'NYSE:CPAY', 'NasdaqGS:MPWR', 'NYSE:XYL', 'NasdaqGS:FFIV', 'NYSE:AME', 'NYSE:PRU', 'NYSE:PPG', 'NasdaqGS:ISRG', 'NasdaqGS:MAR', 'NYSE:TT', 'NYSE:GLW', 'NYSE:TXT', 'NasdaqGS:ON', 'NYSE:TDG', 'NYSE:EMN', 'NasdaqGS:CDW', 'NYSE:CARR', 'NYSE:HWM', 'NYSE:JCI', 'NYSE:IQV', 'NasdaqGS:ADP', 'NYSE:NOW', 'NYSE:DD', 'NYSE:BEN', 'NasdaqGS:HON', 'NYSE:VMC', 'NYSE:MSCI', 'NYSE:EFX', 'NYSE:TDY', 'NasdaqGS:CTSH', 'NasdaqGS:BKNG', 'NYSE:ROK', 'NYSE:IEX', 'NYSE:MGM', 'NasdaqGS:LIN', 'NYSE:ECL', 'NYSE:DAL', 'NYSE:LII', 'NasdaqGS:META', 'NYSE:MAS', 'NasdaqGS:NDAQ', 'NYSE:MET', 'NY

In [21]:
#Take top 15%
#also view the best stocks for each
Score = pd.DataFrame({'Company':CompaniesScore,'SC':ScoreCombination})
Rank = pd.DataFrame({'Company':CompaniesRank,'RC':RankCombination})

Score = Score.sort_values(by='SC',ascending= False) #tsla is on the bottom damn
print(Score)
Rank = Rank.sort_values(by='RC',ascending=True)#tsla is on the top for rank, seems more real
print(Rank)
'''
filename = f"Results/8/Rank.csv"
Rank.to_csv(filename,index=False)
print(f"Saved {filename}")
filename = f"Results/8/Score.csv"
Score.to_csv(filename,index=False)
print(f"Saved {filename}")
'''

           Company        SC
6         NYSE:AMP  0.864502
38        NYSE:PRU  0.833520
13        NYSE:IVZ  0.829894
15   NasdaqGS:QCOM  0.823349
79    NasdaqGS:STX  0.822557
..             ...       ...
136  NasdaqGS:CSGP  0.526761
163       NYSE:MAA  0.515167
143       NYSE:DLR  0.503947
115  NasdaqGS:TSLA  0.496026
221  NasdaqGS:AXON  0.472107

[322 rows x 2 columns]
           Company          RC
5         NYSE:AMP   62.123891
36        NYSE:PRU   67.022766
72        NYSE:MET   71.481326
84       NYSE:APTV   74.360894
14   NasdaqGS:QCOM   74.381495
..             ...         ...
131       NYSE:ESS  245.072230
183       NYSE:DOC  250.214401
161       NYSE:MAA  257.421500
218  NasdaqGS:AXON  258.999926
115  NasdaqGS:TSLA  264.017914

[322 rows x 2 columns]


'\nfilename = f"Results/8/Rank.csv"\nRank.to_csv(filename,index=False)\nprint(f"Saved {filename}")\nfilename = f"Results/8/Score.csv"\nScore.to_csv(filename,index=False)\nprint(f"Saved {filename}")\n'

Best Stock via Score Combination:
NYSE:AMP

Best Stock via Rank Combination:
NasdaqGS:TSLA

The top 15% of stocks

In [22]:
def ScoreCombinations(scoreMatrix, K, weight):
    ScoreCombination = []
    CompaniesScore = []

    # Get indicator column names (excluding 'Company')
    indicator_cols = [col for col in scoreMatrix.columns if col != 'Company']

    # All possible column index combinations
    col_combos = list(combinations(range(len(indicator_cols)), K))
    

    for _, row in scoreMatrix.iterrows():
        comp = row['Company']
        r = row.drop('Company').values  # numeric values only

        combos_for_company = []
        for combo in col_combos:
            r_subset = [r[idx] for idx in combo] #subset of which models we chose
            w_subset = [weight[idx] for idx in combo]
            score = CalcSC(w_subset, r_subset)

            # store both indices and values
            combos_for_company.append({
                "indices": combo,
                "columns": tuple(indicator_cols[idx] for idx in combo),
                "values": tuple(r_subset),
                "Score":score
            })

        CompaniesScore.append(comp)
        ScoreCombination.append(combos_for_company)

    return CompaniesScore, ScoreCombination


In [23]:
def RankCombinations(rankMatrix, K, weight):
    RankCombination = []
    CompaniesRank = []

    # Get indicator column names (excluding 'Company')
    indicator_cols = [col for col in rankMatrix.columns if col != 'Company']

    #All possible column index combinations
    col_combos = list(combinations(range(len(indicator_cols)), K))
    

    for _, row in rankMatrix.iterrows():
        comp = row['Company']
        r = row.drop('Company').values  #numeric values only

        combos_for_company = []
        for combo in col_combos:
            r_subset = [r[idx] for idx in combo] #subset of which models we chose
            w_subset = [weight[idx] for idx in combo]
            rank = CalcRC(w_subset, r_subset)

            # store both indices and values
            combos_for_company.append({
                "indices": combo,
                "columns": tuple(indicator_cols[idx] for idx in combo),
                "values": tuple(r_subset),
                "Rank":rank
            })

        CompaniesRank.append(comp)
        RankCombination.append(combos_for_company)

    return CompaniesRank, RankCombination


In [25]:
K2CompScore,K2SC=ScoreCombinations(scoreMatrix,2,divstrs)

Score = pd.DataFrame({'Company':K2CompScore,'SC':K2SC})
print(Score)
filename = f"Results/2/Score.csv"
Score.to_csv(filename,index=False)
print(f"Saved {filename}")

           Company                                                 SC
0    NasdaqGS:AAPL  [{'indices': (0, 1), 'columns': ('Three_Year_R...
1    NasdaqGS:TROW  [{'indices': (0, 1), 'columns': ('Three_Year_R...
2    NasdaqGS:MSFT  [{'indices': (0, 1), 'columns': ('Three_Year_R...
3         NYSE:MCO  [{'indices': (0, 1), 'columns': ('Three_Year_R...
4          NYSE:MA  [{'indices': (0, 1), 'columns': ('Three_Year_R...
..             ...                                                ...
317       NYSE:MCK  [{'indices': (0, 1), 'columns': ('Three_Year_R...
318       NYSE:NOC  [{'indices': (0, 1), 'columns': ('Three_Year_R...
319       NYSE:BMY  [{'indices': (0, 1), 'columns': ('Three_Year_R...
320        NYSE:SW  [{'indices': (0, 1), 'columns': ('Three_Year_R...
321      BATS:CBOE  [{'indices': (0, 1), 'columns': ('Three_Year_R...

[322 rows x 2 columns]


OSError: Cannot save file into a non-existent directory: 'Results/2'

Function to go through all of this.

In [1]:
def MultipleSC(scoreMatrix, weight,w_name,output_dir="Results", k_min=2, k_max=5):
    for K in range(k_min, k_max + 1):
        compScore, SC = ScoreCombinations(scoreMatrix, K,weight)
        Score = pd.DataFrame({'Company': compScore, 'SC': SC})

        # Ensure directory exists
        out_path = os.path.join(output_dir, w_name,str(K))
        os.makedirs(out_path, exist_ok=True)

        filename = os.path.join(out_path, "Score.csv")
        Score.to_csv(filename, index=False)
        print(f"Saved {filename}")


def MultipleRC(rankMatrix, weight,w_name, output_dir="Results", k_min=2, k_max=5):
    for K in range(k_min, k_max + 1):
        compRank, RC = RankCombinations(rankMatrix, K,weight)
        Rank = pd.DataFrame({'Company': compRank, 'RC': RC})

        # Ensure directory exists
        out_path = os.path.join(output_dir, w_name,str(K))
        os.makedirs(out_path, exist_ok=True)

        filename = os.path.join(out_path, "Rank.csv")
        Rank.to_csv(filename, index=False)
        print(f"Saved {filename}")

In [None]:
MultipleSC(scoreMatrix,divstrs,"Divstrs")
MultipleRC(rankMatrix,divstrs,"Divstrs")

MultipleSC(scoreMatrix,perfstrs,"Performance")
MultipleRC(rankMatrix,perfstrs,"Performance")

#MultipleSC(scoreMatrix,)
#MultipleRC(rankMatrix,)

Saved Results/Divstrs/2/Score.csv
Saved Results/Divstrs/3/Score.csv
Saved Results/Divstrs/4/Score.csv
Saved Results/Divstrs/5/Score.csv
Saved Results/Divstrs/2/Rank.csv
Saved Results/Divstrs/3/Rank.csv
Saved Results/Divstrs/4/Rank.csv
Saved Results/Divstrs/5/Rank.csv
Saved Results/Performance/2/Score.csv
Saved Results/Performance/3/Score.csv
Saved Results/Performance/4/Score.csv
Saved Results/Performance/5/Score.csv
Saved Results/Performance/2/Rank.csv
Saved Results/Performance/3/Rank.csv
Saved Results/Performance/4/Rank.csv
Saved Results/Performance/5/Rank.csv


Which combination of indices is the best? This means they should have a high score to sharpe ratio. 

We have data to see exactly how each combo does with each stock, what we should do is take the average sharpe ratio of the top 15% of score/rank and develop charts in matplotlib.

We can do charts for a specific stock down to charts for the whole s&p 500. 

For the whole 322 stocks, we choose top 15% of stocks based on score/rank, then map out the sharpe ratio average, similar to the graph we did earlier.




In [None]:
def process_results(base_dir="Results/Divstrs", sharpe_file="company_sharpe_ratios.csv", k_min=1, k_max=5, summary_dir="Results/Summary"):
    os.makedirs(summary_dir, exist_ok=True)

    # Load Sharpe ratios
    sharpe_df = pd.read_csv(sharpe_file)
    sharpe_df.columns = ["Company", "Sharpe"]
    sharpe_map = dict(zip(sharpe_df["Company"], sharpe_df["Sharpe"]))
    class FakeNP:
        def float64(self, x):
            return float(x)

    safe_env = {"np": FakeNP()}
    #safe_env = {"np": {"float64": float}}  # allow np.float64


      
    for K in range(k_min, k_max+1):
        rank_file = os.path.join(base_dir, str(K), "Rank.csv")
        if not os.path.exists(rank_file):
            print(f"Skipping {rank_file}, not found.")
            continue

        # Load rank file
        df = pd.read_csv(rank_file)
        
        # Expand RC column into rows
        expanded = []
        for _, row in df.iterrows():
            company = row["Company"]
            combos = eval(row["RC"], {"__builtins__": None}, safe_env)  # safe eval
            for combo in combos:
                expanded.append({
                    "Company": company,
                    "Indicators": tuple(sorted(combo["indices"])),
                    "Rank": float(combo["Rank"])
                })
        expanded_df = pd.DataFrame(expanded)

        results = []
        for indicators, group in expanded_df.groupby("Indicators"):
            group_sorted = group.sort_values("Rank", ascending=False)
            top_n = max(1, int(len(group_sorted) * 0.15))  # top 15%
            top_companies = group_sorted.head(top_n)["Company"]

            sharpe_vals = [sharpe_map[c] for c in top_companies if c in sharpe_map]
            if sharpe_vals:
                avg_sharpe = sum(sharpe_vals) / len(sharpe_vals)
                results.append({"Indicators": indicators, "AverageSharpe": avg_sharpe})

        results_df = pd.DataFrame(results).sort_values("AverageSharpe", ascending=False)

        # Save summary
        summary_file = os.path.join(summary_dir, f"K{K}_summary.csv")
        results_df.to_csv(summary_file, index=False)
        print(f"Saved {summary_file}")

        # Plot top 5 combinations
        top5 = results_df.head(5)
        plt.figure(figsize=(10, 6))
        plt.bar(range(len(top5)), top5["AverageSharpe"])
        plt.xticks(range(len(top5)), [str(ind) for ind in top5["Indicators"]], rotation=45, ha="right")
        plt.ylabel("Average Sharpe Ratio")
        plt.title(f"Top 5 Indicator Combinations (K={K})")
        plt.tight_layout()
        plot_file = os.path.join(summary_dir, f"K{K}_top5.png")
        plt.savefig(plot_file)
        plt.close()
        print(f"Saved {plot_file}")

In [53]:
process_results(base_dir="Results/Divstrs",
                sharpe_file="company_sharpe_ratios.csv",
                k_min=1, k_max=5,
                summary_dir="Results/Summary")

Skipping Results/Divstrs/1/Rank.csv, not found.
Saved Results/Summary/K2_summary.csv
Saved Results/Summary/K2_top5.png
Saved Results/Summary/K3_summary.csv
Saved Results/Summary/K3_top5.png
Saved Results/Summary/K4_summary.csv
Saved Results/Summary/K4_top5.png
Saved Results/Summary/K5_summary.csv
Saved Results/Summary/K5_top5.png
