## Stock Picker


In [1]:
from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime
from pathlib import Path

In [9]:
# Reads input regarding file name
def safe_risky_wise ():
    standard = input('Please pick one from the three: "safe", "risky" or "wise" stocks')
    if (standard != 'safe') & (standard != 'risky') & (standard != 'wise'):
        print ("Input invalid please try again")
        return safe_risky_wise ()
    else:
        return standard
    
def short_long ():
    term = input('Please pick one from the two: "short" or "long" term')
    if (term!='short') & (term!='long'):
        print ("Input invalid please try again")
        return short_long ()
    else:
        return term

def pick_num ():
    wish_to_pick = input('Please enter the number of stocks desired to pick')
    if (not (wish_to_pick.isnumeric())) | int(wish_to_pick) <= 0:
        print ("Input invalid please try again")
        return pick_num()
    else:
        return wish_to_pick

def money ():
    money = input('Please enter the amount of money to invest')
    if (not money.isnumeric()) | int(money) <= 0:
        print ("Input invalid please try again")
        return pick_num()
    else:
        return money
    

def pick_file ():
    user_input = input("Please upload your csv ticker file name")
    file_name = Path(user_input)
    if file_name.exists():
        return user_input
    else:
        print("File not found please try again")
        return pick_file()


volatility = safe_risky_wise()
print(volatility)
short_long = short_long()
money = money()
wish_to_pick = pick_num()
file = pick_file()

# read CSV file
ticker_df = pd.read_csv('Tickers.csv', header = None)
ticker_df.columns = ['Tickers']
unfiltered_ticker_lst = list(ticker_df['Tickers'])

# create lists 
ticker_list = []
ticker_dict ={}
ticker_history = {}
ticker_info = {}

# timeframe
start_date = '2020-01-01'
purchase_date = datetime.date(datetime.now()) # day of purchase.

# extract ticker info and ticker history
for name in unfiltered_ticker_lst:
    ticker_info[name] = yf.Ticker(name).info
    if ticker_info[name].get('regularMarketPrice') == None:
        continue
    else:
        ticker_history[name] = yf.Ticker(name).history (start = start_date, end = purchase_date, interval = '1d')
    
# filter ticker list so that it only contains real tickers in USD
# look for actively traded stocks.
for i in unfiltered_ticker_lst:
    info = ticker_info[i]
    if info.get('regularMarketPrice') == None:
        continue
    if info.get('currency')!= 'USD':
        continue
    if info.get ('market') != ('us_market'):
        continue
    history = ticker_history[name]
    if history.Volume.mean() < 10000:
        continue
    else:
        ticker_list.append(i)
        ticker_dict[i] = info
        
# remove the duplicated tickers
ticker_list = list(dict.fromkeys(ticker_list))

Please pick one from the three: "safe", "risky" or "wise" stocks short


Input invalid please try again


Please pick one from the three: "safe", "risky" or "wise" stocks safe


safe


Please pick one from the two: "short" or "long" term short
Please enter the amount of money to invest 788
Please enter the number of stocks desired to pick 78
Please upload your csv ticker file name ti


File not found please try again


Please upload your csv ticker file name ticker


File not found please try again


Please upload your csv ticker file name Tickers.csv


In [10]:
# calculate market capitalization
# getting number of shares outstanding for each valid ticker
share_list =[]
for name in ticker_list:
    share_list.append(ticker_info[name].get('sharesOutstanding'))

# create dataframe for market capitalisation
df_market_cap = pd.DataFrame({'company' :ticker_list})
df_market_cap['outstanding_shares'] = share_list
df_market_cap['closing'] = 0

# function that returns the most recent available closing price (closest to end-date)
def find_closing (df_close, i):
    closing = df_close.Close.iloc[i]
    if pd.isna(closing) == True:
        return find_closing (df_close, i-1)
    else:
        return closing

# read in all the tickers and extract the outstanding shares and closing prices
i=0
pd.options.mode.chained_assignment = None
for ticker in ticker_list:
    df_market_cap.closing.iloc[i] = find_closing(ticker_history[ticker], -3);
    i+=1;

# calculate the market capitalisation
df_market_cap['Market_Cap'] = df_market_cap.closing* df_market_cap.outstanding_shares

# rank the stocks based on their market cap
# higher market cap would have higher rank (higher number)
df_market_cap['Rank'] = df_market_cap.Market_Cap.rank(method='max')

# NEW CODE 
df_market_cap['Rank'] = df_market_cap['Rank'].fillna(df_market_cap['Rank'].mean())

market_cap_dict = dict(zip (ticker_list, df_market_cap['Rank'].to_list()))

In [11]:
df_market_cap

Unnamed: 0,company,outstanding_shares,closing,Market_Cap,Rank
0,AAPL,16334399488,175.529999,2867177000000.0,28.0
1,ABBV,1767879936,135.87999,240219500000.0,21.0
2,LOW,673747008,250.720001,168921900000.0,15.0
3,HOOD,835675008,16.25,13579720000.0,3.0
4,AMZN,507148000,3304.139893,1675688000000.0,27.0
5,AXP,774556032,174.470001,135136800000.0,13.0
6,BAC,8184079872,48.860001,399874100000.0,24.0
7,BMBL,119799000,32.689999,3916229000.0,2.0
8,BK,825820992,63.549999,52480920000.0,6.0
9,SQ,399175008,145.470001,58067990000.0,8.0


We know that market capitalisation shows the total market value of a company which is the total number of shares times its stock price. It has also been acknowledged that companies with large market capitalisation tend to have a larger size. They are more favourable among investors. Unlike small companies, companies with large market capitalisation tend to be more stable in terms of their stock prices since they have expanded so big that small market fluctuations would not result in huge stock price fluctuations. Their stock prices would likely grow steadily rather than experiencing dramatic ups and downs. Since our strategy is safe, we need to pick stocks that are stable, and companies with large market capitalisation would satisfy the strategy.

We can see from the dataframe above that, companies have large market capitalisation would have a higher rank which means it would have a higher number of rank, which when we later calculate the score for market capitalisation, they would receive a higher score.

In [12]:
##Get the beta of the stock
ticker_beta = {}
for k in ticker_list:
    if ticker_dict[k]['beta'] == None:
        ticker_beta[k] = 1
    else:
        ticker_beta[k] = ticker_dict[k]['beta']

In [13]:
# show beta dict
ticker_beta

{'AAPL': 1.202736,
 'ABBV': 0.824125,
 'LOW': 1.292448,
 'HOOD': 1,
 'AMZN': 1.095793,
 'AXP': 1.243284,
 'BAC': 1.49113,
 'BMBL': 1,
 'BK': 1.156614,
 'SQ': 2.289406,
 'VZ': 0.417638,
 'CMCSA': 0.944247,
 'SHOP': 1.399391,
 'COST': 0.635073,
 'CSCO': 0.941713,
 'CVS': 0.841358,
 'GM': 1.175874,
 'GOOG': 1.058286,
 'JPM': 1.115053,
 'IBM': 1.129244,
 'ORCL': 0.788646,
 'OXY': 2.360364,
 'DUOL': 1,
 'PEP': 0.665484,
 'SLB': 2.303091,
 'SO': 0.505828,
 'SPG': 1.520527,
 'PYPL': 1.154586}

In [14]:
# rank the companies based on beta
ticker_beta_rank = {key: rank for rank, key in enumerate(sorted(ticker_beta, key=ticker_beta.get, reverse=True), 1)}
ticker_beta_rank

{'OXY': 1,
 'SLB': 2,
 'SQ': 3,
 'SPG': 4,
 'BAC': 5,
 'SHOP': 6,
 'LOW': 7,
 'AXP': 8,
 'AAPL': 9,
 'GM': 10,
 'BK': 11,
 'PYPL': 12,
 'IBM': 13,
 'JPM': 14,
 'AMZN': 15,
 'GOOG': 16,
 'HOOD': 17,
 'BMBL': 18,
 'DUOL': 19,
 'CMCSA': 20,
 'CSCO': 21,
 'CVS': 22,
 'ABBV': 23,
 'ORCL': 24,
 'PEP': 25,
 'COST': 26,
 'SO': 27,
 'VZ': 28}

The above dictionaries show beta for each company. 
Since beta is a measure of a stock's volatility in relation to the overall market.
A stock that swings more than the market over time has a beta above 1.0. If a stock moves less than the market, the stock's beta is less than 1.0. 
Since our strategy is safe, we need stocks that are less risky and less volatile.
High-beta stocks are supposed to be riskier but provide higher return potential; low-beta stocks pose less risk but also lower returns.
In this case, we give the ones with lower beta a higher score(higher number) so that when calculating the score fot beta, the companies with lower beta would receive higher score since they are less volatile and safer 

In [15]:
# Here we compare a Charles Schwab's SP500 Index Fund beta (representing a portion of the aggregate stock market)
# with each ticker in our 
index_beta = yf.Ticker('SWPPX').info['beta3Year']
difference_between_market_and_tickers = []
for i in list(ticker_beta.values()):
    difference_between_market_and_tickers.append(i - index_beta)
is_beta_less_than_1 = []
for i in difference_between_market_and_tickers:
    if abs(i) < 1:
        is_beta_less_than_1.append(True)
    else: 
        False
is_beta_less_than_1 

[True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True]

Since a large portion/all of these differences are in fact less than 1, we can conclude
that a safe portfolio has been constructed that is somewhat similar to market volatility especially when we rank and filter our picked tickers.

In [16]:
print(str((is_beta_less_than_1.count(True))/len(is_beta_less_than_1) * 100) + '% of ticker betas are within 1 away from the market beta demonstrating that ticker volality is comparable to that of the market and thus, a safe portfolio will be formed')

100.0% of ticker betas are within 1 away from the market beta demonstrating that ticker volality is comparable to that of the market and thus, a safe portfolio will be formed


In [17]:
##Get the return and standard deviation of each stock to calculate risk adjusted daily returns 
tickers_close = []
tickers_close_dict = {}
for i in ticker_list:
    tickers_close.append(ticker_history[i]['Close'])
    close = ticker_history[i]['Close']
    tickers_close_dict[i] = close
ticker_keys = tickers_close_dict.keys()

listof_rate_of_return = []
listof_std = []
for i in range(len(tickers_close)):
    close_df = pd.DataFrame(tickers_close[i])
    pct_change = 100*close_df.pct_change()
    pct_change.columns = ['Rate of Return (%)']
    avg_pct_change = pct_change['Rate of Return (%)'].describe()['mean']
    listof_rate_of_return.append(avg_pct_change)
    std = pct_change.std()
    listof_std.append(std)
listof_std_float = []
for i in range(len(listof_std)):
    listof_std_float.append(float(listof_std[i]))
rate_of_return_dict_1d = dict(zip(ticker_keys, listof_rate_of_return))
std_dict_1d = dict(zip(ticker_keys, listof_std_float))

 # calculate the return over standard deviation for one day, return a dict
return_over_std_1d_dict={}
for ticker in ticker_list:
    ratio = (rate_of_return_dict_1d[ticker]/std_dict_1d[ticker])
    return_over_std_1d_dict[ticker] = ratio
return_over_std_rank_1d = {key: rank for rank, key in enumerate(sorted(return_over_std_1d_dict, key=return_over_std_1d_dict.get, reverse=False), 1)}

The above dictionary shows the standard deviation of each company. Standard deviation mainly represents the riskiness of each ticker since it calculates how spread out the data is. In this case, since we are aiming to choose the safest stocks. Therefore, the stocks that have less outlier stock prices would have a more steady stock price. Lower standard deviation indicates the stocks is less risky and has less fluctuations in stock prices.

In [18]:
return_over_std_rank_1d

{'HOOD': 1,
 'BMBL': 2,
 'DUOL': 3,
 'VZ': 4,
 'SLB': 5,
 'IBM': 6,
 'OXY': 7,
 'JPM': 8,
 'SO': 9,
 'CMCSA': 10,
 'SPG': 11,
 'BK': 12,
 'AXP': 13,
 'BAC': 14,
 'CSCO': 15,
 'PEP': 16,
 'GM': 17,
 'PYPL': 18,
 'CVS': 19,
 'SQ': 20,
 'ORCL': 21,
 'AMZN': 22,
 'ABBV': 23,
 'LOW': 24,
 'SHOP': 25,
 'GOOG': 26,
 'COST': 27,
 'AAPL': 28}

The two dictionaries above indicate the average rate of return over standard deviation (sharp ratio). Sharp ratio indicates the average return per unit of risk. Investors should invest in the stocks that have higher sharp ratio since it is the safest. The companies that have higher sharp ratio would return the same amount of money with less risk involved which indicates that they are safer. Since our strategy is safe, it is better for us to choose stocks that have higher sharp ratio. We also ranked the companies based on their ratios. The companies with larger ratio would receive larger score which would lead to a larger score when calculating the score for each company.

In [19]:
##Get the return and standard deviation of each stock to calculate risk adjusted monthly returns 
tickers_close_1mo = []
tickers_close_dict_1mo = {}
for i in ticker_list:
    tickers_close_1mo.append(yf.Ticker(i).history(start="2019-01-01", end="2021-11-01", interval='1mo')['Close'])
    close_1mo = yf.Ticker(i).history(start="2019-01-01", end="2021-11-01", interval='1mo')['Close']
    tickers_close_dict_1mo[i] = close_1mo
ticker_keys = tickers_close_dict_1mo.keys()
listof_rate_of_return_1mo = []
listof_std_1mo = []
for i in range(len(tickers_close_1mo)):
    close_df_1mo = pd.DataFrame(tickers_close_1mo[i])
    pct_change_1mo = 100*close_df_1mo.pct_change()
    pct_change_1mo.columns = ['Rate of Return (%)']
    avg_pct_change_1mo = pct_change_1mo['Rate of Return (%)'].describe()['mean']
    listof_rate_of_return_1mo.append(avg_pct_change_1mo)
    std_1mo = pct_change_1mo.std()
    listof_std_1mo.append(std_1mo)
listof_std_1mo_float = []
for i in range(len(listof_std_1mo)):
    listof_std_1mo_float.append(float(listof_std_1mo[i]))
rate_of_return_dict_1mo = dict(zip(ticker_keys, listof_rate_of_return_1mo))
std_dict_1mo = dict(zip(ticker_keys, listof_std_1mo_float))

# calculate return over standard deviation for one month, return a dict
return_over_std_1mo_dict={}
for ticker in ticker_list:
    ratio = (rate_of_return_dict_1mo[ticker]/std_dict_1mo[ticker])
    return_over_std_1mo_dict[ticker] = ratio

# rank the tickers by their expected one month rate of return/ std and return a dict
# Note that tickers with higher return/std would receive higher rank
return_over_std_rank_1mo = {key: rank for rank, key in enumerate(sorted(return_over_std_1mo_dict, key=return_over_std_1mo_dict.get, reverse=False), 1)}

In [20]:
return_over_std_1mo_dict

{'AAPL': 0.4826713762377341,
 'ABBV': 0.22634625820451346,
 'LOW': 0.3414500848283831,
 'HOOD': -1.5285197020144545,
 'AMZN': 0.31454086217240007,
 'AXP': 0.18332153901498718,
 'BAC': 0.19799315193584285,
 'BMBL': -0.33246776565956004,
 'BK': 0.0833329847444518,
 'SQ': 0.28106125317552794,
 'VZ': 0.044691724471135515,
 'CMCSA': 0.18930473242079648,
 'SHOP': 0.56165238647176,
 'COST': 0.5629467018449709,
 'CSCO': 0.12078456568637284,
 'CVS': 0.19300882440367756,
 'GM': 0.17016936411172306,
 'GOOG': 0.42325591200482815,
 'JPM': 0.22190647298980212,
 'IBM': 0.048228003438864374,
 'ORCL': 0.33507483150768475,
 'OXY': 0.03362858071770054,
 'DUOL': -0.03804747290147993,
 'PEP': 0.27039115694758564,
 'SLB': 0.0230692909760266,
 'SO': 0.2002884252888251,
 'SPG': 0.07656492824548679,
 'PYPL': 0.2607711948443393}

In [21]:
return_over_std_rank_1mo

{'HOOD': 1,
 'BMBL': 2,
 'DUOL': 3,
 'SLB': 4,
 'OXY': 5,
 'VZ': 6,
 'IBM': 7,
 'SPG': 8,
 'BK': 9,
 'CSCO': 10,
 'GM': 11,
 'AXP': 12,
 'CMCSA': 13,
 'CVS': 14,
 'BAC': 15,
 'SO': 16,
 'JPM': 17,
 'ABBV': 18,
 'PYPL': 19,
 'PEP': 20,
 'SQ': 21,
 'AMZN': 22,
 'ORCL': 23,
 'LOW': 24,
 'GOOG': 25,
 'AAPL': 26,
 'SHOP': 27,
 'COST': 28}

We also have calculated the average return over standard deviation (sharp ratio) for each ticker for monthly period. Since daily stock prices could have lots of fluctuations, monthly period data gives a more general and overall trend. Hence it is better to combine these two when analyzing the overall riskiness of the companies.

The above dictionary represents the the scoring for sharp ratio for monthly data. It uses the same method as the daily data except it has a different time interval. We can see that the scoring of each company for daily and monthly data are not the same but are similar. Investors should invest in stocks that have large sharp ratio since larger sharp ratio indicates that same amount of money can be achieved with smaller risk involved. since our strategy is safe and less risky we need companies that have higher sharp ratio so that when when stocks are bought, they tend to have less fluctuations.

Again, the companies with large sharp ratio would receive a higher score which would also lead to a higher score when we are picking the stocks.

In [None]:
##The formula for calculating the score of the stock. This score will then be ranked to ensure the highest scoring stocks are chosen.
## 20% mkt cap  + 30% rate of return/std (1d) + 20% beta + 30% rate of return/std (1m) 

We gave each factor that we come up with a percentage. The percentage represents how much this factor would weigh when calculating the final score for each company. We gave 30% to return/std (1d) and 30% to return/std (1m) because we think that we can directly see the riskiness through these data, and they are more representative of the overall riskiness of each company so we give each of them a larger proportion. For beta and capitalisation, we each gave them 20% because we believed that they are less representative compared with the two ratios.

With these four factors being calculated and being assigned proportion, we believe that they will generate a reliable score for each company in terms of their riskiness. The safer the company is, the higher the score is.

In [22]:
#creating a dictionary with each stock ranking 
stock_score_list = {}
stock_score_short = {}
stock_score_long = {}
stock_wise_long = {}

for ticker in ticker_list:
    # wise
    score = 0.2 * (ticker_beta_rank[ticker]) + 0.6 * (return_over_std_rank_1d[ticker])+0.2*market_cap_dict[ticker]
    wise_long = 0.2 * (ticker_beta_rank[ticker]) + 0.6 *(return_over_std_rank_1mo[ticker])+0.2*market_cap_dict[ticker]
    #safe
    score_short = 0.2 * (ticker_beta_rank[ticker]) + 0.35 * (std_dict_1d[ticker]) + 0.35 * (std_dict_1d[ticker]) + 0.1 * (rate_of_return_dict_1d[ticker])
    score_long = 0.2 * (ticker_beta_rank[ticker]) + 0.35 * (std_dict_1mo[ticker]) + 0.35 * (std_dict_1mo[ticker]) + 0.1 * (rate_of_return_dict_1d[ticker])
    
    stock_score_list[ticker] = {ticker:score}
    stock_wise_long[ticker] = wise_long
    stock_score_short[ticker] = score_short
    stock_score_long[ticker] = score_long

stock_wise_long = dict(sorted(stock_wise_long.items(), key=lambda item: item[1]))
stock_score_long = dict(sorted(stock_score_long.items(), key=lambda item: item[1]))
stock_score_short = dict(sorted(stock_score_short.items(), key=lambda item: item[1]))


In [None]:
#sorting the stocks into their respective sector, so highest from each sector is chosen to minimize concentratio.
ticker_sectors = {}
for ticker in ticker_list:
    # NEW CODE
    if 'sector' not in ticker_dict[ticker].keys():
        if 'other' not in ticker_sectors:
            ticker_sectors['other'] = stock_score_list[ticker]
        else:
             ticker_sectors['other'].update(stock_score_list[ticker])
    
    else:
        sector = ticker_dict[ticker]['sector']
        if sector not in ticker_sectors:
            ticker_sectors[sector] = stock_score_list[ticker]
        else:
            ticker_sectors[sector].update(stock_score_list[ticker])

for sector in ticker_sectors:
    ticker_sectors[sector] = sorted(ticker_sectors[sector].items(), key= lambda x: x[1], reverse=True)


In [None]:
ticker_sectors

The above dictionary shows the sorted company lists. Each sector would contain the companies from that sector (if a stock does not have sector we would put it into others). We believe that this is helpful since we want to pick stocks from different sectors/industry to diversify our portfolio. When something happens to one industry, it would not ruin the whole portfolio because of diversification. We would eliminate non-market risk by diversifing the stocks so that our target (safe) can be achieved. The stocks in each sector is sorted based on their final score with the stocks with higher score(safer) being placed at the top.

In [None]:
# decide the number of tickers to be picked, we always pick the maximum
# since we are doing safe and we need diversification
company_num = len(ticker_list)
if wish_to_pick > company_num:
    print("Exceeds the limit (number entered more than the provided file)")
else:
    company_num = wish_to_pick

# pick the tickers
picked_ticker_list=[]


# function that picks the tickers
# Since the tickers that are safer are placed at the top for each industry, 
# always append the top one for each industry and repeat the step
def pick_stock (num, ticker_sector, picked_ticker_list):
    for sector in ticker_sector:
        if (company_num == num):
            return picked_ticker_list
        elif (len(ticker_sector[sector]) == 0):
            continue
        else:
            picked_ticker_list.append (list(ticker_sector[sector])[0])
            ticker_sector[sector].pop(0)
            num += 1
            continue
    if company_num > num:
        return pick_stock (num, ticker_sector, picked_ticker_list)
    if company_num == num:
        return picked_ticker_list
    return picked_ticker_list
  
# perform the function to filtered ticker list
picked_ticker_list = dict(pick_stock(0, ticker_sectors, picked_ticker_list))
picked_ticker_list

In [None]:
wish_to_pick

From the above code block, it can be noticed that we choose the maximum number of stocks. If valid stocks are bigger than 20 we would pick the maximum limit of 20. If valid stocks are less than 20, we would pick all of them. The reason behind this is also diversification. Whether it is diversification within the same industry or in different industries, as long as the number of our companies are large, we could say that it is considered relatively less risky. Diversification could lower the non-market risk ensuring that if one thing happens to one particular company, the total portfolio would not be ruined. Hence we would pick the maximum stocks since we want to be safe.

The method that we pick stocks is that we pick top (safest) stocks from each sector. We have already ranked the stocks in each sector based on their final score, the ones with larger score would be placed on top, and the ones with lower score would be placed at the bottom. We would start picking the first stock from each sector to minimize the risk, and repeat the step until the maximum stock number is achieved. In this way, we can make sure we pick only the safest stocks from each sector ignoring the more risky stocks that are placed at the bottom.

In [None]:
# graph the closing prices for each company picked
for ticker in picked_ticker_list:
    plt.plot(ticker_history[ticker].index, ticker_history[ticker].Close, label=ticker)

# labels
plt.title('Picked Ticker Closing Prices from Jul 2021 to Nov 2021')
plt.xlabel('Dates')
plt.xticks(rotation=90)
plt.ylabel('Closing Price (USD)')


# Create legend
plt.legend(loc='best')

plt.show()

From this graph, we can visualize the closing prices for the stocks we picked. We can see that they have been relatively steady and stable without any major ups and downs which indicates that they are relatively safe and less risky.

In [None]:
if safe_risky_wise == 'safe':
    if short_long == 'short':
        picked_ticker_list = stock_score_short.keys[:wish_to_pick]
    if short_long == 'long':
        picked_ticker_list = stock_score_long.keys[:wish_to_pick]
    else: 
        print('Input invalid please try again')
if safe_risky_wise == 'risky':
    if short_long == 'short':
        picked_ticker_list = stock_score_short.keys[-wish_to_pick:]
    if short_long =='long':
        picked_ticker_list = stock_score_long.keys[-wish_to_pick:]
    else:
        print('Input invalid please try again')
if safe_risky_wise == 'wise':
    if short_long == 'long':
        picked_ticker_list = stock_wise_long.keys[:wish_to_pick]
    if short_long == 'short':
        continue
    else:
        print('Input invalid please try again')
else print('Input invalid please try again')

In [None]:
total_score = 0

for stock in picked_ticker_list.keys():
    total_score += stock_score_list[stock][stock]

##weight proportional
for stks in picked_ticker_list.keys():
    stck_weights[stks] = (stock_score_list[stks][stks]/total_score)

# extract the closing price on Nov 26 for each ticker picked
ticker_closing_list = []
for ticker in picked_ticker_list.keys():
    ticker_closing_list.append(float(yf.Ticker(ticker).history(start= purchase_date, interval='1d')['Close'].to_list()[0]))

##column names and values for weights and tickers
weight_list = list(stck_weights.values())
pcked_ticker_list = list(picked_ticker_list.keys())

After we pick our stocks, it is important to make sure it meets the weighting criteria. We first assign the minimum weight to each of them and would assign additional weights to them proportionally based one their final score (final score/ total score). In this way, we ensure that not only would they meet the weighting limit, but the stocks with larger final score (safer) would receive higher weight. We think it is reasonable.

In [None]:
# create final portfolio dataframe
if type(money)!='int':
    print ('Input invalid please try again')

FinalPortfolio = pd.DataFrame({'Ticker': pcked_ticker_list,  'Weight' : weight_list})
FinalPortfolio['Shares'] = (FinalPortfolio.Weight * 100000)/ FinalPortfolio.Price
FinalPortfolio['Value'] = FinalPortfolio.Weight * 100000
FinalPortfolio = FinalPortfolio.drop(columns='Weight')
FinalPortfolio['Weight'] = weight_list 
FinalPortfolio['Weight'] = FinalPortfolio['Weight'] * 100 

# make index start from 1
FinalPortfolio.index += 1

# values add up to 100000, and weights add up to 100
print ("The total value adds up to", round(FinalPortfolio['Value'].sum(), 3), "and the total weight adds up to", FinalPortfolio['Weight'].sum())

In [None]:
FinalPortfolio

In [None]:
# Extracting Ticker and Shares and form new dataframe
Stocks = FinalPortfolio [['Ticker', 'Shares']]
Stocks

In [None]:
# make a csv file out of Stocks dataframe
Stocks.to_csv('Stocks_Group_19.csv')

<span style="color: red;">Our file mainly experiences two yahoo finance related problems. The first one is that some tickers may not have the data for outstanding shares needed for calculating market capitalisation as well as the final score, and the second one is that some tickers might not have sectors in their ticker info. We fixed the first problem by assigning the average score(rank) to stocks that do not have data available for outstanding shares. We think it is reasonable since they would not be penalized too much on their final score because of their missing data. The score is calculated by taking the mean(average score) of the total scores for market capitalization that the tickers in ticker_list have, where ticker_list is the filtered tickered list, and every ticker in ticker_list would receive a score (from 1 to (len (ticker_list))) based on their market capitalisation. We think it is reasonable to give these stocks a fair score which is the mean score. The second problem is that some tickers may not have sectors. This problem arises when we sort the tickers based on their sectors. We solved this problem by giving them a sector of 'others'. Tickers that are in 'others' are the tickers that do not have a sector, and they would be treated fairly just like any other tickers when we diversify and pick the stocks. </span>
