In [1]:
from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
import concurrent.futures as cf

### 1. Strategy for filtering the stocks within the csv file:
 * Given a dataframe of tickers, this function first creates a new empty dataframe including the name of the ticker, the price of it, beta, standard deviation, the market cap and its returns
 * In the filtering stages
     - It first checks if there are any duplicates
     - It then checks to confirm the the stock is traded on US markets
     - Finally it confirms the daily volume from Jul 2 2021 to October 22 2021 is at least 10000
 * In order to get the beta calculations, we get the necessary data needed of the market (S&P500) which was obtained ouside of the function and stored in a dataframe
 * Then, with the help of threading, a for loop that goes through the entirety of the received dataframe and it ...
  - Gets the yfinance data for each stock in the dataframe 
  - Calculates the price, beta, standard deviation and returns to a single row dataframe and adds said dataframe to the main dataframe that was created at the beginning of the function
 * It then returns the final dataframe after escaping the threads

In [2]:
#Takes in a dataframe of tickers and filters out ones that are duplicates, not traded in the US 
#  or have an average volume less than 10000 for Jul 2 to Oct 22 2021. It produces a dataframe
#  with all valid tickers and finance data to go with them.

def filtering(Tickers):
    
    #Creates a new dataframe to store valid tickers and their financial data
    Valid_Tickers =  pd.DataFrame({'Tickers': [],
                                   'Price': [],
                                  'Beta': [],
                                  'STD': [],
                                  'Returns': []})
    
    #makes sure there are no duplicates
    for index in range(len(Tickers.index)):
        if Tickers.iloc[index,0] in Tickers.iloc[index+1:]:
            Tickers.drop([index])
            
    number = 0;
    
    #Threading
    with cf.ThreadPoolExecutor() as executor:
        
        #creates a thread for each Ticker to gets it's history data
        datarow = [executor.submit(filtering_thread, Tickers.iloc[index,0]) for index in range(len(Tickers.index))]
        
        #Adds each ticker's data to the dataframe
        for row in cf.as_completed(datarow):
            Valid_Tickers = Valid_Tickers.append(row.result())
            number+=1
            print(number)
    
    #Formats the data
    Valid_Tickers.reset_index(inplace=True)
    Valid_Tickers = Valid_Tickers[['Tickers', 'Price', 'Beta', 'STD', 'Returns']]
    
    #returns the dataframe with all the data
    return (Valid_Tickers)

In [3]:
#Takes in a Ticker, filters it to ensure it is traded in the US and as enough volume.
#  It then grabs the finacial data and returns a dataframe with a single row 
#  containing the ticker and the financial data

def filtering_thread(Ticker):
    
    #Gets data for filtering
    stock = yf.Ticker(Ticker)
    stock_hist = stock.history(start=data_start, end=data_end, interval='1d')
    
    #gets the data for the last 3 months to check volume
    volume_hist = stock_hist.iloc[-66:-1]
    
    #grabs stock info
    info = stock.info

    #Checks if stock is traded in the US
    if 'market' in info and info['market'] == 'us_market' and not(volume_hist.empty):

        #Checks if the daily volume is at least 10000
        total_sum = stock_hist.Volume.sum(axis=0)
        average = total_sum/(len (stock_hist))
        if average >= 10000:

            #Gets monthly histru for that time
            monthly_hist=stock_hist.resample('MS').first()
            prices = pd.DataFrame(monthly_hist['Close'])
            monthly_returns = prices.pct_change()

            #creates a dataframe for just the daily closing price 
            #  and another one for just daily returns
            daily_price = pd.DataFrame(stock_hist['Close'])
            daily_returns = daily_price.pct_change()

            ####### Price #############

            #Closing price for the last day availible (Nov 26 when run for competition)
            price = stock_hist['Close'].iloc[-1]

            ######## Beta #############

            #Adds markets daily returns to the dataframe
            daily_returns['Market'] = daily_market_returns['Close']

            #Calculates beta
            beta = daily_returns.cov() / daily_market_returns['Close'].var()

            ######### STD #############

            #calculated standard deviation
            std = prices.pct_change().std()

            #returns a dataframe with the tickers price, beta, std and a dataframe for it's returns
            return pd.DataFrame({'Tickers': [Ticker],
                               'Price': [price],
                              'Beta': [beta.iat[1,0]],
                              'STD': [std.Close],
                              'Returns': [stock_hist['Close'].pct_change()]})

### 2. Strategy for choosing the 1 sinlge riskiest stock

* We gathered 3 stocks with the highest standard deviation on monthly returns to be the riskiest stocks. From these 3 stocks, we calculated the riskiest stock to be the one with the highest beta value.
* Stocks with high standard deviation are considered risky as they are stocks with high volatility and great fluctuations with prices. In addition, stocks with high beta values are considered risky as they are more volatile when compared to the overall market. 
* Since standard deviation and beta are both measures of riskiness and we wanted to take both into consideration. We did so by narrowing down the stocks to those that are risky in terms of standard deviation and then the one that is riskiest in terms of beta
* There are some limitations with this approach which we will touch on later that has to do with as unlucky set of data.

In [4]:
#Takes in a set of tickers and determines which ticker is the 
#  rickiest based on standard deviation and beta

def riskiest (Tickers):
    
    # gets the 3 tickers with the highest standard deviation
    largest3_std = Tickers.nlargest(3, ['STD'])
    
    # gets the highest beta value from the 3 tickers with the highest standard deviation
    largest_beta = largest3_std.nlargest(1, ['Beta'])
    
    #returns the riskiest stock
    return (largest_beta)
 

### 3. Discussion for our decision in choosing the stocks to include
* After determining the riskest stock among all the qualified stocks in the given csv file, we now want to decide what other stocks we should include in our portfolio in order to achieve a high-risk level. We will use the characteristics of this riskest stock as our guide to choosing the remaining stocks. The key idea of our approach is to **minimize the effect of diversification**.
* The following steps explain in detail our code and why we do this. 
>1. Firstly, we filter out 20 stocks that are most correlated to the riskest stock determined before. The purpose is to make sure the stocks in our final portfolio are highly correlated, and thus less diversified. As the riskest stock is expected to fluctuate a lot, we expect the rest of the stocks in the portfolio to change in a similar way, so that the total fluctuation is larger. 
>2. Now, we filter out the 9 riskest stocks from the list of 20 correlated stocks obtained in the previous step. To determine their risk level, we mainly look at their standard deviations, as this metric measures fluctuations. We only pick the 9 riskest stocks, because the minimum number of stocks we need to have is 10. To minimize the effect of diversification, we want minimum stocks in our portfolio. 

* Discussions:
    * After designing step 1, we recognized that solely looking at the correlation will not guarantee a high-risk level. To illustrate, consider the case where stock A and stock B have a similar change pattern, say correlation is 0.9, yet, while stock A fluctuates dramatically, the extent to which stock B prices fluctuate can be minimal. Then, adding stock B to the stock A portfolio can decrease the risk level if the flucatuations in stock B can't make up for the risk lost due to diversification. Therefore, we will also consider the risk level of each individual stock, which is step 2. 
    * Finally, we will include a total of 10 stocks in our portfolio  (i.e., 1 riskest stock we determine before + 9 stocks picked in this process), with the property that, every stock is highly correlated to one single stock (the riskest one) and in a high-risk level. 

In [5]:
#Tickers is the dataframe with all the stock and their data (beta, std, mcap, returns)
#Corr is the stock that is being used to get correlation (riskiest). It should be a dataframe
#  similar to Tickers but with only one row.
#  It produces a dataframe of 10 stocks and their data which will be the stock in Corr 
#  and 9 other most correlated and risky stocks.

def other_9(Tickers, Corr):
    
    #creates a dataframe to store values
    Correlation =  pd.DataFrame({'Tickers': [],
                                 'Price': [],
                                 'Beta': [],
                                 'STD': [],
                                 'Returns': [],
                                 'Corr': []})
    
    #gets returns for the riskiest stock
    returns = Corr.iloc[0,4]
    
    #loops through the tickers
    for index in range(len(Tickers.index)):
        
        #makes sure it doesn't get correaltion with itself
        if not(Tickers.iloc[index, 0] == Corr.iloc[0,0]):
            
            #gets the returns for new stock being checked for correlation
            stock_returns = Tickers.iloc[index, 4]
            
            #combines the monthly returns for the risky and other stock in one dataframe
            returns = pd.concat([returns, stock_returns], join='inner', axis=1)
            returns.columns = ['Risky', 'Other']
            
            #adds correlation data to the main dataframe with all stocks and data
            Correlation = Correlation.append(pd.DataFrame({'Tickers': [Tickers.iloc[index,0]],
                                                            'Price': [Tickers.iloc[index,1]],
                                                            'Beta': [Tickers.iloc[index,2]],
                                                            'STD': [Tickers.iloc[index,3]],
                                                            'Returns': [Tickers.iloc[index,4]],
                                                            'Corr': [returns.corr().iat[0,1]]}))
            
            #removes the returns for the other stock
            returns = returns[['Risky']]
            
    #Gets the top 20 most correlated stocks
    most_correlated_20 = Correlation.nlargest(20, 'Corr')
    
    #Gets the top 9 riskiest stocks from the most correlated
    risky_9 = most_correlated_20.nlargest(9, 'STD')
    
    #Combines the dataframe into one final stock with the risky stock
    final = Corr.append(risky_9)
    
    #formatting
    final.reset_index(inplace=True)
    final = final[['Tickers', 'Price', 'Beta', 'STD', 'Returns', 'Corr']]
    
    #returns top 10 stocks
    return(final)
            

### 4. Strategy for deciding the weights
* Firstly, we assign 35% to the riskest stock. 35% is the maximum we can give to one single stock. We choose to do so because we aim to achieve a high risk. By distributing the most weight to the riskest stock, we want our final portfolio to behave more like the riskiest and thus fluctuate more. 
* Secondly, we assign 5% to all of the rest 9 stocks. 5% is the minimum we can give to every single stock. 
* Finally, we distribute the other 20% to only the second third and fourth stocks in our list in the way that the new weight distribution gives us the highest total standard deviation. To do this, we use 2 for loops to interate between between all the distributions, calculate the standard deviation of the entire portfolio and choose the combination that maximises standard deviation. We don't give more weightings to the last 6 stocks because they are less risky and doing so will create more diversification.
* One limitation of this program is the fact that we can't iterate through all the ways to distribute the final $\$20000 between all 9 other stocks. Doing so creates exponetial blow up and trying to do it for all 9 would take 66 years. The most we can iterate though ina resonable time is 3 which takes about 0.6 seconds. Once you start doing 4, it takes a couple minutes and anything more than that will take too long. For this reason, we are only iterating between distribution for 3 stocks. Those 3 stocks are the 3 with the highest standard deviation since they are the most likely to increase riskiness level by being given more weight.

In [6]:
#Takes in a list of 10 tickers and produces weightings for a portfolio to maximise risk level

def weightings(Tickers, availibleCash): 
    
    #Creates list of tickers in the order of risk level 
    Tickers10 = Tickers['Tickers'].iloc[0:10].tolist()

    #Creates the initial weight distribution, which is not set in stone
    weights = [0.35, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05]

    #Creates a list to store Tickers except the second to forth most risky stocks
    RestTickers = Tickers10[4:10]
    RestTickers.insert(0, Tickers10[0])

    #Gets data from yf for those tickers, and stores closing pirces in a dataframe
    data = yf.download(RestTickers,start= "2010-01-01", end= datetime.today(),period='1d')
    restprices = pd.DataFrame(data['Close'])
    restprices = restprices.dropna()
    restprices.columns = RestTickers

    #Creates a list for the weights for each of the stocks in RestTickers (these will not changed) 
    restweights = weights[4:10]
    restweights.insert(0, weights[0])

    #Creates a column in the prices dataframe for the unchanging tickers with for the portfolio value overtime
    restprices['Portfolio_Value'] = 0
    restprices['Portfolio_Value'].iloc[0]=100000*0.65 #sets first row to be the total value of these 7 stocks which is $65000

    #Calculates the value of each of the 7 stocks based on each of their weights
    for ticker in RestTickers:
        restprices[ticker+'_SharesPurchased']= 0
        restprices[ticker+'_SharesPurchased'].iloc[0] = 100000 * restweights[RestTickers.index(ticker)] / restprices[ticker][0]

    #Calculates the total portfolio value over time
    for x in range(1,len(restprices.index)):
        for ticker in RestTickers:
            restprices['Portfolio_Value'].iloc[x] += restprices[ticker+'_SharesPurchased'].iloc[0] * restprices[ticker].iloc[x]

    #Creates a list of the Tickers of the second, third and forth most risky stocks
    Two_Three_Four_Tickers = [Tickers10[1], Tickers10[2], Tickers10[3]]

    #Gets closing prices from yahoo finance for each of these stocks
    data1 = yf.download(Two_Three_Four_Tickers,start= "2010-01-01", end= datetime.today(),period='1d')
    prices = pd.DataFrame(data1['Close'])
    prices = prices.dropna()
    prices.columns = Two_Three_Four_Tickers

    #Sets the starting value of each stock to be $100000
    #We will later find a portion of the values when determining the riskest weightings for these stocks
    for ticker in Two_Three_Four_Tickers:
        prices[ticker+'_SharesPurchased']= 0
        prices[ticker+'_Value']=0
        prices[ticker+'_Value'].iloc[0]=100000
        prices[ticker+'_SharesPurchased'].iloc[0] =  100000 / prices[ticker][0]
        
    #Calculates value over time for each stock
    for x in range(1,len(prices.index)):
        for ticker in Two_Three_Four_Tickers:
            prices[ticker+'_Value'].iloc[x] = prices[ticker+'_SharesPurchased'].iloc[0] * prices[ticker][x]

    #Combines the two dataframes into one that will contain all value and pricing data overtime
    combined = pd.concat([restprices,prices],join='inner', axis=1)

    #Creates a new dataframe to store standard deviations and their weightings
    totalstd = pd.DataFrame(index=range(0, 21))
    totalstd['Standard_Deviation'] = ''
    totalstd['Weight_2'] = ''

    #Creates a dataframe to store the value of each of the stocks that have a dynamic weightings
    value_2= combined[Tickers10[1]+"_Value"]
    value_3= combined[Tickers10[2]+"_Value"]
    value_4= combined[Tickers10[3]+"_Value"]

    #Loops through all possible ways to ditribute $20000 in portions of $1000
    for x in totalstd.index:
        
        #creates a dataframe to store the weightings of the first stock and the standard deviation that goes with it
        weight23 = pd.DataFrame(index=range(0,21-x))
        weight23['Standard_Deviation'] = ''
        weight23['weight1'] = x

        #loops through all ways to distribute the remaining cash not used by the first stock to the second and third stock
        for y in range(0,21-x):
            #Calculates the value and standard deviation of the portfolio for the current weightings
            total_values= combined.Portfolio_Value + (value_2 *(5+x)/100) + (value_3 * (5+y)/100) + (value_4 * (25-x-y)/100)
            portfolio = pd.DataFrame(total_values)
            returns = pd.DataFrame(portfolio.pct_change())
            
            #Adds the standard deviation to a dataframe
            weight23['Standard_Deviation'].iloc[y] = returns.std()[0]

        #Calculates the biggest standard devaition of the dataframe
        #  which changes how cash was distributed betweeen the second and third stock
        std1 = weight23['Standard_Deviation'].max()
        
        #Adds the max standard deviation and it's weights to another dataframe which 
        #  holds standard deviations for different amounts cash in the first stock
        weight2 = weight23.index[weight23['Standard_Deviation']==std1].tolist().pop(0)
        totalstd['Weight_2'].iloc[x] = weight2
        totalstd['Standard_Deviation'] = std1

    ## Determines the weighting for the stock with the biggest standard deviation
    diff = totalstd[totalstd.Standard_Deviation == totalstd['Standard_Deviation'].max()].index[0]
    
    #Updates the weights in the original weights dataframe
    weights[1]=(5 + diff)/100
    weights[2]=(5 + totalstd.iloc[diff].Weight_2)/100
    weights[3]=(25 - diff - totalstd.iloc[diff].Weight_2)/100  
    
    #Creates a final dataframe to output
    FinalPortfolio = Tickers
    
    #adds weights to the dataframe
    weights = pd.Series(weights)
    FinalPortfolio['weights'] = weights
    
    #Calculates number of shares of bought of each stock
    FinalPortfolio['Shares'] = (FinalPortfolio.weights * availibleCash) / FinalPortfolio.Price
    
    #Creates column for the value of each stock within the portfolio
    FinalPortfolio['Value'] = FinalPortfolio.Price * FinalPortfolio.Shares
    
    #Creates a new colummn for the weights at the end of the dataframe with the numbers in terms of %
    FinalPortfolio['Weight'] = FinalPortfolio.weights * 100
    
    #Formatting
    FinalPortfolio = FinalPortfolio[['Tickers', 'Price', 'Shares', 'Value', 'Weight']]
    FinalPortfolio.columns = ['Ticker', 'Price', 'Shares', 'Value', 'Weight']
    
    #returns a final portfolio with the purchasing data for the stock being baught
    return(FinalPortfolio)

### 5. Next Steps:
    
Our project evaluated the risk of stocks based on high standard deviation, high beta value and low diversification. However there are other factors that could have helped to determine the risk level of stocks. These include calculating R-squared (coefficient of determination), Value at Risk (VaR) and/or market capitalization. 

The coefficient of determination displays the percentage of a fund or security's movements based on movements in a benchmark index (for example the Standard & Poor's 500 index). This value helps determine how likely a stock would drop if it's benchmark index dropped. A stock with a high R squared value measured against the riskiest stock from the portfolio can be considered risky. Value at Risk provides a worst-case scenario analysis where it calculates the percent of loss based on a time period and confidence level. It measures the risk of loss for investments. A stock with a high VaR value would be risky as the probability of losing that investment is high. Market cap is the total value of a company’s stocks. It is calculated by multiplying the number of outstanding shares with the current price of each share. A company with a small market cap is deemed more risky than a company with a large market cap. This is because companies with small market caps tend to be young companies with more uncertainties and high volatility. Incorporating these additional risk factors into our project could provide a better portfolio of risky stocks as there would be more factors evaluated with each stock. 

When it comes to limitations to our project, the steps that we take to come up with the riskiest stocks may not produce the best results. For example, when we consider 20 stocks that are highly correlated with the riskiest stock, we would not consider stocks that are risky (according to the beta value and standard deviation) but not correlated with the riskiest stock. One way we can fix the issue is by dynamically setting a minimum value for the correlation coefficient and then gather the stocks that fit the requirement. In the case that there are less than 9 stocks, we would decrease the minimum value of the correlation coefficient until we reach the required number of stocks.

Another limitation comes from the narrowing down of stocks using different values. There can be unlucky set of data where you do not even come close to ending up with a risky set of stocks. Take for example a set where stock A has a STD (standard devaition) of 20 and a beta of 1, stock B has a STD of 8 and a beta of 1.01, and stock C has a STD of 7 and a beta of 1.1. If these were the 3 stocks with the highest STD when trying to find the riskiest, clearly the top riskiest is stock A. But since we choose our riskiest stock off of beta from the top 3 STDs, our algorithm would produce stock C as the riskiest. If we were to fix this issue, we could create a points system that assigns certain amounts of points for each measure of risk. In the above scenario, stock A would get alot of points for it's STD and the other stocks wouldn't be able to get enough from beta to catch up. Perfecting the points values could take awhile but it would eliminate the possibility of the above situation.

In [7]:
# Returns a dataframe with the inputted tickers and their current prices

def grab_tickers(Tickers):
    
    #Creates a new dataframe to store tickers and there price
    Valid_Tickers =  pd.DataFrame({'Ticker': [],
                                   'Price': []})
            
    number = 0;
    
    #Threading
    with cf.ThreadPoolExecutor() as executor:
        
        #creates a thread for each Ticker to gets it's history data
        datarow = [executor.submit(price_thread, Tickers.iloc[index,0]) for index in range(len(Tickers.index))]
        
        #Adds each ticker's data to the dataframe
        for row in cf.as_completed(datarow):
            Valid_Tickers = Valid_Tickers.append(row.result())
            number+=1
            print(number)
    
    #Formats the data
    Valid_Tickers.reset_index(inplace=True)
    Valid_Tickers = Valid_Tickers[['Ticker', 'Price']]
    
    
    
    #returns the dataframe with all the data
    return (Valid_Tickers)

# Takes in a Ticker and finds its most recent closing price

def price_thread(Ticker):
    
    #Gets data for filtering
    
    today = datetime.today()
    yesterday = today - timedelta(days=7)

    stock = yf.Ticker(Ticker)
    stock_hist = stock.history(start=yesterday, end=today, interval='1d')
    close_today = stock_hist.iloc[-1, 3]
    
    return pd.DataFrame({'Ticker': [Ticker],
                         'Price': [close_today]})

In [8]:
# Determines the current value of the portfolio

def calc_funds():
    
    # Grabs the past data from the csv
    past_data = pd.read_csv('Portfolio_History.csv')
    
    # Creates a dataframe of past tickers most recent share count sorted alphabetically by ticker
    prev_shares = past_data.iloc[1: , :]
    prev_shares = prev_shares.set_index('Ticker')
    prev_shares.sort_index(inplace=True) 
    prev_shares.reset_index(inplace=True)
    prev_shares = prev_shares.iloc[: , -1:]
    
    # Creates a dataframe of past tickers sorted alphabetically
    prev_tickers = past_data.iloc[1: , 1:2]
    prev_tickers = prev_tickers.set_index('Ticker')
    prev_tickers.sort_index(inplace=True) 
    prev_tickers.reset_index(inplace=True)
    prev_tickers = prev_tickers[['Ticker']]
    
    # Optimises the reading of ticker price
    prev_tickers['Shares'] = prev_shares
    prev_tickers = prev_tickers.dropna()
    prev_tickers = prev_tickers.reset_index()
    prev_tickers = prev_tickers[['Ticker']]
    
    prev_shares = prev_shares.dropna()
    prev_shares = prev_shares.reset_index()
    prev_shares = prev_shares.iloc[: , -1:]
    
    # Grabs the closing price for today and sorts alphabetically by ticker
    prices = grab_tickers(prev_tickers)
    prices = prices.set_index('Ticker')
    prices.sort_index(inplace=True) 
    prices = prices.reset_index()
    
    # Merges data and calculates total value of portfolio
    prev = prices
    prev['Shares'] = prev_shares
    prev['Value'] = prev.Price * prev.Shares
    availible_funds = prev.Value.sum(axis=0)
    
    return(availible_funds)

In [9]:
# Updates the Portfolio_History.csv and saves yesterdays data in Yesterday_Portfolio_History.csv

def rewriting_csv(FinalPortfolio):
    
    # Adding total value to the top of the table
    # Shares will be the total value in dollars of the total row
    today_value = FinalPortfolio.Value.sum(axis=0)
    value_row = []
    value_row.insert(0, {'Ticker': 'Total Value', 'Price': today_value, 'Shares': today_value, 'Value': today_value, 'Weight': 100.0})
    today_data = pd.concat([pd.DataFrame(value_row), FinalPortfolio], ignore_index=True)

    # reformatting columns
    today_data = today_data[['Ticker', 'Shares']]
    today = datetime.today()
    today_data.columns = ['Ticker', today]

    # Grabbing past data
    past_data = pd.read_csv('Portfolio_History.csv')
    
    # Saving yesterdays data incase of error
    saving = past_data.iloc[:,1:]
    saving.to_csv('Yesterday_Portfolio_History.csv')
    
    # removing extra index column
    past_data = past_data.iloc[: , 1:]
    full_data = past_data.merge(today_data, left_on='Ticker', right_on='Ticker', how='outer')

    # Ordering the shares alphabetically
    reorder_tickets = full_data.iloc[1:]
    reorder_tickets = reorder_tickets.set_index('Ticker')
    reorder_tickets.sort_index(inplace=True) 
    reorder_tickets = reorder_tickets.reset_index()

    # Adding ordered share data back to dataframe
    full_data.iloc[1:] = reorder_tickets

    # Updating csv file
    full_data.to_csv('Portfolio_History.csv')
    print(full_data)

In [10]:
def get_sp500():
    try:
        # Grabs the SP500 tickers from wikipedia
        wikipedia = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
        sp500 = wikipedia[0]
        sp500_symbols = pd.DataFrame(sp500['Symbol'].values.tolist())
        sp500_symbols.columns = ['Tickers']

        # Saves current SP500 tickers incase of future issues
        sp500_symbols.to_csv('S&P500.csv')
    except:
        # Grabs yesterdays data instead
        print('ERROR in reading current SP500 companies')
        print('Using last successful read instead')
        sp500_symbols = pd.read_csv('S&P500.csv')
        sp500_symbols = sp500_symbols.iloc[:,1:]
        sp500_symbols.columns = [['Tickers']]
        
    return(sp500_symbols)

In [11]:
#Reads in the csv file 
Tickers = get_sp500()

print("Tickers grabbed")

#Sets constants for the data being collected
data_end = datetime.today()
data_start = data_end - timedelta(days=3653) #10 years

print("Date range set")

#Grabs market data for S&P500
market_index = yf.Ticker('^GSPC')
market_hist = market_index.history(start=data_start, end=data_end, interval='1d')
market_hist = pd.DataFrame(market_hist['Close'])
daily_market_returns = market_hist.pct_change()

print("Market data grabbed")

#Filters the stocks and gets their data
Tickers = filtering(Tickers)

print("Tickets filtered")

#Determines the riskiest stock
riskiest = riskiest(Tickers)

print("Riskiest chosen")

#Chooses the other 9 stocks
final10 = other_9(Tickers, riskiest)

print("Riskiest 10 chosen")

#Gets the weighting for the 10 stocks
FinalPortfolio = weightings(final10, calc_funds())

print("Portfolio weightings decided")
print(FinalPortfolio)

#Calculates the total weights and value to prove the porfolio is valid
totalValue = FinalPortfolio.Value.sum(axis=0)
totalWeight = FinalPortfolio.Weight.sum(axis=0)
print('Total value is: $' + str(totalValue))
print('Total weight is: ' + str(totalWeight) + '%')

#Creates a dataframe for the .csv file and creates the csv
rewriting_csv(FinalPortfolio)

print("File created")
FinalPortfolio

Tickers grabbed
Date range set
Market data grabbed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
- BRK.B: No data found, symbol may be delisted
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
- BF.B: No data found for this date range, symbol may be delisted
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


[*********************100%***********************]  3 of 3 completed


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Portfolio weightings decided
  Ticker       Price      Shares         Value  Weight
0   ENPH  200.669998  169.817184  34077.214102    35.0
1   ETSY   78.730003   61.833777   4868.173443     5.0
2   TSLA  705.210022    6.903154   4868.173443     5.0
3   PENN   28.510000  853.765942  24340.867216    25.0
4   SEDG  291.529999   16.698705   4868.173443     5.0
5   PAYC  286.140015   17.013256   4868.173443     5.0
6   QRVO   94.980003   51.254720   4868.173443     5.0
7   NVDA  162.250000   30.004151   4868.173443     5.0
8   GNRC  221.910004   21.937602   4868.173443     5.0
9   TRMB   59.430000   81.914411   4868.173443     5.0
Total value is: $97363.46886394368
Total weight is: 100.0%
         Ticker        01:25.8       16:44.4  2022-06-23 18:53:21.370964
0   Total Value  100000.000000  94871.395900                97363.468864
1           APA     867.410194           NaN                         NaN
2           BKR     164.041997           NaN                         NaN
3           DVN

Unnamed: 0,Ticker,Price,Shares,Value,Weight
0,ENPH,200.669998,169.817184,34077.214102,35.0
1,ETSY,78.730003,61.833777,4868.173443,5.0
2,TSLA,705.210022,6.903154,4868.173443,5.0
3,PENN,28.51,853.765942,24340.867216,25.0
4,SEDG,291.529999,16.698705,4868.173443,5.0
5,PAYC,286.140015,17.013256,4868.173443,5.0
6,QRVO,94.980003,51.25472,4868.173443,5.0
7,NVDA,162.25,30.004151,4868.173443,5.0
8,GNRC,221.910004,21.937602,4868.173443,5.0
9,TRMB,59.43,81.914411,4868.173443,5.0
