In [2]:
import numpy as np
import pandas as pd
import requests

#Fin Data Sources
import yfinance as yf
import pandas_datareader as pdr

#Data viz
import plotly.graph_objs as go
import plotly.express as px

import time
from datetime import date

import math

import warnings
# Suppress all warnings
warnings.filterwarnings("ignore")

import talib

In [4]:
def get_ipo_data(target):
    """Download one IPO table from stockanalysis.com as a DataFrame.

    Parameters
    ----------
    target : str
        Path segment appended to ``https://stockanalysis.com/ipos/``.

    Returns
    -------
    pandas.DataFrame
        The first table that ``pandas.read_html`` finds on the page.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (4xx/5xx).
    requests.Timeout
        If the server does not answer within the timeout.
    ValueError
        Raised by ``pandas.read_html`` when the page contains no table.
    """
    # Local import keeps this cell runnable without touching the import cell.
    from io import StringIO

    # Browser-like User-Agent; the original author noted it is not strictly
    # necessary for this site.
    headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    }

    url = "https://stockanalysis.com/ipos/" + target + "/"

    # A timeout prevents the notebook from hanging on a stalled connection.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on an error page instead of trying to parse it as a table.
    response.raise_for_status()

    # Passing literal HTML to read_html is deprecated; wrap it in a file-like
    # object instead.
    ipo_raw = pd.read_html(StringIO(response.text))

    return ipo_raw[0]

In [5]:
# Scrape the 'filings' page and preview the first 20 rows of the table.
df_ipo_filings = get_ipo_data('filings')
df_ipo_filings.head(20)

  ipo_raw = pd.read_html(response.text)


Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered
0,"May 3, 2024",TBN,Tamboran Resources Corporation,-,-
1,"Apr 29, 2024",HWEC,"HW Electro Co., Ltd.",$3.00,3750000
2,"Apr 29, 2024",DTSQ,DT Cloud Star Acquisition Corporation,$10.00,6000000
3,"Apr 26, 2024",EURK,Eureka Acquisition Corp,$10.00,5000000
4,"Apr 26, 2024",HDL,Super Hi International Holding Ltd.,-,-
5,"Apr 22, 2024",DRJT,Derun Group Inc,$5.00,-
6,"Apr 19, 2024",GPAT,GP-Act III Acquisition Corp.,$10.00,25000000
7,"Apr 16, 2024",JLJT,Jialiang Holdings Ltd,$5.00,-
8,"Apr 15, 2024",GAUZ,Gauzy Ltd.,-,-
9,"Apr 12, 2024",BOW,Bowhead Specialty Holdings Inc.,-,-


In [6]:
df_ipo_filings.dtypes

Filing Date       object
Symbol            object
Company Name      object
Price Range       object
Shares Offered    object
dtype: object

# Q1

In [7]:
# Parse 'Filing Date' text such as "May 3, 2024" into datetimes.
df_ipo_filings['Filing Date'] = pd.to_datetime(df_ipo_filings['Filing Date'], format="%b %d, %Y")
# Non-numeric share counts (e.g. "-") are coerced to NaN and then replaced by 0.
df_ipo_filings['Shares Offered'] = pd.to_numeric(df_ipo_filings['Shares Offered'], errors='coerce').fillna(0)

In [8]:
def average_price_range(row):
    """Convert one 'Price Range' cell into a single numeric price.

    Parameters
    ----------
    row : str or number
        A cell such as ``"$8.00 - $10.00"`` (range), ``"$3.00"`` (single
        price) or ``"-"`` (no price). Non-string values are passed through
        unchanged, so an existing NaN stays NaN.

    Returns
    -------
    float
        The mean of all prices found in the cell, or NaN when the cell holds
        no price at all.
    """
    # Local import keeps this cell self-contained.
    import re

    if row is None:
        return np.nan
    # Already numeric (including float NaN): nothing to parse.
    if not isinstance(row, str):
        return row

    text = row.strip()
    if text in ("", "-"):
        return np.nan

    # Extract every number regardless of the separator style used for ranges
    # ("$8.00 - $10.00", "$8.00-$10.00", ...); thousands separators are removed
    # first so "1,250.50" is read as one number.
    numbers = re.findall(r"\d+(?:\.\d+)?|\.\d+", text.replace(",", ""))
    if not numbers:
        # Unparseable text must not leak a string into a numeric column.
        return np.nan

    prices = [float(item) for item in numbers]
    return sum(prices) / len(prices)

In [9]:
df_ipo_filings

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered
0,2024-05-03,TBN,Tamboran Resources Corporation,-,0.0
1,2024-04-29,HWEC,"HW Electro Co., Ltd.",$3.00,3750000.0
2,2024-04-29,DTSQ,DT Cloud Star Acquisition Corporation,$10.00,6000000.0
3,2024-04-26,EURK,Eureka Acquisition Corp,$10.00,5000000.0
4,2024-04-26,HDL,Super Hi International Holding Ltd.,-,0.0
...,...,...,...,...,...
320,2020-01-21,GOXS,"Goxus, Inc.",$8.00 - $10.00,1500000.0
321,2020-01-21,UTXO,"UTXO Acquisition, Inc.",$10.00,5000000.0
322,2019-12-09,LOHA,Loha Co. Ltd,$8.00 - $10.00,2500000.0
323,2019-10-04,ZGHB,China Eco-Materials Group Co. Limited,$4.00,4300000.0


In [10]:
# One numeric price per filing; cells without a price (NaN from
# average_price_range) are replaced by 0.
df_ipo_filings['avg_price'] = df_ipo_filings['Price Range'].apply(average_price_range).fillna(0)
df_ipo_filings

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered,avg_price
0,2024-05-03,TBN,Tamboran Resources Corporation,-,0.0,0.00
1,2024-04-29,HWEC,"HW Electro Co., Ltd.",$3.00,3750000.0,3.00
2,2024-04-29,DTSQ,DT Cloud Star Acquisition Corporation,$10.00,6000000.0,10.00
3,2024-04-26,EURK,Eureka Acquisition Corp,$10.00,5000000.0,10.00
4,2024-04-26,HDL,Super Hi International Holding Ltd.,-,0.0,0.00
...,...,...,...,...,...,...
320,2020-01-21,GOXS,"Goxus, Inc.",$8.00 - $10.00,1500000.0,9.00
321,2020-01-21,UTXO,"UTXO Acquisition, Inc.",$10.00,5000000.0,10.00
322,2019-12-09,LOHA,Loha Co. Ltd,$8.00 - $10.00,2500000.0,9.00
323,2019-10-04,ZGHB,China Eco-Materials Group Co. Limited,$4.00,4300000.0,4.00


In [11]:
# Offering value = shares offered x average price. Rows where either factor
# was filled with 0 above contribute 0.
df_ipo_filings['shares_offered_value'] = df_ipo_filings['Shares Offered'] * df_ipo_filings['avg_price']
df_ipo_filings

Unnamed: 0,Filing Date,Symbol,Company Name,Price Range,Shares Offered,avg_price,shares_offered_value
0,2024-05-03,TBN,Tamboran Resources Corporation,-,0.0,0.00,0.0
1,2024-04-29,HWEC,"HW Electro Co., Ltd.",$3.00,3750000.0,3.00,11250000.0
2,2024-04-29,DTSQ,DT Cloud Star Acquisition Corporation,$10.00,6000000.0,10.00,60000000.0
3,2024-04-26,EURK,Eureka Acquisition Corp,$10.00,5000000.0,10.00,50000000.0
4,2024-04-26,HDL,Super Hi International Holding Ltd.,-,0.0,0.00,0.0
...,...,...,...,...,...,...,...
320,2020-01-21,GOXS,"Goxus, Inc.",$8.00 - $10.00,1500000.0,9.00,13500000.0
321,2020-01-21,UTXO,"UTXO Acquisition, Inc.",$10.00,5000000.0,10.00,50000000.0
322,2019-12-09,LOHA,Loha Co. Ltd,$8.00 - $10.00,2500000.0,9.00,22500000.0
323,2019-10-04,ZGHB,China Eco-Materials Group Co. Limited,$4.00,4300000.0,4.00,17200000.0


In [12]:
# Filings dated on a Friday (pandas dayofweek: Monday=0, so Friday=4) in 2023.
df_ipo_fri_2023 = df_ipo_filings[(df_ipo_filings['Filing Date'].dt.dayofweek == 4) & (df_ipo_filings['Filing Date'].dt.year == 2023)]
# Total offering value per filing date, stored in column 'totalsum'.
q1_ans = df_ipo_fri_2023.groupby('Filing Date').agg(totalsum=('shares_offered_value', 'sum')).reset_index()

In [13]:
# Per-date value in $M, rounded for display only.
q1_ans['totalsum_M'] = round(q1_ans['totalsum']/1e6)
# Answer: sum the unrounded dollar values and round once at the end. Summing
# the per-date rounded values lets each date's rounding error (up to $0.5M)
# accumulate in the total.
np.round(q1_ans['totalsum'].sum() / 1e6)

285.0

# Q2

In [14]:
# Scrape the IPO tables from the '2023' and '2024' pages.
df_ipo_2023 = get_ipo_data("2023")
df_ipo_2024 = get_ipo_data("2024")

  ipo_raw = pd.read_html(response.text)
  ipo_raw = pd.read_html(response.text)


In [15]:
df_ipo_2024.head()

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
0,"May 1, 2024",VIK,Viking Holdings Ltd.,$24.00,$28.65,19.38%
1,"Apr 26, 2024",ZONE,"CleanCore Solutions, Inc.",$4.00,$3.15,-21.25%
2,"Apr 25, 2024",RBRK,"Rubrik, Inc.",$32.00,$33.77,5.53%
3,"Apr 25, 2024",LOAR,Loar Holdings Inc.,$28.00,$49.83,77.96%
4,"Apr 25, 2024",MRX,Marex Group plc,$19.00,$19.17,0.89%


In [16]:
# Stack both years. The original row labels are kept (no ignore_index), so
# index values repeat across the two source tables.
df_ipo = pd.concat([df_ipo_2023, df_ipo_2024], axis=0)
# Exclude symbol 'RYZB' (the reason is not stated in this notebook).
df_ipo = df_ipo[df_ipo['Symbol'] != 'RYZB']

In [17]:
df_ipo[df_ipo['Symbol'] == 'IROH']

Unnamed: 0,IPO Date,Symbol,Company Name,IPO Price,Current,Return
0,"Dec 27, 2023",IROH,Iron Horse Acquisitions Corp.,$10.00,$10.05,0.50%


In [18]:
# Symbols of all remaining IPOs, in table order, as a plain Python list.
ticket_list = df_ipo['Symbol'].to_list()

In [19]:
def download_ipo_data(ticket_list):
    """Fetch the full daily price history for every symbol in ``ticket_list``.

    Returns a dict mapping ``'df_<SYMBOL>'`` to the downloaded DataFrame.
    Symbols whose download comes back empty are reported on stdout and left
    out of the dict.
    """
    histories = {}

    for symbol in ticket_list:
        prices = yf.download(tickers=symbol, period='max', interval='1d')

        # Guard clause: report and skip symbols without any data.
        if prices.empty:
            print(f'No data found for {symbol}')
            continue

        histories[f'df_{symbol}'] = prices

    return histories

In [20]:
stock_data_dfs = download_ipo_data(ticket_list)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

No data found for PTHR


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

No data found for BKHA


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

In [21]:
# Growth of every IPO relative to its first traded day, for holding periods of
# 1..30 rows: growth_future_<i>d = AdjClose[i] / AdjClose[0].
MAX_HOLDING_DAYS = 30

# Kept from the original cell; not used by the computation below.
columns = ['symbol', 'optimal_day', 'highest_quantile']

# Collect one plain dict per symbol and build the DataFrame once at the end.
# This replaces a pd.merge per horizon and a pd.concat per symbol, and gives
# the result a unique 0..n-1 index instead of the label 0 on every row.
growth_rows = []

for symbol, df in stock_data_dfs.items():
    adj_close = df['Adj Close']
    row = {'symbol': symbol}

    # Only horizons that exist in this history get a value (i must be a valid
    # row position); the missing ones become NaN when the frame is assembled.
    last_horizon = min(MAX_HOLDING_DAYS, len(adj_close) - 1)
    for i in range(1, last_horizon + 1):
        row[f'growth_future_{i}d'] = adj_close.iloc[i] / adj_close.iloc[0]

    growth_rows.append(row)

result = pd.DataFrame(growth_rows)

In [22]:
result

Unnamed: 0,symbol,growth_future_1d,growth_future_2d,growth_future_3d,growth_future_4d,growth_future_5d,growth_future_6d,growth_future_7d,growth_future_8d,growth_future_9d,...,growth_future_21d,growth_future_22d,growth_future_23d,growth_future_24d,growth_future_25d,growth_future_26d,growth_future_27d,growth_future_28d,growth_future_29d,growth_future_30d
0,df_IROH,1.000999,1.000500,1.000999,1.000000,1.000999,1.000000,0.997003,0.997003,0.997003,...,0.997702,0.997702,0.997502,0.997902,0.996004,0.997003,0.997502,0.997003,0.998002,0.998002
0,df_LGCB,0.811224,0.877551,0.795918,0.844898,0.833673,0.859184,0.928571,0.872449,0.806122,...,0.785714,0.790306,0.910204,0.862245,0.831633,0.816327,0.785714,0.795918,0.892857,0.994898
0,df_ZKH,1.000000,1.000000,1.011613,1.008387,1.003871,1.029032,1.025806,1.040000,1.052258,...,1.079355,1.038710,1.045161,1.084516,1.194839,1.228387,1.106452,1.145161,1.169032,1.100645
0,df_BAYA,1.000999,1.003996,1.004995,1.004995,1.004995,1.006893,1.006194,1.005994,1.005994,...,1.011988,1.011988,1.011988,1.011988,1.011988,1.011988,1.011988,1.011988,1.011988,1.011988
0,df_INHD,0.153569,0.133381,0.122206,0.113194,0.108147,0.107426,0.099495,0.090123,0.108508,...,0.077866,0.077145,0.076424,0.076424,0.069430,0.069935,0.081182,0.077866,0.083922,0.081471
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,df_PSBD,1.011118,1.004324,1.016677,1.016677,1.008030,1.003088,1.009883,1.002471,1.009883,...,1.016059,1.024089,1.035207,1.006177,1.009265,0.997529,0.990117,1.011118,1.004324,1.004324
0,df_CCTG,1.168831,1.298701,1.267533,1.929870,2.171429,2.529870,2.168831,2.359740,2.751948,...,0.467532,0.381818,0.388312,0.370130,0.358442,0.348052,0.353247,0.393506,0.376623,0.370130
0,df_SYNX,1.011080,1.030471,0.980609,0.850416,0.806094,0.819945,0.798061,0.828255,0.831025,...,0.994460,0.988920,0.941828,0.933518,0.941828,0.977839,0.966759,0.983934,0.969529,0.989474
0,df_SDHC,0.991304,1.010766,1.028986,1.014079,1.002070,1.022360,1.077019,1.033126,1.072464,...,1.117598,1.148240,1.161491,1.157350,1.175569,1.167702,1.186749,1.193789,1.163561,1.171843


In [23]:
# List of growth columns names
growth_columns = ['growth_future_' + str(i) + 'd' for i in range(1, 31)]

# Calculate the 75th quantile for each growth column
quantiles_75th = result[growth_columns].quantile(0.75)

# Identify the day with the highest 75th quantile
print('optimal_day:', quantiles_75th.idxmax())
print('highest_quantile:', quantiles_75th.max())


optimal_day: growth_future_28d
highest_quantile: 1.039141956220915


# Q3

In [4]:
# Ticker universes for Q3. LARGEST_STOCKS combines the three regional lists
# directly below; LARGE_STOCKS combines the three NEW_* lists.
US_STOCKS = ['MSFT', 'AAPL', 'GOOG', 'NVDA', 'AMZN', 'META', 'BRK-B', 'LLY', 'AVGO','V', 'JPM']

EU_STOCKS = ['NVO','MC.PA', 'ASML', 'RMS.PA', 'OR.PA', 'SAP', 'ACN', 'TTE', 'SIE.DE','IDEXY','CDI.PA']

INDIA_STOCKS = ['RELIANCE.NS','TCS.NS','HDB','BHARTIARTL.NS','IBN','SBIN.NS','LICI.NS','INFY','ITC.NS','HINDUNILVR.NS','LT.NS']

LARGEST_STOCKS = US_STOCKS + EU_STOCKS + INDIA_STOCKS

NEW_US = ['TSLA','WMT','XOM','UNH','MA','PG','JNJ','MRK','HD','COST','ORCL']

# NOTE(review): 'CDI.PA' is also listed in EU_STOCKS above, and this list has
# 12 entries while the other regional lists have 11 - confirm this is intended.
NEW_EU = ['PRX.AS','CDI.PA','AIR.PA','SU.PA','ETN','SNY','BUD','DTE.DE','ALV.DE','MDT','AI.PA','EL.PA']

# NOTE(review): 'ADANIENT.NS' appears twice in this list - confirm this is intended.
NEW_INDIA = ['BAJFINANCE.NS','MARUTI.NS','HCLTECH.NS','TATAMOTORS.NS','SUNPHARMA.NS','ONGC.NS','ADANIENT.NS','ADANIENT.NS','NTPC.NS','KOTAKBANK.NS','TITAN.NS']

LARGE_STOCKS = NEW_EU + NEW_US + NEW_INDIA

# Plain list concatenation: repeated tickers are kept, not de-duplicated.
all_tickers = LARGEST_STOCKS + LARGE_STOCKS

print(f'# of LARGEST_STOCKS: {len(LARGEST_STOCKS)}')
print(f'# of LARGE_STOCKS: {len(LARGE_STOCKS)}')
print(f'# of all_tickers: {len(all_tickers)}')

# of LARGEST_STOCKS: 33
# of LARGE_STOCKS: 34
# of all_tickers: 67


In [133]:
# Build one long frame of daily prices plus engineered features for every
# entry of all_tickers. The list is iterated as-is, so a ticker that is listed
# more than once is downloaded and appended once per occurrence.
stock_frames = []

for i, ticker in enumerate(all_tickers):
    print(i, ticker)

    # stock price
    history_prices = yf.download(tickers=ticker, period='max', interval='1d')

    # Nothing to build features from: report and move on instead of failing
    # in the feature code below.
    if history_prices.empty:
        print(f'No data found for {ticker}')
        time.sleep(1)
        continue

    # feature generation
    history_prices['ticker'] = ticker
    history_prices['year'] = history_prices.index.year
    history_prices['month'] = history_prices.index.month
    history_prices['weekday'] = history_prices.index.weekday
    history_prices['date'] = pd.to_datetime(history_prices.index.date)

    # .copy() makes the filtered window an independent frame, so the column
    # assignments below do not write into a slice of the full history (the
    # resulting warning is hidden by the notebook's global warnings filter).
    in_window = (history_prices['date'] >= '2013-12-20') & (history_prices['date'] <= '2023-12-31')
    history_prices = history_prices[in_window].copy()

    # historical returns: ratio of Adj Close to its value j rows earlier
    for j in [1,3,7,30,90,180,365,545,730]:
        history_prices['growth_' + str(j) + 'd'] = history_prices['Adj Close'] / history_prices['Adj Close'].shift(j)

    # forward-looking ratio: Adj Close 7 rows ahead over the current Adj Close
    history_prices['growth_future_7d'] = history_prices['Adj Close'].shift(-7) / history_prices['Adj Close']

    # Technical indicator
    # SimpleMovingAverage 10D and 20D
    history_prices['MA10'] = history_prices['Close'].rolling(10).mean()
    history_prices['MA20'] = history_prices['Close'].rolling(20).mean()
    history_prices['growing_ma'] = np.where(history_prices['MA10'] > history_prices['MA20'], 1, 0)
    history_prices['high_minus_low_relative'] = (history_prices.High - history_prices.Low) / history_prices['Adj Close']

    # 30D rolling volatility
    # NOTE(review): this is the rolling std of Adj Close price levels (not of
    # returns) scaled by sqrt(252) - confirm that is the intended definition.
    history_prices['volatility'] = history_prices['Adj Close'].rolling(30).std()*np.sqrt(252)

    history_prices['is_positive_growth_7d_future'] = np.where(history_prices['growth_future_7d'] > 1, 1, 0)

    stock_frames.append(history_prices)

    # sleep 1 sec between downloads - not to overload the API server
    time.sleep(1)

# Concatenate once at the end; repeated pd.concat inside the loop copies the
# accumulated frame on every iteration.
df_stocks = pd.concat(stock_frames, ignore_index=True) if stock_frames else pd.DataFrame()

0 MSFT


[*********************100%%**********************]  1 of 1 completed


1 AAPL


[*********************100%%**********************]  1 of 1 completed


2 GOOG


[*********************100%%**********************]  1 of 1 completed


3 NVDA


[*********************100%%**********************]  1 of 1 completed


4 AMZN


[*********************100%%**********************]  1 of 1 completed


5 META


[*********************100%%**********************]  1 of 1 completed


6 BRK-B


[*********************100%%**********************]  1 of 1 completed


7 LLY


[*********************100%%**********************]  1 of 1 completed


8 AVGO


[*********************100%%**********************]  1 of 1 completed


9 V


[*********************100%%**********************]  1 of 1 completed


10 JPM


[*********************100%%**********************]  1 of 1 completed


11 NVO


[*********************100%%**********************]  1 of 1 completed


12 MC.PA


[*********************100%%**********************]  1 of 1 completed


13 ASML


[*********************100%%**********************]  1 of 1 completed


14 RMS.PA


[*********************100%%**********************]  1 of 1 completed


15 OR.PA


[*********************100%%**********************]  1 of 1 completed


16 SAP


[*********************100%%**********************]  1 of 1 completed


17 ACN


[*********************100%%**********************]  1 of 1 completed


18 TTE


[*********************100%%**********************]  1 of 1 completed


19 SIE.DE


[*********************100%%**********************]  1 of 1 completed


20 IDEXY


[*********************100%%**********************]  1 of 1 completed


21 CDI.PA


[*********************100%%**********************]  1 of 1 completed


22 RELIANCE.NS


[*********************100%%**********************]  1 of 1 completed


23 TCS.NS


[*********************100%%**********************]  1 of 1 completed


24 HDB


[*********************100%%**********************]  1 of 1 completed


25 BHARTIARTL.NS


[*********************100%%**********************]  1 of 1 completed


26 IBN


[*********************100%%**********************]  1 of 1 completed


27 SBIN.NS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

28 LICI.NS





29 INFY


[*********************100%%**********************]  1 of 1 completed


30 ITC.NS


[*********************100%%**********************]  1 of 1 completed


31 HINDUNILVR.NS


[*********************100%%**********************]  1 of 1 completed


32 LT.NS


[*********************100%%**********************]  1 of 1 completed


33 PRX.AS


[*********************100%%**********************]  1 of 1 completed


34 CDI.PA


[*********************100%%**********************]  1 of 1 completed


35 AIR.PA


[*********************100%%**********************]  1 of 1 completed


36 SU.PA


[*********************100%%**********************]  1 of 1 completed


37 ETN


[*********************100%%**********************]  1 of 1 completed


38 SNY


[*********************100%%**********************]  1 of 1 completed


39 BUD


[*********************100%%**********************]  1 of 1 completed


40 DTE.DE


[*********************100%%**********************]  1 of 1 completed


41 ALV.DE


[*********************100%%**********************]  1 of 1 completed


42 MDT


[*********************100%%**********************]  1 of 1 completed


43 AI.PA


[*********************100%%**********************]  1 of 1 completed


44 EL.PA


[*********************100%%**********************]  1 of 1 completed


45 TSLA


[*********************100%%**********************]  1 of 1 completed


46 WMT


[*********************100%%**********************]  1 of 1 completed


47 XOM


[*********************100%%**********************]  1 of 1 completed


48 UNH


[*********************100%%**********************]  1 of 1 completed


49 MA


[*********************100%%**********************]  1 of 1 completed


50 PG


[*********************100%%**********************]  1 of 1 completed


51 JNJ


[*********************100%%**********************]  1 of 1 completed


52 MRK


[*********************100%%**********************]  1 of 1 completed


53 HD


[*********************100%%**********************]  1 of 1 completed


54 COST


[*********************100%%**********************]  1 of 1 completed


55 ORCL


[*********************100%%**********************]  1 of 1 completed


56 BAJFINANCE.NS


[*********************100%%**********************]  1 of 1 completed


57 MARUTI.NS


[*********************100%%**********************]  1 of 1 completed


58 HCLTECH.NS


[*********************100%%**********************]  1 of 1 completed


59 TATAMOTORS.NS


[*********************100%%**********************]  1 of 1 completed


60 SUNPHARMA.NS


[*********************100%%**********************]  1 of 1 completed


61 ONGC.NS


[*********************100%%**********************]  1 of 1 completed


62 ADANIENT.NS


[*********************100%%**********************]  1 of 1 completed


63 ADANIENT.NS


[*********************100%%**********************]  1 of 1 completed


64 NTPC.NS


[*********************100%%**********************]  1 of 1 completed


65 KOTAKBANK.NS


[*********************100%%**********************]  1 of 1 completed


66 TITAN.NS


[*********************100%%**********************]  1 of 1 completed


In [142]:
# Label every row by ticker membership. Membership in LARGE_STOCKS is tested
# first and 'LARGEST_STOCKS' is only the fallback, so a ticker present in both
# lists is labelled 'LARGE_STOCKS' on all of its rows.
df_stocks['category'] = np.where(df_stocks['ticker'].isin(LARGE_STOCKS),'LARGE_STOCKS', 'LARGEST_STOCKS')

In [135]:
# Keep only the columns needed for the 7-day comparison and drop rows where
# any of them is missing (e.g. rows without a growth_7d value).
df_stocker_7d = df_stocks[['category', 'ticker', 'date', 'growth_7d']].dropna()
df_stocker_7d

Unnamed: 0,category,ticker,date,growth_7d
7,LARGEST_STOCKS,MSFT,2014-01-02,1.009782
8,LARGEST_STOCKS,MSFT,2014-01-03,1.007920
9,LARGEST_STOCKS,MSFT,2014-01-06,0.974379
10,LARGEST_STOCKS,MSFT,2014-01-07,0.972489
11,LARGEST_STOCKS,MSFT,2014-01-08,0.958970
...,...,...,...,...
165039,LARGE_STOCKS,TITAN.NS,2023-12-22,1.006940
165040,LARGE_STOCKS,TITAN.NS,2023-12-26,1.018182
165041,LARGE_STOCKS,TITAN.NS,2023-12-27,1.024635
165042,LARGE_STOCKS,TITAN.NS,2023-12-28,1.026384


In [136]:
# Mean 7-day growth per (category, date).
df_7d_avg = (
    df_stocker_7d
    .groupby(['category', 'date'])
    .agg(avg_growth_7d=('growth_7d', 'mean'))
    .reset_index()
)

# One row per date, one column per category. Columns are renamed by label, not
# by position: assigning a list of names depends on the order in which pivot
# sorts the category labels and would silently swap the two series if those
# labels ever changed.
df_transformed = (
    df_7d_avg
    .pivot(index='date', columns='category', values='avg_growth_7d')
    .rename(columns={
        'LARGEST_STOCKS': 'avg_growth_7D_largest',
        'LARGE_STOCKS': 'avg_growth_7D_large',
    })
    .rename_axis(columns=None)  # drop the leftover 'category' axis name
    .reset_index()
)

# 1 when the LARGE average beats the LARGEST average on that date, else 0
# (also 0 when either side is missing, because comparisons with NaN are False).
df_transformed['large_win'] = np.where(df_transformed['avg_growth_7D_large'] > df_transformed['avg_growth_7D_largest'], 1, 0)

In [137]:
df_transformed.date.nunique()

2595

In [140]:
# Share of dates (in percent, rounded to a whole number) on which large_win is 1.
q3ans = round(df_transformed.large_win.sum()/(df_transformed.date.nunique())*100)
q3ans

48

# Q4

In [23]:
def ticker_download(ticker_list):
    """Download daily prices for each ticker and attach a 14-period CCI.

    For every ticker the full history is downloaded, calendar helper columns
    (``ticker``, ``year``, ``month``, ``weekday``, ``date``) are added, rows
    are restricted to 2013-12-20 .. 2023-12-31, and ``talib.CCI`` is computed
    on High/Low/Close with ``timeperiod=14``. The function prints a progress
    line per ticker and sleeps one second between downloads.

    Parameters
    ----------
    ticker_list : iterable of str
        Ticker symbols passed to ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        All per-ticker frames stacked with a fresh 0..n-1 index; an empty
        DataFrame when nothing could be downloaded.
    """
    frames = []

    for i, ticker in enumerate(ticker_list):
        print(i, ticker)

        # stock price
        df = yf.download(tickers=ticker, period='max', interval='1d')

        # Nothing to compute an indicator on: report and skip.
        if df.empty:
            print(f'No data found for {ticker}')
            time.sleep(1)
            continue

        # feature generation
        df['ticker'] = ticker
        df['year'] = df.index.year
        df['month'] = df.index.month
        df['weekday'] = df.index.weekday
        df['date'] = pd.to_datetime(df.index.date)

        # .copy() makes the filtered window an independent frame, so adding
        # the CCI column does not write into a slice of the full history.
        in_window = (df['date'] >= '2013-12-20') & (df['date'] <= '2023-12-31')
        df = df[in_window].copy()

        df['CCI'] = talib.CCI(df.High.values, df.Low.values, df.Close.values, timeperiod=14)

        frames.append(df)

        # pause between downloads so the data provider is not hammered
        time.sleep(1)

    if not frames:
        return pd.DataFrame()

    # Concatenate once; repeated pd.concat inside the loop copies the
    # accumulated frame on every iteration.
    return pd.concat(frames, ignore_index=True)

In [29]:
df_stocks_q4 = ticker_download(LARGEST_STOCKS)

0 MSFT


[*********************100%%**********************]  1 of 1 completed


1 AAPL


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

2 GOOG





3 NVDA


[*********************100%%**********************]  1 of 1 completed


4 AMZN


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

5 META





6 BRK-B


[*********************100%%**********************]  1 of 1 completed


7 LLY


[*********************100%%**********************]  1 of 1 completed


8 AVGO


[*********************100%%**********************]  1 of 1 completed


9 V


[*********************100%%**********************]  1 of 1 completed


10 JPM


[*********************100%%**********************]  1 of 1 completed


11 NVO


[*********************100%%**********************]  1 of 1 completed


12 MC.PA


[*********************100%%**********************]  1 of 1 completed


13 ASML


[*********************100%%**********************]  1 of 1 completed


14 RMS.PA


[*********************100%%**********************]  1 of 1 completed


15 OR.PA


[*********************100%%**********************]  1 of 1 completed


16 SAP


[*********************100%%**********************]  1 of 1 completed


17 ACN


[*********************100%%**********************]  1 of 1 completed


18 TTE


[*********************100%%**********************]  1 of 1 completed


19 SIE.DE


[*********************100%%**********************]  1 of 1 completed


20 IDEXY


[*********************100%%**********************]  1 of 1 completed


21 CDI.PA


[*********************100%%**********************]  1 of 1 completed


22 RELIANCE.NS


[*********************100%%**********************]  1 of 1 completed


23 TCS.NS


[*********************100%%**********************]  1 of 1 completed


24 HDB


[*********************100%%**********************]  1 of 1 completed


25 BHARTIARTL.NS


[*********************100%%**********************]  1 of 1 completed


26 IBN


[*********************100%%**********************]  1 of 1 completed


27 SBIN.NS


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

28 LICI.NS





29 INFY


[*********************100%%**********************]  1 of 1 completed


30 ITC.NS


[*********************100%%**********************]  1 of 1 completed


31 HINDUNILVR.NS


[*********************100%%**********************]  1 of 1 completed


32 LT.NS


[*********************100%%**********************]  1 of 1 completed


In [80]:
# Keep Friday rows (day_of_week == 4; Monday is 0) that have a CCI value.
df_stocks_q4_filter = df_stocks_q4[(df_stocks_q4['date'].dt.day_of_week == 4) & ~df_stocks_q4['CCI'].isna()]

In [91]:
df_stocks_q4_filter

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,ticker,year,month,weekday,date,CCI
13,35.900002,36.150002,35.750000,36.040001,30.291704,40548800,MSFT,2014,1,4,2014-01-10,-98.137328
18,36.830002,36.830002,36.150002,36.380001,30.577467,46267500,MSFT,2014,1,4,2014-01-17,9.739628
22,37.450001,37.549999,36.529999,36.810001,30.938900,76395500,MSFT,2014,1,4,2014-01-24,137.751671
27,36.950001,37.889999,36.560001,37.840000,31.804604,93162300,MSFT,2014,1,4,2014-01-31,176.485954
32,36.320000,36.590000,36.009998,36.560001,30.728765,33260500,MSFT,2014,2,4,2014-02-07,-11.391088
...,...,...,...,...,...,...,...,...,...,...,...,...
80959,3129.949951,3197.949951,3121.050049,3190.649902,3190.649902,2112908,LT.NS,2023,12,4,2023-12-01,273.278565
80964,3377.000000,3430.000000,3366.000000,3378.449951,3378.449951,2428891,LT.NS,2023,12,4,2023-12-08,117.076223
80969,3435.149902,3498.899902,3432.850098,3488.000000,3488.000000,2632935,LT.NS,2023,12,4,2023-12-15,103.918813
80974,3424.000000,3496.000000,3408.600098,3477.949951,3477.949951,1681707,LT.NS,2023,12,4,2023-12-22,70.767162


In [81]:
# MSFT-only subset of the filtered rows.
# NOTE(review): no later cell visible in this notebook reads df_msft.
df_msft = df_stocks_q4_filter[df_stocks_q4_filter['ticker'] == 'MSFT']

In [114]:
# Simulate the strategy separately for each ticker and record its total profit.
# One dict per ticker is collected and the DataFrame is built once at the end,
# instead of growing it with pd.concat and resetting the index in place.
ticker_profits = []

for ticker in df_stocks_q4_filter['ticker'].unique():
    df = df_stocks_q4_filter[df_stocks_q4_filter['ticker'] == ticker].copy()

    # Exit price = Adj Close of the next remaining row for this ticker.
    # NOTE(review): the frame holds only the Friday rows that survived the
    # filter, so if a Friday is absent the gap to the next row is longer than
    # one week - confirm this is acceptable.
    df['sell_price'] = df['Adj Close'].shift(-1)

    # Stake 1000 only when CCI > 200 and one share costs at most 1000.
    df['investment'] = np.where((df['CCI'] > 200) & (df['Adj Close'] <= 1000), 1000, 0)
    # Whole shares only.
    df['amount'] = np.floor(df['investment'] / df['Adj Close']).astype(int)
    df['profit'] = (df['sell_price'] - df['Adj Close']) * df['amount']

    # Drops rows with any missing value, including the last row of each
    # ticker, which has no sell_price.
    df = df.dropna()

    ticker_profits.append({'ticker': ticker, 'profit': df['profit'].sum()})

# Explicit columns keep 'ticker' and 'profit' available even when no ticker
# was processed.
df_result = pd.DataFrame(ticker_profits, columns=['ticker', 'profit'])

In [119]:
# Total profit over all tickers, divided by 1000 (i.e. expressed in thousands).
df_result.profit.sum()/1000

0.7593863606452942