In [2]:
import yfinance as yf
import pandas as pd
import talib
import time
# Show full cell contents when frames are displayed (no column-width truncation)
pd.options.display.max_colwidth = None

In [3]:
# Load the symbol table and keep only rows whose Symbol is shorter than five characters
symbols_df = pd.read_csv("../Resources/symbols2.csv").loc[
    lambda frame: frame["Symbol"].str.len() < 5
]
symbols_df.head()

Unnamed: 0,Symbol,Name,Last,Change,%Chg,High,Low,Volume,Time
0,VCIG,Vci Global Limited Ordinary Share,4.1,2.31,+129.05%,7.74,2.33,257576188.0,2024-11-27
1,NVDA,Nvidia Corp,135.34,-1.58,-1.15%,137.22,131.8,226370891.0,2024-11-27
2,MARA,Mara Holdings Inc,26.92,1.95,+7.81%,27.32,25.0,87941695.0,2024-11-27
3,BBD,Banco Bradesco S.A. ADR,2.26,-0.11,-4.64%,2.37,2.24,69509200.0,2024-11-27
4,SMCI,Super Micro Computer,35.07,0.64,+1.86%,37.14,34.43,63261301.0,2024-11-27


In [4]:
# Symbols to look up, taken as a Series from the filtered table
tickers = symbols_df.loc[:, "Symbol"]
print(tickers.shape[0])

197


In [36]:
# Per-ticker results. Volume and outstandingShares are index-aligned with
# validTickers: entry i of each list belongs to validTickers[i].
validTickers = []
invalidTickers = []
Volume = []
trainingVolume = []  # not populated in this cell
outstandingShares = []
trainingOutstandingShares = []  # not populated in this cell

# Fetch one month of history plus the share count for every symbol from the CSV
for ticker in tickers:
    try:
        print(f"Fetching data for: {ticker}")
        stock = yf.Ticker(ticker)
        historical_data = stock.history(period="1mo")

        # An empty history means the symbol is unusable; reject it before
        # anything is recorded so the result lists never get out of step.
        if historical_data.empty:
            raise ValueError(f"No data found for {ticker}")

        # Extract volume
        volume = historical_data['Volume']

        # Extract shares outstanding (None when the key is missing from info)
        shares_outstanding = stock.info.get('sharesOutstanding')
    except Exception as e:
        # Track invalid tickers. Deliberately best-effort: one bad symbol is
        # reported and skipped instead of aborting the whole run.
        print(f"Error fetching data for {ticker}: {e}")
        invalidTickers.append(ticker)
    else:
        # Track valid tickers: all three values are recorded together, and only
        # after every fetch step succeeded, so the lists always stay the same length.
        validTickers.append(ticker)
        Volume.append(volume)
        outstandingShares.append(shares_outstanding)


Fetching data for: VCIG
Fetching data for: NVDA
Fetching data for: MARA
Fetching data for: BBD
Fetching data for: SMCI
Fetching data for: ACHR
Fetching data for: RGTI
Fetching data for: BITF
Fetching data for: TSLA
Fetching data for: UMAC
Fetching data for: PLUG
Fetching data for: INTC
Fetching data for: LCID
Fetching data for: PLTR
Fetching data for: RIVN
Fetching data for: SOFI
Fetching data for: QUBT
Fetching data for: NIO
Fetching data for: IREN
Fetching data for: OPEN
Fetching data for: DELL
Fetching data for: ABEV
Fetching data for: GRAB
Fetching data for: MSTR
Fetching data for: ITUB
Fetching data for: CLSK
Fetching data for: AAPL
Fetching data for: T
Fetching data for: SOUN
Fetching data for: BTBT
Fetching data for: NU
Fetching data for: AMD
Fetching data for: CYCN
Fetching data for: RIOT
Fetching data for: F
Fetching data for: PFE
Fetching data for: VALE
Fetching data for: AMZN
Fetching data for: HPQ
Fetching data for: IQ
Fetching data for: RKLB
Fetching data for: WBD
Fetching

In [37]:
# One row per valid ticker: its daily volume Series and its shares outstanding
volume_df = pd.DataFrame(
    {
        "Tickers": validTickers,
        "Volume": Volume,
        "outstandingShares": outstandingShares,
    }
)

In [38]:
# Row count of the raw per-ticker frame
print(volume_df.shape[0])

197


In [39]:
# Per-ticker volume aggregates, in the same order as the rows of volume_df
final_volume_values = []  # total volume over the fetched window
avg_volumes = []          # mean daily volume over the fetched window
recent_volumes = []       # volume of the second-to-last session in the window

# Each cell of the Volume column holds that ticker's daily volume Series.
# Work on the numbers directly instead of parsing the Series' printed text,
# which breaks on truncated reprs, float dtypes and very short histories.
for daily_volume in volume_df["Volume"]:
    sessions = pd.to_numeric(pd.Series(daily_volume), errors="coerce").dropna()
    days = len(sessions)

    # Get recent daily volume.
    # NOTE(review): the previous text parsing took the token eight places from
    # the end of the printed Series, which for a tz-aware daily index appears to
    # be the second-to-last session; that choice is preserved here. Confirm
    # whether the latest session was meant instead.
    recent_volume = int(sessions.iloc[-2]) if days >= 2 else float("nan")
    recent_volumes.append(recent_volume)

    # Total and average volume; a ticker with no usable sessions gets NaN for
    # the average so it is removed by the later dropna instead of raising.
    volumes = int(sessions.sum())
    avg_volume = volumes / days if days else float("nan")
    final_volume_values.append(volumes)
    avg_volumes.append(avg_volume)

In [40]:
# Add the extracted values to a new data frame
volume_df = pd.DataFrame({"Ticker":tickers,"Volume":final_volume_values, "outstandingShares":outstandingShares, "avgVolume":avg_volumes, \
                          "recentVolume":recent_volumes})
# Drop null values
volume_df = volume_df.dropna()
# Recasting data types
volume_df["Volume"] = volume_df["Volume"].astype("int64")
volume_df["recentVolume"] = volume_df["recentVolume"].astype("int64")

# Calculating Turnover Ratio
turnover_ratio = volume_df["Volume"] / volume_df["outstandingShares"]
volume_df["turnoverRatio"] = turnover_ratio

# Converting to standard notation for simplicity
volume_df['avgVolume'] = volume_df['avgVolume'].apply(lambda x: f"{x:.0f}")
volume_df["avgVolume"] = volume_df["avgVolume"].astype("int64")
volume_df['outstandingShares'] = volume_df['outstandingShares'].apply(lambda x: f"{x:.0f}")
volume_df["outstandingShares"] = volume_df["outstandingShares"].astype("int64")

# Calculating Relative Volume
relativeVolume = volume_df["recentVolume"] / volume_df["avgVolume"]
volume_df["relativeVolume"] = relativeVolume
volume_df.head()


Unnamed: 0,Ticker,Volume,outstandingShares,avgVolume,recentVolume,turnoverRatio,relativeVolume
0,VCIG,327160400,4000000,15579067,3199900,81.7901,0.205397
1,NVDA,4669843600,24490000384,222373505,164414000,0.190684,0.73936
2,MARA,1867447600,321831008,88926076,60750200,5.802572,0.683154
3,BBD,660542600,5295830016,31454410,25471900,0.124729,0.809804
4,SMCI,2487806900,585564992,118466995,204353000,4.248558,1.724978


In [41]:
# Turnover-ratio cut-offs: the 25th and 90th percentiles of the current frame
low_threshold, high_threshold = volume_df["turnoverRatio"].quantile([0.25, 0.9])

# Keep tickers whose turnover ratio lies within the cut-offs (both ends inclusive)
filtered_tickers = volume_df.loc[
    volume_df["turnoverRatio"].between(low_threshold, high_threshold),
    "Ticker",
]

In [42]:
# Number of tickers that passed the turnover-ratio filter
print(filtered_tickers.shape[0])

128


In [43]:
# Restrict the metrics frame to the tickers selected by the turnover filter
volume_df = volume_df.loc[lambda frame: frame["Ticker"].isin(filtered_tickers)]

In [44]:
# Row count after the turnover filter
print(volume_df.shape[0])

128


In [56]:

# Keep only tickers whose relative volume is at least 0.55
volume_df = volume_df.loc[volume_df["relativeVolume"].ge(0.55)]
print(volume_df.shape[0])

113


In [58]:
# Write the liquidity-filtered tickers to disk, overwriting any previous export.
# The file name is kept exactly as spelled, since other code may read this path.
volume_df.to_csv(
    "../Resources/Filtererd_by_Liquidity.csv",
    mode="w",
    index=False,
)