In [90]:
%load_ext autoreload
%autoreload all
import numpy as np
import pandas as pd
import pandas_ta as ta
from faker import Faker
from custom_providers import dates, names_and_tickers, price_columns, sectors
from calculated_columns.moving_average import calculate_moving_average
from calculated_columns.macd import calculate_macd
from utilities.missing_values import add_missing_values_to_df
import talib

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [91]:
# Specify number of rows and price range
num_rows = 100
price_range = (100, 1000)
time_period_in_days = 8

# Add custom providers
fake = Faker()
fake.add_provider(dates.Dates)
fake.add_provider(names_and_tickers.NamesAndTickers)
fake.add_provider(price_columns.PriceColumns)
fake.add_provider(sectors.Sectors)

In [92]:
# Generate values for initial columns
date_df = fake.generate_dates(time_period_in_days=time_period_in_days,
                              num_rows=num_rows,)

# name_prob_dict = {"Company": 0.1, "Ticker": 0.1}
name_ticker_df = fake.generate_names_and_tickers(
    time_period_in_days=time_period_in_days,
    num_rows=num_rows
)#, **name_prob_dict)

sector_df = fake.generate_sectors(
    num_rows=num_rows,
    time_period_in_days=time_period_in_days
)

# price_prob_dict = {
#     "Open Price": 0.1,
#     "Close Price": 0.1,
#     "High Price": 0.1,
#     "Low Price": 0.1,
#     "Volume": 0.1
# }

price_df = fake.generate_prices(
    num_rows=num_rows,
    price_range=price_range
)

In [93]:
df = pd.concat([date_df, name_ticker_df, sector_df, price_df], axis=1)
df.head()

Unnamed: 0,Date,Company,Ticker,Sector,Open Price,Close Price,High Price,Low Price,Volume
0,2023-01-27,Herrera-Stanton,HS6OA,Technology,141.397455,139.889615,144.257289,124.659403,241915
1,2023-01-28,Herrera-Stanton,HS6OA,Technology,131.514632,128.9098,134.374466,113.679588,737952
2,2023-01-29,Herrera-Stanton,HS6OA,Technology,112.707259,116.794285,119.654119,97.477046,218124
3,2023-01-30,Herrera-Stanton,HS6OA,Technology,144.61673,148.530827,151.39066,129.386517,447477
4,2023-01-31,Herrera-Stanton,HS6OA,Technology,277.88439,285.471934,288.331767,262.654177,535830


In [94]:
window = 10

# Calculate and add Moving Average with numpy
df['Moving Average'] = calculate_moving_average(df["Close Price"], window)

# Relative Strength Index with pandas_ta
df['RSI'] = ta.rsi(df['Close Price'], length=14)

# Calculate and add MACD and Signal Line columns to df
calculate_macd(df, "Close Price")

# Bollinger Bands with TALib
df['SMA'] = talib.SMA(df['Close Price'], timeperiod=window)
df['Upper_Band'], df['Middle_Band'], df['Lower_Band'] = talib.BBANDS(df['Close Price'], timeperiod=window, nbdevup=2, nbdevdn=2)

# On-Balance Volume (OBV) with TALib
df['OBV'] = talib.OBV(df['Close Price'], df['Volume'])

#Chaikin Money Flow with TALib
df['CMF'] = talib.ADOSC(df['High Price'], df['Low Price'], df['Close Price'], df['Volume'], fastperiod=3, slowperiod=10)

In [95]:
# Calculate RSI
rsi_period = 14  # RSI period, typically 14 days
rsi_values = talib.RSI(df['Close Price'], timeperiod=rsi_period)

# Add RSI values to the DataFrame
df['rsi'] = rsi_values

In [96]:
len(df.columns)

20

In [97]:
df.isnull().sum()

Date               0
Company            0
Ticker             0
Sector             0
Open Price         0
Close Price        0
High Price         0
Low Price          0
Volume             0
Moving Average     9
RSI               14
MACD               0
Signal_Line        0
SMA                9
Upper_Band         9
Middle_Band        9
Lower_Band         9
OBV                0
CMF                9
rsi               14
dtype: int64

In [98]:
df.dropna(axis=0, inplace=True)
df.reset_index(inplace=True)

In [99]:
df.isnull().sum()

index             0
Date              0
Company           0
Ticker            0
Sector            0
Open Price        0
Close Price       0
High Price        0
Low Price         0
Volume            0
Moving Average    0
RSI               0
MACD              0
Signal_Line       0
SMA               0
Upper_Band        0
Middle_Band       0
Lower_Band        0
OBV               0
CMF               0
rsi               0
dtype: int64

In [100]:
prob_dict = {
    "Date": 0.5,
    "OBV": 0.5,
    "RSI": 0.5,
    "Volume": 0.5
}
add_missing_values_to_df(df=df, **prob_dict)

In [101]:
df

Unnamed: 0,index,Date,Company,Ticker,Sector,Open Price,Close Price,High Price,Low Price,Volume,...,RSI,MACD,Signal_Line,SMA,Upper_Band,Middle_Band,Lower_Band,OBV,CMF,rsi
0,14,,Bennett-Olson,BOTQ0,Healthcare,790.212876,796.073262,798.933096,774.982663,,...,93.536855,173.638602,118.142509,617.746015,999.403761,617.746015,236.088269,,9.203071e+05,93.536855
1,15,2023-02-03,Bennett-Olson,BOTQ0,Healthcare,751.096009,745.654323,753.955843,730.424110,622380.0,...,87.250378,172.792214,129.072450,662.529058,983.846189,662.529058,341.211927,1820362.0,8.948147e+05,87.250378
2,16,2023-01-27,Hurst-Whitehead,HWT92,Consumer Goods,807.879221,807.246443,810.739055,792.016230,529332.0,...,,175.073279,138.272616,707.331020,965.780601,707.331020,448.881440,2349694.0,9.086265e+05,88.286098
3,17,,Hurst-Whitehead,HWT92,Consumer Goods,866.942206,866.822617,869.802039,851.592405,326837.0,...,,179.617816,146.541656,748.110983,961.784661,748.110983,534.437304,2676531.0,9.016375e+05,89.200002
4,18,2023-01-29,Hurst-Whitehead,HWT92,Consumer Goods,818.127367,814.745041,820.987200,799.514828,299106.0,...,83.096949,176.977088,152.628742,781.650586,900.139658,781.650586,663.161515,2377425.0,8.566483e+05,83.096949
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,95,2023-02-03,Fernandez-Parker,FPVF2,Technology,1002.378969,999.504711,1005.238803,984.274499,,...,52.113572,1.775411,2.171572,1000.179320,1005.457978,1000.179320,994.900663,-1167213.0,9.446632e+05,52.113572
82,96,2023-01-27,"Williams, Cook and Ramsey",WCARKVS,Healthcare,1000.543541,1000.584695,1003.444529,985.313328,,...,,1.672569,2.071771,1000.031001,1005.170580,1000.031001,994.891421,,9.983108e+05,53.187754
83,97,2023-01-28,"Williams, Cook and Ramsey",WCARKVS,Healthcare,1001.117207,996.122958,1003.977041,980.892746,269214.0,...,48.361233,1.217012,1.900820,999.471961,1004.962149,999.471961,993.981773,,9.568727e+05,48.361233
84,98,,"Williams, Cook and Ramsey",WCARKVS,Healthcare,999.196001,1001.490850,1004.350684,983.965788,849070.0,...,53.793825,1.274433,1.775542,999.793565,1005.342038,999.793565,994.245091,,1.047289e+06,53.793825
