In [9]:
# IMPORTS
import numpy as np
import pandas as pd

#Fin Data Sources
import yfinance as yf
import pandas_datareader as pdr

#Data viz
import plotly.graph_objs as go
import plotly.graph_objects as go
import plotly.express as px

import time
from datetime import date, timedelta
import datetime
import calendar

# for graphs
import matplotlib.pyplot as plt

In [6]:
df_full = pd.read_parquet("content/stocks_df_combined_2024_05_07.parquet.brotli")

In [14]:
df_full.dtypes[:10]

Open                  float64
High                  float64
Low                   float64
Close                 float64
Adj Close_x           float64
Volume                float64
Ticker                 object
Year                    int32
Month          datetime64[ns]
Weekday                 int32
dtype: object

In [18]:
CATEGORICAL = ['Month', 'Weekday', 'Ticker', 'ticker_type', 'wom']

In [25]:
def week_of_month(tgtdate):
    # tgtdate = tgtdate.to_datetime()

    days_this_month = calendar.mdays[tgtdate.month]
    for i in range(1, days_this_month):
        d = datetime.datetime(tgtdate.year, tgtdate.month, i)
        if d.day - d.weekday() > 0:
            startdate = d
            break
    # now we canuse the modulo 7 appraoch
    return 'week_' + str((tgtdate - startdate).days //7 + 1)

In [39]:
df_full.head(10)

Unnamed: 0,Open,High,Low,Close,Adj Close_x,Volume,Ticker,Year,Month,Weekday,...,growth_brent_oil_30d,growth_brent_oil_90d,growth_brent_oil_365d,growth_btc_usd_1d,growth_btc_usd_3d,growth_btc_usd_7d,growth_btc_usd_30d,growth_btc_usd_90d,growth_btc_usd_365d,wom
0,0.088542,0.101563,0.088542,0.097222,0.060163,1031789000.0,MSFT,1986,1986-03-01,3,...,,,,,,,,,,week_0
1,0.097222,0.102431,0.097222,0.100694,0.062311,308160000.0,MSFT,1986,1986-03-01,4,...,,,,,,,,,,week_0
2,0.100694,0.103299,0.100694,0.102431,0.063386,133171200.0,MSFT,1986,1986-03-01,0,...,,,,,,,,,,week_0
3,0.102431,0.103299,0.098958,0.099826,0.061774,67766400.0,MSFT,1986,1986-03-01,1,...,,,,,,,,,,week_0
4,0.099826,0.100694,0.097222,0.09809,0.0607,47894400.0,MSFT,1986,1986-03-01,2,...,,,,,,,,,,week_0
5,0.09809,0.09809,0.094618,0.095486,0.059089,58435200.0,MSFT,1986,1986-03-01,3,...,,,,,,,,,,week_0
6,0.095486,0.097222,0.091146,0.092882,0.057477,59990400.0,MSFT,1986,1986-03-01,4,...,,,,,,,,,,week_0
7,0.092882,0.092882,0.08941,0.090278,0.055866,65289600.0,MSFT,1986,1986-03-01,0,...,,,,,,,,,,week_0
8,0.090278,0.092014,0.08941,0.092014,0.05694,32083200.0,MSFT,1986,1986-03-01,1,...,,,,,,,,,,week_0
9,0.092014,0.095486,0.091146,0.094618,0.058551,22752000.0,MSFT,1986,1986-03-01,2,...,,,,,,,,,,week_0


In [46]:
# create new month
df_full['month_extracted'] = df_full['Month'].dt.month

In [50]:
df_full[['Year',  'Weekday', 'month_extracted']]

Unnamed: 0,Year,Weekday,month_extracted
0,1986,3,3
1,1986,4,3
2,1986,0,3
3,1986,1,3
4,1986,2,3
...,...,...,...
5422,2024,1,4
5423,2024,3,5
5424,2024,4,5
5425,2024,0,5


In [51]:
def nth_weekday(year, month, weekday, n):
    # Find the first day of the month
    first_day = datetime(year, month, 1)
    # Find the first occurrence of the specified weekday in the month
    first_occurrence = first_day + timedelta(days=(weekday - first_day.weekday() + 7) % 7)
    # Calculate the nth occurrence
    nth_occurrence = first_occurrence + timedelta(weeks=n-1)
    return nth_occurrence

# Assuming the weekday is 0=Monday, 1=Tuesday, ..., 6=Sunday
# Add a 'Date' column to the DataFrame
df_full['Date'] = df_full.apply(lambda row: nth_weekday(row['Year'], row['month_extracted'], row['Weekday'], 1), axis=1)

# Format the date as 'yyyy-mm-dd'
df_full['Date'] = df_full['Date'].dt.strftime('%Y-%m-%d')

TypeError: 'module' object is not callable

In [49]:
df_full['wom'] = df_full['Month'].apply(week_of_month)

In [27]:
df_full[['Month','wom']]

Unnamed: 0,Month,wom
0,1986-03-01,week_0
1,1986-03-01,week_0
2,1986-03-01,week_0
3,1986-03-01,week_0
4,1986-03-01,week_0
...,...,...
5422,2024-04-01,week_1
5423,2024-05-01,week_0
5424,2024-05-01,week_0
5425,2024-05-01,week_0


In [31]:
df_full['wom'].unique()

array(['week_0', 'week_1'], dtype=object)

In [33]:
pd.get_dummies(df_full, columns=['wom'])

Unnamed: 0,Open,High,Low,Close,Adj Close_x,Volume,Ticker,Year,Month,Weekday,...,growth_brent_oil_90d,growth_brent_oil_365d,growth_btc_usd_1d,growth_btc_usd_3d,growth_btc_usd_7d,growth_btc_usd_30d,growth_btc_usd_90d,growth_btc_usd_365d,wom_week_0,wom_week_1
0,0.088542,0.101563,0.088542,0.097222,0.060163,1.031789e+09,MSFT,1986,1986-03-01,3,...,,,,,,,,,True,False
1,0.097222,0.102431,0.097222,0.100694,0.062311,3.081600e+08,MSFT,1986,1986-03-01,4,...,,,,,,,,,True,False
2,0.100694,0.103299,0.100694,0.102431,0.063386,1.331712e+08,MSFT,1986,1986-03-01,0,...,,,,,,,,,True,False
3,0.102431,0.103299,0.098958,0.099826,0.061774,6.776640e+07,MSFT,1986,1986-03-01,1,...,,,,,,,,,True,False
4,0.099826,0.100694,0.097222,0.098090,0.060700,4.789440e+07,MSFT,1986,1986-03-01,2,...,,,,,,,,,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5422,3639.000000,3648.949951,3584.050049,3594.300049,3594.300049,1.571996e+06,LT.NS,2024,2024-04-01,1,...,1.108923,0.936075,0.949809,0.956129,0.913106,0.850046,1.423982,2.158543,False,True
5423,3590.050049,3634.149902,3576.050049,3599.500000,3599.500000,3.748847e+06,LT.NS,2024,2024-05-01,3,...,1.053911,0.931945,1.014925,0.926103,0.916902,0.903379,1.369046,2.038296,True,False
5424,3610.000000,3622.000000,3488.449951,3499.800049,3499.800049,4.079696e+06,LT.NS,2024,2024-05-01,4,...,1.049197,0.946816,1.063704,1.037155,0.986425,0.953153,1.462818,2.180063,True,False
5425,3522.800049,3527.000000,3441.100098,3463.300049,3463.300049,2.614667e+06,LT.NS,2024,2024-05-01,0,...,1.027877,0.952887,0.986426,1.004327,0.989362,0.916771,1.465996,2.219715,True,False
