# **Market Prediction Using Macro Economic Data**
using LSTM and TFT

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

---
---
---

# Importing and cleaning  data

## **"niftyvix"** 

In [2]:
df0 = pd.read_csv("MacroData/fniftyvix_data.csv")
niftyvix = df0.copy()


In [3]:
# niftyvix = niftyvix.rename(columns={'Date.1': 'Date'})
# niftyvix = niftyvix.drop("Date.1", axis=1)
niftyvix = niftyvix.drop("Date.1", axis=1)
niftyvix['Date'] =  pd.to_datetime(niftyvix['Date'])
niftyvix.head(1)
niftyvix.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 53 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           4016 non-null   datetime64[ns]
 1   vixPrice       4016 non-null   float64       
 2   vixChange%     4016 non-null   float64       
 3   niftyPrice     4016 non-null   float64       
 4   niftyChange %  4016 non-null   float64       
 5   n5day          4016 non-null   float64       
 6   n10day         4016 non-null   float64       
 7   n20day         4016 non-null   float64       
 8   n1day          4016 non-null   float64       
 9   n60day         4016 non-null   float64       
 10  nc5day         4016 non-null   float64       
 11  nc10day        4016 non-null   float64       
 12  nc20day        4016 non-null   float64       
 13  nc1day         4016 non-null   float64       
 14  nc60day        4016 non-null   float64       
 15  v5day          4016 n

In [4]:
niftyvix = niftyvix.drop(niftyvix.columns[[5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,46,47,48,49,50]], axis=1)

In [5]:
niftyvix.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 28 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           4016 non-null   datetime64[ns]
 1   vixPrice       4016 non-null   float64       
 2   vixChange%     4016 non-null   float64       
 3   niftyPrice     4016 non-null   float64       
 4   niftyChange %  4016 non-null   float64       
 5   DayOfWeek      4016 non-null   object        
 6   month          4016 non-null   object        
 7   Tuesday        4016 non-null   int64         
 8   Wednesday      4016 non-null   int64         
 9   Friday         4016 non-null   int64         
 10  Monday         4016 non-null   int64         
 11  Thursday       4016 non-null   int64         
 12  Saturday       4016 non-null   int64         
 13  Sunday         4016 non-null   int64         
 14  March          4016 non-null   int64         
 15  April          4016 n

## Above dataframe will be used as a template to clean and process data

> Dates must be in ascending order, hence DFs are being reciprocaled as necessary

> All date formats are being matched with "niftyvix" DF

> Copy of each orignal df is being made so that it stays safe

> All dates are changed from object to datetime format

---

## **"Gold"** 

In [6]:
df1 = pd.read_csv("MacroData/Gold.csv")
gold = df1.copy()
gold.head()

Unnamed: 0,Date,GoldPrice,GoldOpen,GoldHigh,GoldLow,Vol.,GoldChange %
0,27-08-2024,2529.3,2530.5,2537.7,2515.4,7.27K,-0.08%
1,26-08-2024,2531.4,2522.1,2538.9,2520.7,6.08K,0.35%
2,23-08-2024,2522.6,2497.2,2530.4,2497.0,6.90K,1.17%
3,22-08-2024,2493.5,2525.3,2528.0,2483.2,7.69K,-1.21%
4,21-08-2024,2524.1,2528.9,2534.0,2505.6,5.72K,-0.13%


Dropping unecesssary columns

In [7]:
gold = gold[['Date','GoldPrice','GoldChange %']]
gold.head()

Unnamed: 0,Date,GoldPrice,GoldChange %
0,27-08-2024,2529.3,-0.08%
1,26-08-2024,2531.4,0.35%
2,23-08-2024,2522.6,1.17%
3,22-08-2024,2493.5,-1.21%
4,21-08-2024,2524.1,-0.13%


change columns to right data type

In [8]:
gold['Date']=pd.to_datetime(gold['Date'])

gold['GoldPrice']=gold['GoldPrice'].astype(str).str.replace(',','')
gold['GoldPrice']=gold['GoldPrice'].astype(float)

gold['GoldChange %']=gold['GoldChange %'].astype(str).str.replace("%","")
gold['GoldChange %']=gold['GoldChange %'].astype(float)

# gold['Date'] =  pd.to_datetime(gold['Date'])

gold.info()
gold

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4202 entries, 0 to 4201
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Date          4202 non-null   datetime64[ns]
 1   GoldPrice     4202 non-null   float64       
 2   GoldChange %  4202 non-null   float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 98.6 KB


  gold['Date']=pd.to_datetime(gold['Date'])


Unnamed: 0,Date,GoldPrice,GoldChange %
0,2024-08-27,2529.3,-0.08
1,2024-08-26,2531.4,0.35
2,2024-08-23,2522.6,1.17
3,2024-08-22,2493.5,-1.21
4,2024-08-21,2524.1,-0.13
...,...,...,...
4197,2008-03-10,971.8,-0.25
4198,2008-03-07,974.2,-0.30
4199,2008-03-06,977.1,-1.15
4200,2008-03-05,988.5,2.30


Date i.e, index is not in accending order so we will correct it 

In [9]:
gold = gold.iloc[::-1].reset_index(drop=True)
gold.head()

Unnamed: 0,Date,GoldPrice,GoldChange %
0,2008-03-04,966.3,-1.82
1,2008-03-05,988.5,2.3
2,2008-03-06,977.1,-1.15
3,2008-03-07,974.2,-0.3
4,2008-03-10,971.8,-0.25


---

## **Crude Oil**

In [10]:
df2 = pd.read_csv("MacroData/Crudeoil.csv")
crude = df2.copy()
crude.head()

Unnamed: 0,Date,CrudePrice,CrudeOpen,CrudeHigh,CrudeLow,Vol.,CrudeChange %
0,03/04/2008,99.52,102.35,103.33,98.87,366.72K,-2.86%
1,03/05/2008,104.52,99.88,104.95,99.55,414.31K,5.02%
2,03/06/2008,105.47,104.64,105.97,102.85,337.77K,0.91%
3,03/07/2008,105.15,105.58,106.54,103.91,292.40K,-0.30%
4,03/10/2008,107.9,105.25,108.21,104.08,340.55K,2.62%


Dropping uncessecary columns

In [11]:
crude = crude[["Date","CrudePrice","CrudeChange %"]]

Changing columns to right datatype

In [12]:
crude['Date']=pd.to_datetime(crude['Date'])

crude['CrudePrice']=crude['CrudePrice'].astype(str).str.replace(',','')
crude['CrudePrice']=crude['CrudePrice'].astype(float)

crude['CrudeChange %']=crude['CrudeChange %'].astype(str).str.replace("%","")
crude['CrudeChange %']=crude['CrudeChange %'].astype(float)

crude.info()
crude

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4281 entries, 0 to 4280
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           4281 non-null   datetime64[ns]
 1   CrudePrice     4281 non-null   float64       
 2   CrudeChange %  4281 non-null   float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 100.5 KB


Unnamed: 0,Date,CrudePrice,CrudeChange %
0,2008-03-04,99.52,-2.86
1,2008-03-05,104.52,5.02
2,2008-03-06,105.47,0.91
3,2008-03-07,105.15,-0.30
4,2008-03-10,107.90,2.62
...,...,...,...
4276,2024-08-21,71.30,-2.56
4277,2024-08-22,72.28,1.37
4278,2024-08-23,73.93,2.28
4279,2024-08-26,76.17,3.03


---

## **USDINR**

In [13]:
df3 = pd.read_csv("MacroData/usdinr.csv")
usdinr = df3.copy()
usdinr.head()

Unnamed: 0,Date,inrPrice,inrOpen,inrHigh,inrLow,Vol.,inrChange %
0,08/27/2024,83.91,83.865,83.95,83.865,,0.08%
1,08/26/2024,83.84,83.801,83.91,83.776,,0.04%
2,08/23/2024,83.809,83.913,83.933,83.778,,-0.14%
3,08/22/2024,83.93,83.895,83.965,83.876,,0.07%
4,08/21/2024,83.87,83.791,83.935,83.726,,0.14%


Dropping uncessecary columns

In [14]:
usdinr = usdinr[["Date","inrPrice","inrChange %"]]

Changing columns to right datatype

In [15]:
usdinr['Date']=pd.to_datetime(usdinr['Date'])

usdinr['inrPrice']=usdinr['inrPrice'].astype(str).str.replace(',','')
usdinr['inrPrice']=usdinr['inrPrice'].astype(float)

usdinr['inrChange %']=usdinr['inrChange %'].astype(str).str.replace("%","")
usdinr['inrChange %']=usdinr['inrChange %'].astype(float)

usdinr.info()
usdinr

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4301 entries, 0 to 4300
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         4301 non-null   datetime64[ns]
 1   inrPrice     4301 non-null   float64       
 2   inrChange %  4301 non-null   float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 100.9 KB


Unnamed: 0,Date,inrPrice,inrChange %
0,2024-08-27,83.910,0.08
1,2024-08-26,83.840,0.04
2,2024-08-23,83.809,-0.14
3,2024-08-22,83.930,0.07
4,2024-08-21,83.870,0.14
...,...,...,...
4296,2008-03-10,40.400,-0.07
4297,2008-03-07,40.430,0.72
4298,2008-03-06,40.140,-0.15
4299,2008-03-05,40.200,-0.05


Date is not accending

In [16]:
usdinr = usdinr.iloc[::-1].reset_index(drop=True)
usdinr.head()

Unnamed: 0,Date,inrPrice,inrChange %
0,2008-03-04,40.22,-0.16
1,2008-03-05,40.2,-0.05
2,2008-03-06,40.14,-0.15
3,2008-03-07,40.43,0.72
4,2008-03-10,40.4,-0.07


---

## **"US Dollar Index"**

In [17]:
df4 = pd.read_csv("MacroData/USdollarindex.csv")
usdindex = df4.copy()
usdindex.head()

Unnamed: 0,Date,diPrice,diOpen,diHigh,diLow,Vol.,diChange %
0,27-08-2024,100.55,100.82,100.93,100.51,,-0.30%
1,26-08-2024,100.85,100.68,100.92,100.53,,0.13%
2,23-08-2024,100.72,101.46,101.55,100.6,,-0.78%
3,22-08-2024,101.51,101.13,101.63,101.09,,0.46%
4,21-08-2024,101.04,101.35,101.63,100.92,,-0.40%


Dropping uncessecary columns

In [18]:
usdindex = usdindex[["Date","diPrice","diChange %"]]

Changing columns to right datatype

In [19]:
usdindex['Date']=pd.to_datetime(usdindex['Date'])

usdindex['diPrice']=usdindex['diPrice'].astype(str).str.replace(',','')
usdindex['diPrice']=usdindex['diPrice'].astype(float)

usdindex['diChange %']=usdindex['diChange %'].astype(str).str.replace("%","")
usdindex['diChange %']=usdindex['diChange %'].astype(float)

usdindex.info()
usdindex

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4298 entries, 0 to 4297
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Date        4298 non-null   datetime64[ns]
 1   diPrice     4298 non-null   float64       
 2   diChange %  4298 non-null   float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 100.9 KB


  usdindex['Date']=pd.to_datetime(usdindex['Date'])


Unnamed: 0,Date,diPrice,diChange %
0,2024-08-27,100.55,-0.30
1,2024-08-26,100.85,0.13
2,2024-08-23,100.72,-0.78
3,2024-08-22,101.51,0.46
4,2024-08-21,101.04,-0.40
...,...,...,...
4293,2008-03-10,72.99,-0.05
4294,2008-03-07,73.03,0.04
4295,2008-03-06,73.00,-0.65
4296,2008-03-05,73.48,-0.24


Date is not accending

In [20]:
usdindex = usdindex.iloc[::-1].reset_index(drop=True)
usdindex.head()

Unnamed: 0,Date,diPrice,diChange %
0,2008-03-04,73.66,-0.05
1,2008-03-05,73.48,-0.24
2,2008-03-06,73.0,-0.65
3,2008-03-07,73.03,0.04
4,2008-03-10,72.99,-0.05


---

## **10-2year bond yeild**  

In [21]:
df5 = pd.read_csv("MacroData/10-2year.csv")

In [22]:
df5["Date"] = pd.to_datetime(df5['Date'])
df5.info()
df5.head(15)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4301 entries, 0 to 4300
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    4301 non-null   datetime64[ns]
 1   T10Y2Y  4301 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 67.3+ KB


Unnamed: 0,Date,T10Y2Y
0,2008-03-04,1.98
1,2008-03-05,2.04
2,2008-03-06,2.09
3,2008-03-07,2.03
4,2008-03-10,1.99
5,2008-03-11,1.86
6,2008-03-12,1.86
7,2008-03-13,1.93
8,2008-03-14,1.97
9,2008-03-17,1.99


converting **T10Y2Y** column from object type to float

In [23]:
# df5["T10Y2Y"] = df5['T10Y2Y'].astype(float)

# df5.info()

> it gives an error 

ValueError: could not convert string to float: "."

There may be multiple '.' values in the column let us convert them to NaN


In [24]:
df5['T10Y2Y']=df5['T10Y2Y'].replace(".", np.nan)

"." is converted to NaN successfully. 



In [25]:
df5[12:14]

Unnamed: 0,Date,T10Y2Y
12,2008-03-20,1.75
13,2008-03-21,


Now we will fill the NaN values with preceding values and check if it's working

In [26]:
df5['T10Y2Y'] = df5['T10Y2Y'].fillna(method='ffill')    # method = "ffill" fills NaN values with previous values

  df5['T10Y2Y'] = df5['T10Y2Y'].fillna(method='ffill')    # method = "ffill" fills NaN values with previous values


Checking if any NaN values are left

In [27]:
print(df5['T10Y2Y'].isna().sum())  


0


All NaN values are filled with preceding values succcessfully

In [28]:
df5["T10Y2Y"] = df5['T10Y2Y'].astype(float)

In [29]:
df5[12:14]

Unnamed: 0,Date,T10Y2Y
12,2008-03-20,1.75
13,2008-03-21,1.75


Adding **"percent change"** as a feature

In [30]:
df5['T10Y2Y%chng'] = df5['T10Y2Y'].pct_change() * 100
T10Y2Ydf = df5.copy()
T10Y2Ydf['Date'] =  pd.to_datetime(T10Y2Ydf['Date'])
T10Y2Ydf.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4301 entries, 0 to 4300
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         4301 non-null   datetime64[ns]
 1   T10Y2Y       4301 non-null   float64       
 2   T10Y2Y%chng  4298 non-null   float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 100.9 KB


In [31]:
T10Y2Ydf.head()

Unnamed: 0,Date,T10Y2Y,T10Y2Y%chng
0,2008-03-04,1.98,
1,2008-03-05,2.04,3.030303
2,2008-03-06,2.09,2.45098
3,2008-03-07,2.03,-2.870813
4,2008-03-10,1.99,-1.970443


There is a NaN value in first row, we will make it zero

In [32]:
T10Y2Ydf["T10Y2Y%chng"] = T10Y2Ydf['T10Y2Y%chng'].fillna('0').astype(float)
T10Y2Ydf

Unnamed: 0,Date,T10Y2Y,T10Y2Y%chng
0,2008-03-04,1.98,0.000000
1,2008-03-05,2.04,3.030303
2,2008-03-06,2.09,2.450980
3,2008-03-07,2.03,-2.870813
4,2008-03-10,1.99,-1.970443
...,...,...,...
4296,2024-08-21,-0.13,-23.529412
4297,2024-08-22,-0.13,0.000000
4298,2024-08-23,-0.09,-30.769231
4299,2024-08-26,-0.09,0.000000


In [33]:
T10Y2Ydf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4301 entries, 0 to 4300
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         4301 non-null   datetime64[ns]
 1   T10Y2Y       4301 non-null   float64       
 2   T10Y2Y%chng  4301 non-null   float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 100.9 KB


---

## **SnP500**

In [34]:
dfa = pd.read_csv("MacroData/S&P500.csv")
SnP500 = pd.DataFrame()
SnP500[['Date','SnP500Price','SnP500Change %']] = dfa[['Date','Price','Change %']]

SnP500['Date']=pd.to_datetime(SnP500['Date'])

SnP500['SnP500Price']=SnP500['SnP500Price'].astype(str).str.replace(',','')
SnP500['SnP500Price']=SnP500['SnP500Price'].astype(float)

SnP500['SnP500Change %']=SnP500['SnP500Change %'].astype(str).str.replace("%","")
SnP500['SnP500Change %']=SnP500['SnP500Change %'].astype(float)

SnP500.info()
SnP500.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4107 entries, 0 to 4106
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            4107 non-null   datetime64[ns]
 1   SnP500Price     4107 non-null   float64       
 2   SnP500Change %  4107 non-null   float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 96.4 KB


Unnamed: 0,Date,SnP500Price,SnP500Change %
0,2024-09-27,5738.17,-0.13
1,2024-09-26,5745.37,0.4
2,2024-09-25,5722.26,-0.19
3,2024-09-24,5732.93,0.25
4,2024-09-23,5718.57,0.28


---
---

# **Joining all DFs** 
> Now we will join all above DFs togather priortising the Dates of "niftyvix" DataFrame

## Working DFs with **Non-Daily** frequency

Merge each DataFrame one by one, ensuring 'Date' index is maintained

In [35]:
daily_df = niftyvix
daily_df.head()

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,July,August,September,October,November,December,January,February,cluster,Month
0,2008-06-05,30.32,1.68,4676.95,1.99,Thursday,June,0,0,0,...,0,0,0,0,0,0,0,0,2,6
1,2008-06-06,30.32,0.0,4627.8,-1.05,Friday,June,0,0,1,...,0,0,0,0,0,0,0,0,2,6
2,2008-06-09,32.43,6.96,4500.95,-2.74,Monday,June,0,0,0,...,0,0,0,0,0,0,0,0,2,6
3,2008-06-10,30.23,-6.78,4449.8,-1.14,Tuesday,June,1,0,0,...,0,0,0,0,0,0,0,0,2,6
4,2008-06-11,29.64,-1.95,4523.6,1.66,Wednesday,June,0,1,0,...,0,0,0,0,0,0,0,0,2,6


In [36]:
daily_df = pd.merge(daily_df, gold, how='left', on='Date')
daily_df.head(1)

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,September,October,November,December,January,February,cluster,Month,GoldPrice,GoldChange %
0,2008-06-05,30.32,1.68,4676.95,1.99,Thursday,June,0,0,0,...,0,0,0,0,0,0,2,6,875.5,-0.94


In [37]:
daily_df = pd.merge(daily_df, crude, how='left', on='Date')

In [38]:
daily_df = pd.merge(daily_df, usdinr, how='left', on='Date')

In [39]:
daily_df = pd.merge(daily_df, usdindex, how='left', on='Date')

In [40]:
daily_df = pd.merge(daily_df, T10Y2Ydf, how='left', on='Date')

In [41]:
daily_df = pd.merge(daily_df, SnP500, how='left', on='Date')

In [42]:
daily_df.head(2)

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,CrudePrice,CrudeChange %,inrPrice,inrChange %,diPrice,diChange %,T10Y2Y,T10Y2Y%chng,SnP500Price,SnP500Change %
0,2008-06-05,30.32,1.68,4676.95,1.99,Thursday,June,0,0,0,...,127.79,4.49,42.835,0.53,73.04,-0.52,1.54,1.986755,1404.0,1.95
1,2008-06-06,30.32,0.0,4627.8,-1.05,Friday,June,0,0,1,...,138.54,8.41,42.665,-0.4,72.39,-0.89,1.54,0.0,1360.7,-3.08


There are null values in GOLD data so we will fix it 

In [43]:
daily_df.ffill(inplace=True)


In [44]:
daily_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 40 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            4016 non-null   datetime64[ns]
 1   vixPrice        4016 non-null   float64       
 2   vixChange%      4016 non-null   float64       
 3   niftyPrice      4016 non-null   float64       
 4   niftyChange %   4016 non-null   float64       
 5   DayOfWeek       4016 non-null   object        
 6   month           4016 non-null   object        
 7   Tuesday         4016 non-null   int64         
 8   Wednesday       4016 non-null   int64         
 9   Friday          4016 non-null   int64         
 10  Monday          4016 non-null   int64         
 11  Thursday        4016 non-null   int64         
 12  Saturday        4016 non-null   int64         
 13  Sunday          4016 non-null   int64         
 14  March           4016 non-null   int64         
 15  Apri

---

all the DataFrames above were with daily frequency, Now below we will deal with the DataFrames that were not in daily frequency 

### **Fed Interest Rate**

In [45]:
df6 = pd.read_csv("MacroData/Fedinterest.csv")
fedinterest = df6.copy()
fedinterest["Date"] = pd.to_datetime(fedinterest['Date'])
fedinterest.info()
fedinterest.head(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69 entries, 0 to 68
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         69 non-null     datetime64[ns]
 1   Fedinterest  69 non-null     float64       
 2   Fed%change   69 non-null     float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 1.7 KB


Unnamed: 0,Date,Fedinterest,Fed%change
0,2008-06-12,8.0,0.0
1,2008-06-25,8.5,6.25


In [46]:
daily_df = pd.merge(daily_df, fedinterest, how='left', on='Date')
daily_df.ffill(inplace=True)
daily_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 42 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            4016 non-null   datetime64[ns]
 1   vixPrice        4016 non-null   float64       
 2   vixChange%      4016 non-null   float64       
 3   niftyPrice      4016 non-null   float64       
 4   niftyChange %   4016 non-null   float64       
 5   DayOfWeek       4016 non-null   object        
 6   month           4016 non-null   object        
 7   Tuesday         4016 non-null   int64         
 8   Wednesday       4016 non-null   int64         
 9   Friday          4016 non-null   int64         
 10  Monday          4016 non-null   int64         
 11  Thursday        4016 non-null   int64         
 12  Saturday        4016 non-null   int64         
 13  Sunday          4016 non-null   int64         
 14  March           4016 non-null   int64         
 15  Apri

#### Function for feature generation
* Takes a DataFrame column as input and returns a new column that resets its counter (0, 1, 2, ...) whenever the value in the input column changes.

In [47]:
def create_reset_counter(input_column):
    group = (input_column != input_column.shift()).cumsum()
    return input_column.groupby(group).cumcount()


Adding Anticepation Feature

In [48]:

daily_df["FedinterestAnticepation"] = create_reset_counter(daily_df["Fedinterest"])
daily_df.head()


Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,inrChange %,diPrice,diChange %,T10Y2Y,T10Y2Y%chng,SnP500Price,SnP500Change %,Fedinterest,Fed%change,FedinterestAnticepation
0,2008-06-05,30.32,1.68,4676.95,1.99,Thursday,June,0,0,0,...,0.53,73.04,-0.52,1.54,1.986755,1404.0,1.95,,,0
1,2008-06-06,30.32,0.0,4627.8,-1.05,Friday,June,0,0,1,...,-0.4,72.39,-0.89,1.54,0.0,1360.7,-3.08,,,0
2,2008-06-09,32.43,6.96,4500.95,-2.74,Monday,June,0,0,0,...,0.36,72.85,0.64,1.29,-16.233766,1361.8,0.08,,,0
3,2008-06-10,30.23,-6.78,4449.8,-1.14,Tuesday,June,1,0,0,...,0.16,73.7,1.17,1.2,-6.976744,1358.4,-0.25,,,0
4,2008-06-11,29.64,-1.95,4523.6,1.66,Wednesday,June,0,1,0,...,-0.38,73.21,-0.66,1.27,5.833333,1335.5,-1.69,,,0


---

### **RBI Interest Rate**

In [49]:
df7 = pd.read_csv("MacroData/Rbiinterest.csv")
rbiinterest = df7.copy()
rbiinterest["Date"] = pd.to_datetime(rbiinterest['Date'])
rbiinterest.info()
rbiinterest.head(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69 entries, 0 to 68
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         69 non-null     datetime64[ns]
 1   Rbiinterest  69 non-null     float64       
 2   Rbi%change   69 non-null     float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 1.7 KB


Unnamed: 0,Date,Rbiinterest,Rbi%change
0,2008-06-12,8.0,0.0
1,2008-06-25,8.5,6.25


In [50]:
daily_df = pd.merge(daily_df, rbiinterest, how='left', on='Date')
daily_df.ffill(inplace=True)
daily_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 45 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Date                     4016 non-null   datetime64[ns]
 1   vixPrice                 4016 non-null   float64       
 2   vixChange%               4016 non-null   float64       
 3   niftyPrice               4016 non-null   float64       
 4   niftyChange %            4016 non-null   float64       
 5   DayOfWeek                4016 non-null   object        
 6   month                    4016 non-null   object        
 7   Tuesday                  4016 non-null   int64         
 8   Wednesday                4016 non-null   int64         
 9   Friday                   4016 non-null   int64         
 10  Monday                   4016 non-null   int64         
 11  Thursday                 4016 non-null   int64         
 12  Saturday                 4016 non-

Adding Anticepation Feature

In [51]:
daily_df["RbiinterestAnticepation"] = create_reset_counter(daily_df["Rbiinterest"])
daily_df.head()

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,T10Y2Y,T10Y2Y%chng,SnP500Price,SnP500Change %,Fedinterest,Fed%change,FedinterestAnticepation,Rbiinterest,Rbi%change,RbiinterestAnticepation
0,2008-06-05,30.32,1.68,4676.95,1.99,Thursday,June,0,0,0,...,1.54,1.986755,1404.0,1.95,,,0,,,0
1,2008-06-06,30.32,0.0,4627.8,-1.05,Friday,June,0,0,1,...,1.54,0.0,1360.7,-3.08,,,0,,,0
2,2008-06-09,32.43,6.96,4500.95,-2.74,Monday,June,0,0,0,...,1.29,-16.233766,1361.8,0.08,,,0,,,0
3,2008-06-10,30.23,-6.78,4449.8,-1.14,Tuesday,June,1,0,0,...,1.2,-6.976744,1358.4,-0.25,,,0,,,0
4,2008-06-11,29.64,-1.95,4523.6,1.66,Wednesday,June,0,1,0,...,1.27,5.833333,1335.5,-1.69,,,0,,,0


---

### **US Inflation**

In [52]:
df8 = pd.read_csv("MacroData/USinflationdata.csv")
USinflation = df8.copy()
USinflation["Date"] = pd.to_datetime(USinflation['Date'])
USinflation.head()

Unnamed: 0,Date,USInflation Rate (%)
0,2008-03-01,7.7
1,2008-04-01,8.0
2,2008-05-01,8.1
3,2008-06-01,8.6
4,2008-07-01,9.0


adding a column that shows percentage change

In [53]:
USinflation['USInflationRate%chng'] = USinflation['USInflation Rate (%)'].pct_change() * 100 

In [54]:
USinflation['USInflationRate%chng']=USinflation['USInflationRate%chng'].fillna('0').astype(float)

In [55]:
daily_df = pd.merge(daily_df, USinflation, how='left', on='Date')
daily_df.ffill(inplace=True)
daily_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 48 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Date                     4016 non-null   datetime64[ns]
 1   vixPrice                 4016 non-null   float64       
 2   vixChange%               4016 non-null   float64       
 3   niftyPrice               4016 non-null   float64       
 4   niftyChange %            4016 non-null   float64       
 5   DayOfWeek                4016 non-null   object        
 6   month                    4016 non-null   object        
 7   Tuesday                  4016 non-null   int64         
 8   Wednesday                4016 non-null   int64         
 9   Friday                   4016 non-null   int64         
 10  Monday                   4016 non-null   int64         
 11  Thursday                 4016 non-null   int64         
 12  Saturday                 4016 non-

In [56]:
USinflation.info()
USinflation.head(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 198 entries, 0 to 197
Data columns (total 3 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Date                  198 non-null    datetime64[ns]
 1   USInflation Rate (%)  198 non-null    float64       
 2   USInflationRate%chng  198 non-null    float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 4.8 KB


Unnamed: 0,Date,USInflation Rate (%),USInflationRate%chng
0,2008-03-01,7.7,0.0
1,2008-04-01,8.0,3.896104


Adding Anticepation Feature

In [57]:
daily_df["USInflation Rate (%)Anticepation"] = create_reset_counter(daily_df["USInflation Rate (%)"])
daily_df.head()

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,SnP500Change %,Fedinterest,Fed%change,FedinterestAnticepation,Rbiinterest,Rbi%change,RbiinterestAnticepation,USInflation Rate (%),USInflationRate%chng,USInflation Rate (%)Anticepation
0,2008-06-05,30.32,1.68,4676.95,1.99,Thursday,June,0,0,0,...,1.95,,,0,,,0,,,0
1,2008-06-06,30.32,0.0,4627.8,-1.05,Friday,June,0,0,1,...,-3.08,,,0,,,0,,,0
2,2008-06-09,32.43,6.96,4500.95,-2.74,Monday,June,0,0,0,...,0.08,,,0,,,0,,,0
3,2008-06-10,30.23,-6.78,4449.8,-1.14,Tuesday,June,1,0,0,...,-0.25,,,0,,,0,,,0
4,2008-06-11,29.64,-1.95,4523.6,1.66,Wednesday,June,0,1,0,...,-1.69,,,0,,,0,,,0


---

### **India Inflation Rate**

In [58]:
df9 = pd.read_csv("MacroData/IndiaInflation.csv")
IndiaInflation = df9.copy()
IndiaInflation["Date"] = pd.to_datetime(IndiaInflation['Date'])
IndiaInflation.head()

Unnamed: 0,Date,IndiaInflationRate(%)
0,2008-03-01,7.9
1,2008-04-01,8.0
2,2008-05-01,9.1
3,2008-06-01,9.3
4,2008-07-01,11.8


In [59]:
IndiaInflation['IndiaInflationRate(%)chng'] = IndiaInflation['IndiaInflationRate(%)'].pct_change() * 100 

In [60]:
IndiaInflation['IndiaInflationRate(%)chng']=IndiaInflation['IndiaInflationRate(%)chng'].fillna('0').astype(float)

In [61]:
IndiaInflation.info()
IndiaInflation.head(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 198 entries, 0 to 197
Data columns (total 3 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Date                       198 non-null    datetime64[ns]
 1   IndiaInflationRate(%)      198 non-null    float64       
 2   IndiaInflationRate(%)chng  198 non-null    float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 4.8 KB


Unnamed: 0,Date,IndiaInflationRate(%),IndiaInflationRate(%)chng
0,2008-03-01,7.9,0.0
1,2008-04-01,8.0,1.265823


In [62]:
daily_df = pd.merge(daily_df, IndiaInflation, how='left', on='Date')
daily_df.ffill(inplace=True)
daily_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 51 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   Date                              4016 non-null   datetime64[ns]
 1   vixPrice                          4016 non-null   float64       
 2   vixChange%                        4016 non-null   float64       
 3   niftyPrice                        4016 non-null   float64       
 4   niftyChange %                     4016 non-null   float64       
 5   DayOfWeek                         4016 non-null   object        
 6   month                             4016 non-null   object        
 7   Tuesday                           4016 non-null   int64         
 8   Wednesday                         4016 non-null   int64         
 9   Friday                            4016 non-null   int64         
 10  Monday                            4016 non-null 

Adding Anticepation Feature

In [63]:
daily_df["IndiaInflationRate(%)Anticepation"] = create_reset_counter(daily_df["IndiaInflationRate(%)"])
daily_df.head()

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,FedinterestAnticepation,Rbiinterest,Rbi%change,RbiinterestAnticepation,USInflation Rate (%),USInflationRate%chng,USInflation Rate (%)Anticepation,IndiaInflationRate(%),IndiaInflationRate(%)chng,IndiaInflationRate(%)Anticepation
0,2008-06-05,30.32,1.68,4676.95,1.99,Thursday,June,0,0,0,...,0,,,0,,,0,,,0
1,2008-06-06,30.32,0.0,4627.8,-1.05,Friday,June,0,0,1,...,0,,,0,,,0,,,0
2,2008-06-09,32.43,6.96,4500.95,-2.74,Monday,June,0,0,0,...,0,,,0,,,0,,,0
3,2008-06-10,30.23,-6.78,4449.8,-1.14,Tuesday,June,1,0,0,...,0,,,0,,,0,,,0
4,2008-06-11,29.64,-1.95,4523.6,1.66,Wednesday,June,0,1,0,...,0,,,0,,,0,,,0


### Dealing with NaN values of columns with frequencies other than "Daily"
We can observe that there are about **17** rows that are NaN in the begning of data out of **4015** entries so we can replace them with most recent available entry

>Columns with NaN values are as below

**Fedinterest,	Fed%change,	Rbiinterest,	Rbi%change,	USInflation Rate (%),	USInflationRate%chng,	IndiaInflationRate(%),	IndiaInflationRate(%)chng**

In [64]:
daily_df['Fedinterest'] = daily_df['Fedinterest'].bfill()

In [65]:
daily_df['Fed%change'] = daily_df['Fed%change'].bfill()

In [66]:
daily_df['Rbiinterest'] = daily_df['Rbiinterest'].bfill()

In [67]:
daily_df['Rbi%change'] = daily_df['Rbi%change'].bfill()

In [68]:
daily_df['USInflation Rate (%)'] = daily_df['USInflation Rate (%)'].bfill()

In [69]:
daily_df['USInflationRate%chng'] = daily_df['USInflationRate%chng'].bfill()

In [70]:
daily_df['IndiaInflationRate(%)'] = daily_df['IndiaInflationRate(%)'].bfill()

In [71]:
daily_df['IndiaInflationRate(%)chng'] = daily_df['IndiaInflationRate(%)chng'].bfill()

In [72]:
daily_df.ffill(inplace=True)
daily_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 52 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   Date                               4016 non-null   datetime64[ns]
 1   vixPrice                           4016 non-null   float64       
 2   vixChange%                         4016 non-null   float64       
 3   niftyPrice                         4016 non-null   float64       
 4   niftyChange %                      4016 non-null   float64       
 5   DayOfWeek                          4016 non-null   object        
 6   month                              4016 non-null   object        
 7   Tuesday                            4016 non-null   int64         
 8   Wednesday                          4016 non-null   int64         
 9   Friday                             4016 non-null   int64         
 10  Monday                             4

In [None]:
daily_df.head()

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,FedinterestAnticepation,Rbiinterest,Rbi%change,RbiinterestAnticepation,USInflation Rate (%),USInflationRate%chng,USInflation Rate (%)Anticepation,IndiaInflationRate(%),IndiaInflationRate(%)chng,IndiaInflationRate(%)Anticepation
0,2008-06-05,30.32,1.68,4676.95,1.99,Thursday,June,0,0,0,...,0,8.0,0.0,0,9.0,4.651163,0,11.8,26.88172,0
1,2008-06-06,30.32,0.0,4627.8,-1.05,Friday,June,0,0,1,...,0,8.0,0.0,0,9.0,4.651163,0,11.8,26.88172,0
2,2008-06-09,32.43,6.96,4500.95,-2.74,Monday,June,0,0,0,...,0,8.0,0.0,0,9.0,4.651163,0,11.8,26.88172,0
3,2008-06-10,30.23,-6.78,4449.8,-1.14,Tuesday,June,1,0,0,...,0,8.0,0.0,0,9.0,4.651163,0,11.8,26.88172,0
4,2008-06-11,29.64,-1.95,4523.6,1.66,Wednesday,June,0,1,0,...,0,8.0,0.0,0,9.0,4.651163,0,11.8,26.88172,0


## Working on DFs which have less than 20 entries 
These entries are from a duration of more than 16 years

---

### **IndianBudgetDates**

In [74]:
df10 = pd.read_csv("MacroData/IndianBudgetDates.csv")
IndianBudgetDates = df10.copy()
IndianBudgetDates["Date"] = pd.to_datetime(IndianBudgetDates['Date'])
IndianBudgetDates.head()

Unnamed: 0,Date,IndiaBudgetDatesMarker
0,2008-02-01,1
1,2009-02-01,1
2,2010-02-01,1
3,2011-02-01,1
4,2012-02-01,1


Adding above df in dialy_df and filling NaN values with 0

In [75]:
daily_df = pd.merge(daily_df, IndianBudgetDates, how='left', on='Date')
daily_df['IndiaBudgetDatesMarker'].fillna(0)
daily_df['IndiaBudgetDatesMarker'].head(4)

0   NaN
1   NaN
2   NaN
3   NaN
Name: IndiaBudgetDatesMarker, dtype: float64

Function to create **Feature** which will help machine anticepate the Indian Budget

In [76]:

def calculate_anticipation(df, marker_column, output_column):
    counter = 0
    anticipation = []

    for value in df[marker_column]:
        if value == 1:
            counter = 0  # Reset counter when marker is 1
        anticipation.append(counter)
        counter += 1

    df[output_column] = anticipation
    return df

In [77]:
calculate_anticipation(daily_df, "IndiaBudgetDatesMarker", "IndiaBudgetDatesAnticipation")

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,Rbi%change,RbiinterestAnticepation,USInflation Rate (%),USInflationRate%chng,USInflation Rate (%)Anticepation,IndiaInflationRate(%),IndiaInflationRate(%)chng,IndiaInflationRate(%)Anticepation,IndiaBudgetDatesMarker,IndiaBudgetDatesAnticipation
0,2008-06-05,30.3200,1.68,4676.95,1.99,Thursday,June,0,0,0,...,0.0,0,9.0,4.651163,0,11.8,26.881720,0,,0
1,2008-06-06,30.3200,0.00,4627.80,-1.05,Friday,June,0,0,1,...,0.0,0,9.0,4.651163,0,11.8,26.881720,0,,1
2,2008-06-09,32.4300,6.96,4500.95,-2.74,Monday,June,0,0,0,...,0.0,0,9.0,4.651163,0,11.8,26.881720,0,,2
3,2008-06-10,30.2300,-6.78,4449.80,-1.14,Tuesday,June,1,0,0,...,0.0,0,9.0,4.651163,0,11.8,26.881720,0,,3
4,2008-06-11,29.6400,-1.95,4523.60,1.66,Wednesday,June,0,1,0,...,0.0,0,9.0,4.651163,0,11.8,26.881720,0,,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4011,2024-08-21,13.3325,-3.53,24770.20,0.29,Wednesday,August,0,1,0,...,0.0,378,3.7,5.714286,13,3.7,5.714286,13,,136
4012,2024-08-22,13.0000,-2.49,24811.50,0.17,Thursday,August,0,0,0,...,0.0,379,3.7,5.714286,14,3.7,5.714286,14,,137
4013,2024-08-23,13.5525,4.25,24823.15,0.05,Friday,August,0,0,1,...,0.0,380,3.7,5.714286,15,3.7,5.714286,15,,138
4014,2024-08-26,13.7950,1.79,25010.60,0.76,Monday,August,0,0,0,...,0.0,381,3.7,5.714286,16,3.7,5.714286,16,,139


Values of **"IndiaBudgetDatesAnticipation" is reseting** at new budget event so we can say it's working, **now we can drop "IndiaBudgetDatesMarker"**

In [None]:
daily_df = daily_df.drop("IndiaBudgetDatesMarker", axis=1)
daily_df.info()
daily_df[1:-136]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 53 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   Date                               4016 non-null   datetime64[ns]
 1   vixPrice                           4016 non-null   float64       
 2   vixChange%                         4016 non-null   float64       
 3   niftyPrice                         4016 non-null   float64       
 4   niftyChange %                      4016 non-null   float64       
 5   DayOfWeek                          4016 non-null   object        
 6   month                              4016 non-null   object        
 7   Tuesday                            4016 non-null   int64         
 8   Wednesday                          4016 non-null   int64         
 9   Friday                             4016 non-null   int64         
 10  Monday                             4

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,Rbiinterest,Rbi%change,RbiinterestAnticepation,USInflation Rate (%),USInflationRate%chng,USInflation Rate (%)Anticepation,IndiaInflationRate(%),IndiaInflationRate(%)chng,IndiaInflationRate(%)Anticepation,IndiaBudgetDatesAnticipation
1,2008-06-06,30.3200,0.00,4627.80,-1.05,Friday,June,0,0,1,...,8.0,0.0,0,9.0,4.651163,0,11.8,26.88172,0,1
2,2008-06-09,32.4300,6.96,4500.95,-2.74,Monday,June,0,0,0,...,8.0,0.0,0,9.0,4.651163,0,11.8,26.88172,0,2
3,2008-06-10,30.2300,-6.78,4449.80,-1.14,Tuesday,June,1,0,0,...,8.0,0.0,0,9.0,4.651163,0,11.8,26.88172,0,3
4,2008-06-11,29.6400,-1.95,4523.60,1.66,Wednesday,June,0,1,0,...,8.0,0.0,0,9.0,4.651163,0,11.8,26.88172,0,4
5,2008-06-12,29.2300,-1.38,4539.35,0.35,Thursday,June,0,0,0,...,8.0,0.0,0,9.0,4.651163,0,11.8,26.88172,0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3875,2024-02-01,14.4550,-9.92,21697.45,-0.13,Thursday,February,0,0,0,...,6.5,0.0,242,5.1,0.000000,22,5.1,0.00000,22,0
3876,2024-02-02,14.6975,1.68,21853.80,0.72,Friday,February,0,0,1,...,6.5,0.0,243,5.1,0.000000,23,5.1,0.00000,23,1
3877,2024-02-05,15.6175,6.26,21771.70,-0.38,Monday,February,0,0,0,...,6.5,0.0,244,5.1,0.000000,24,5.1,0.00000,24,2
3878,2024-02-06,15.7850,1.07,21929.40,0.72,Tuesday,February,1,0,0,...,6.5,0.0,245,5.1,0.000000,25,5.1,0.00000,25,3


---

### **IndiaElectionDates**

In [79]:
df11 = pd.read_csv("MacroData/IndiaElectionDates.csv")
IndiaElectionDates = df11.copy()
IndiaElectionDates["Date"] = pd.to_datetime(IndiaElectionDates['Date'])
IndiaElectionDates.head()

Unnamed: 0,Date,IndiaElectionDatesMarker
0,2009-04-16,1
1,2014-04-07,1
2,2019-04-11,1


Adding above df in dialy_df and filling NaN values with 0

In [80]:
daily_df = pd.merge(daily_df, IndiaElectionDates, how='left', on='Date')
daily_df['IndiaElectionDatesMarker'].fillna(0)
daily_df['IndiaElectionDatesMarker'].head(4)

0   NaN
1   NaN
2   NaN
3   NaN
Name: IndiaElectionDatesMarker, dtype: float64

Creating and adding **"anticepation"** feature

In [81]:
calculate_anticipation(daily_df, "IndiaElectionDatesMarker", "IndiaElectionDatesAnticipation")

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,RbiinterestAnticepation,USInflation Rate (%),USInflationRate%chng,USInflation Rate (%)Anticepation,IndiaInflationRate(%),IndiaInflationRate(%)chng,IndiaInflationRate(%)Anticepation,IndiaBudgetDatesAnticipation,IndiaElectionDatesMarker,IndiaElectionDatesAnticipation
0,2008-06-05,30.3200,1.68,4676.95,1.99,Thursday,June,0,0,0,...,0,9.0,4.651163,0,11.8,26.881720,0,0,,0
1,2008-06-06,30.3200,0.00,4627.80,-1.05,Friday,June,0,0,1,...,0,9.0,4.651163,0,11.8,26.881720,0,1,,1
2,2008-06-09,32.4300,6.96,4500.95,-2.74,Monday,June,0,0,0,...,0,9.0,4.651163,0,11.8,26.881720,0,2,,2
3,2008-06-10,30.2300,-6.78,4449.80,-1.14,Tuesday,June,1,0,0,...,0,9.0,4.651163,0,11.8,26.881720,0,3,,3
4,2008-06-11,29.6400,-1.95,4523.60,1.66,Wednesday,June,0,1,0,...,0,9.0,4.651163,0,11.8,26.881720,0,4,,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4011,2024-08-21,13.3325,-3.53,24770.20,0.29,Wednesday,August,0,1,0,...,378,3.7,5.714286,13,3.7,5.714286,13,136,,1325
4012,2024-08-22,13.0000,-2.49,24811.50,0.17,Thursday,August,0,0,0,...,379,3.7,5.714286,14,3.7,5.714286,14,137,,1326
4013,2024-08-23,13.5525,4.25,24823.15,0.05,Friday,August,0,0,1,...,380,3.7,5.714286,15,3.7,5.714286,15,138,,1327
4014,2024-08-26,13.7950,1.79,25010.60,0.76,Monday,August,0,0,0,...,381,3.7,5.714286,16,3.7,5.714286,16,139,,1328


Checking if new feature is added correctly and droping the marker column

In [82]:
daily_df = daily_df.drop("IndiaElectionDatesMarker", axis=1)
daily_df.info()
daily_df[1:-136]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 54 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   Date                               4016 non-null   datetime64[ns]
 1   vixPrice                           4016 non-null   float64       
 2   vixChange%                         4016 non-null   float64       
 3   niftyPrice                         4016 non-null   float64       
 4   niftyChange %                      4016 non-null   float64       
 5   DayOfWeek                          4016 non-null   object        
 6   month                              4016 non-null   object        
 7   Tuesday                            4016 non-null   int64         
 8   Wednesday                          4016 non-null   int64         
 9   Friday                             4016 non-null   int64         
 10  Monday                             4

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,Rbi%change,RbiinterestAnticepation,USInflation Rate (%),USInflationRate%chng,USInflation Rate (%)Anticepation,IndiaInflationRate(%),IndiaInflationRate(%)chng,IndiaInflationRate(%)Anticepation,IndiaBudgetDatesAnticipation,IndiaElectionDatesAnticipation
1,2008-06-06,30.3200,0.00,4627.80,-1.05,Friday,June,0,0,1,...,0.0,0,9.0,4.651163,0,11.8,26.88172,0,1,1
2,2008-06-09,32.4300,6.96,4500.95,-2.74,Monday,June,0,0,0,...,0.0,0,9.0,4.651163,0,11.8,26.88172,0,2,2
3,2008-06-10,30.2300,-6.78,4449.80,-1.14,Tuesday,June,1,0,0,...,0.0,0,9.0,4.651163,0,11.8,26.88172,0,3,3
4,2008-06-11,29.6400,-1.95,4523.60,1.66,Wednesday,June,0,1,0,...,0.0,0,9.0,4.651163,0,11.8,26.88172,0,4,4
5,2008-06-12,29.2300,-1.38,4539.35,0.35,Thursday,June,0,0,0,...,0.0,0,9.0,4.651163,0,11.8,26.88172,0,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3875,2024-02-01,14.4550,-9.92,21697.45,-0.13,Thursday,February,0,0,0,...,0.0,242,5.1,0.000000,22,5.1,0.00000,22,0,1189
3876,2024-02-02,14.6975,1.68,21853.80,0.72,Friday,February,0,0,1,...,0.0,243,5.1,0.000000,23,5.1,0.00000,23,1,1190
3877,2024-02-05,15.6175,6.26,21771.70,-0.38,Monday,February,0,0,0,...,0.0,244,5.1,0.000000,24,5.1,0.00000,24,2,1191
3878,2024-02-06,15.7850,1.07,21929.40,0.72,Tuesday,February,1,0,0,...,0.0,245,5.1,0.000000,25,5.1,0.00000,25,3,1192


---

### **UsElectionDates**

In [83]:
df12 = pd.read_csv("MacroData/UsElectionDates.csv")
UsElectionDates = df12.copy()
UsElectionDates["Date"] = pd.to_datetime(UsElectionDates['Date'])
UsElectionDates.head()

Unnamed: 0,Date,UsElectionDatesMarker
0,2008-11-04,1
1,2012-11-06,1
2,2016-11-08,1
3,2020-11-03,1


Adding above df in dialy_df and filling NaN values with 0

In [84]:
daily_df = pd.merge(daily_df, UsElectionDates, how='left', on='Date')
daily_df['UsElectionDatesMarker'].fillna(0)
daily_df['UsElectionDatesMarker'].head(4)

0   NaN
1   NaN
2   NaN
3   NaN
Name: UsElectionDatesMarker, dtype: float64

Creating and adding **"anticepation"** feature

In [85]:
calculate_anticipation(daily_df, "UsElectionDatesMarker", "UsElectionDatesAnticipation")

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,USInflation Rate (%),USInflationRate%chng,USInflation Rate (%)Anticepation,IndiaInflationRate(%),IndiaInflationRate(%)chng,IndiaInflationRate(%)Anticepation,IndiaBudgetDatesAnticipation,IndiaElectionDatesAnticipation,UsElectionDatesMarker,UsElectionDatesAnticipation
0,2008-06-05,30.3200,1.68,4676.95,1.99,Thursday,June,0,0,0,...,9.0,4.651163,0,11.8,26.881720,0,0,0,,0
1,2008-06-06,30.3200,0.00,4627.80,-1.05,Friday,June,0,0,1,...,9.0,4.651163,0,11.8,26.881720,0,1,1,,1
2,2008-06-09,32.4300,6.96,4500.95,-2.74,Monday,June,0,0,0,...,9.0,4.651163,0,11.8,26.881720,0,2,2,,2
3,2008-06-10,30.2300,-6.78,4449.80,-1.14,Tuesday,June,1,0,0,...,9.0,4.651163,0,11.8,26.881720,0,3,3,,3
4,2008-06-11,29.6400,-1.95,4523.60,1.66,Wednesday,June,0,1,0,...,9.0,4.651163,0,11.8,26.881720,0,4,4,,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4011,2024-08-21,13.3325,-3.53,24770.20,0.29,Wednesday,August,0,1,0,...,3.7,5.714286,13,3.7,5.714286,13,136,1325,,940
4012,2024-08-22,13.0000,-2.49,24811.50,0.17,Thursday,August,0,0,0,...,3.7,5.714286,14,3.7,5.714286,14,137,1326,,941
4013,2024-08-23,13.5525,4.25,24823.15,0.05,Friday,August,0,0,1,...,3.7,5.714286,15,3.7,5.714286,15,138,1327,,942
4014,2024-08-26,13.7950,1.79,25010.60,0.76,Monday,August,0,0,0,...,3.7,5.714286,16,3.7,5.714286,16,139,1328,,943


Checking if new feature is added correctly and droping the marker column

In [86]:
daily_df = daily_df.drop("UsElectionDatesMarker", axis=1)
daily_df.info()
daily_df[1:-941]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4016 entries, 0 to 4015
Data columns (total 55 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   Date                               4016 non-null   datetime64[ns]
 1   vixPrice                           4016 non-null   float64       
 2   vixChange%                         4016 non-null   float64       
 3   niftyPrice                         4016 non-null   float64       
 4   niftyChange %                      4016 non-null   float64       
 5   DayOfWeek                          4016 non-null   object        
 6   month                              4016 non-null   object        
 7   Tuesday                            4016 non-null   int64         
 8   Wednesday                          4016 non-null   int64         
 9   Friday                             4016 non-null   int64         
 10  Monday                             4

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,DayOfWeek,month,Tuesday,Wednesday,Friday,...,RbiinterestAnticepation,USInflation Rate (%),USInflationRate%chng,USInflation Rate (%)Anticepation,IndiaInflationRate(%),IndiaInflationRate(%)chng,IndiaInflationRate(%)Anticepation,IndiaBudgetDatesAnticipation,IndiaElectionDatesAnticipation,UsElectionDatesAnticipation
1,2008-06-06,30.3200,0.00,4627.80,-1.05,Friday,June,0,0,1,...,0,9.0,4.651163,0,11.8,26.881720,0,1,1,1
2,2008-06-09,32.4300,6.96,4500.95,-2.74,Monday,June,0,0,0,...,0,9.0,4.651163,0,11.8,26.881720,0,2,2,2
3,2008-06-10,30.2300,-6.78,4449.80,-1.14,Tuesday,June,1,0,0,...,0,9.0,4.651163,0,11.8,26.881720,0,3,3,3
4,2008-06-11,29.6400,-1.95,4523.60,1.66,Wednesday,June,0,1,0,...,0,9.0,4.651163,0,11.8,26.881720,0,4,4,4
5,2008-06-12,29.2300,-1.38,4539.35,0.35,Thursday,June,0,0,0,...,0,9.0,4.651163,0,11.8,26.881720,0,5,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3070,2020-11-02,25.2100,1.85,11669.15,0.23,Monday,November,0,0,0,...,114,7.6,4.109589,21,7.6,4.109589,21,187,384,986
3071,2020-11-03,24.2400,-3.85,11813.50,1.24,Tuesday,November,1,0,0,...,115,7.6,4.109589,22,7.6,4.109589,22,188,385,0
3072,2020-11-04,23.2000,-4.29,11908.50,0.80,Wednesday,November,0,1,0,...,116,7.6,4.109589,23,7.6,4.109589,23,189,386,1
3073,2020-11-05,20.9650,-9.63,12120.30,1.78,Thursday,November,0,0,0,...,117,7.6,4.109589,24,7.6,4.109589,24,190,387,2


In [87]:
daily_df.drop(['DayOfWeek','month'], axis= 1)

Unnamed: 0,Date,vixPrice,vixChange%,niftyPrice,niftyChange %,Tuesday,Wednesday,Friday,Monday,Thursday,...,RbiinterestAnticepation,USInflation Rate (%),USInflationRate%chng,USInflation Rate (%)Anticepation,IndiaInflationRate(%),IndiaInflationRate(%)chng,IndiaInflationRate(%)Anticepation,IndiaBudgetDatesAnticipation,IndiaElectionDatesAnticipation,UsElectionDatesAnticipation
0,2008-06-05,30.3200,1.68,4676.95,1.99,0,0,0,0,1,...,0,9.0,4.651163,0,11.8,26.881720,0,0,0,0
1,2008-06-06,30.3200,0.00,4627.80,-1.05,0,0,1,0,0,...,0,9.0,4.651163,0,11.8,26.881720,0,1,1,1
2,2008-06-09,32.4300,6.96,4500.95,-2.74,0,0,0,1,0,...,0,9.0,4.651163,0,11.8,26.881720,0,2,2,2
3,2008-06-10,30.2300,-6.78,4449.80,-1.14,1,0,0,0,0,...,0,9.0,4.651163,0,11.8,26.881720,0,3,3,3
4,2008-06-11,29.6400,-1.95,4523.60,1.66,0,1,0,0,0,...,0,9.0,4.651163,0,11.8,26.881720,0,4,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4011,2024-08-21,13.3325,-3.53,24770.20,0.29,0,1,0,0,0,...,378,3.7,5.714286,13,3.7,5.714286,13,136,1325,940
4012,2024-08-22,13.0000,-2.49,24811.50,0.17,0,0,0,0,1,...,379,3.7,5.714286,14,3.7,5.714286,14,137,1326,941
4013,2024-08-23,13.5525,4.25,24823.15,0.05,0,0,1,0,0,...,380,3.7,5.714286,15,3.7,5.714286,15,138,1327,942
4014,2024-08-26,13.7950,1.79,25010.60,0.76,0,0,0,1,0,...,381,3.7,5.714286,16,3.7,5.714286,16,139,1328,943


In [88]:
daily_df.to_csv('finaldata.csv', index=False)