# **Market Prediction Using Macro Economic Data**
using LSTM and TFT

In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

---
---
---

# Importing and cleaning data

## **"niftyvix"** 

In [None]:
# Read the raw NIFTY/VIX file. df0 is kept untouched; all cleaning happens on
# the independent copy `niftyvix`.
df0 = pd.read_csv(filepath_or_buffer="MacroData/fniftyvix_data.csv")
niftyvix = df0.copy(deep=True)


In [None]:
# Remove the "Date.1" column (presumably a duplicate of "Date" — confirm) and
# parse "Date" as datetime. The frame is rebuilt from the untouched df0 so
# this cell can be re-run without a KeyError from dropping a column twice.
niftyvix = df0.drop(columns="Date.1")
niftyvix['Date'] = pd.to_datetime(niftyvix['Date'])

# info() prints; head(1) is the last expression so it is actually rendered.
niftyvix.info()
niftyvix.head(1)

In [None]:
# Drop columns by position: indices 5-24 and 46-50 of the current frame.
# NOTE(review): positional selection is not re-runnable — executing this cell
# twice removes a different set of columns (or raises IndexError).
niftyvix = niftyvix.drop(
    columns=niftyvix.columns[[*range(5, 25), *range(46, 51)]]
)

In [None]:
# Inspect the remaining columns and dtypes after the positional drop.
niftyvix.info()

## Above dataframe will be used as a template to clean and process data

> Dates must be in ascending order, so DataFrames are reversed where necessary

> All date formats are being matched with "niftyvix" DF

> A copy of each original DataFrame is made so that the raw data stays untouched

> All dates are changed from object to datetime format

---

## **"Gold"** 

In [None]:
# Raw gold price history; df1 stays pristine, `gold` is the working copy.
df1 = pd.read_csv(filepath_or_buffer="MacroData/Gold.csv")
gold = df1.copy(deep=True)
gold.head(5)

Dropping unnecessary columns

In [None]:
# Keep only the three columns this notebook uses. The explicit .copy() makes
# `gold` an independent frame, so assigning to its columns afterwards does not
# trigger SettingWithCopyWarning.
gold = gold[['Date', 'GoldPrice', 'GoldChange %']].copy()
gold.head()

Change columns to the right data type

In [None]:
# Parse the date column (once — the original converted it twice).
gold['Date'] = pd.to_datetime(gold['Date'])

# Price and change are cast via text: strip the ',' / '%' characters, then
# convert to float. regex=False makes the literal replacement explicit, so the
# result does not depend on the pandas version's default for `regex`.
gold['GoldPrice'] = (
    gold['GoldPrice'].astype(str).str.replace(',', '', regex=False).astype(float)
)
gold['GoldChange %'] = (
    gold['GoldChange %'].astype(str).str.replace('%', '', regex=False).astype(float)
)

gold.info()
gold

The dates are not in ascending order, so we will correct that

In [None]:
# Put rows in ascending Date order. Sorting gives the same result as reversing
# a strictly descending file, but is safe to re-run (iloc[::-1] would flip the
# frame back) and does not depend on the file's original order.
gold = gold.sort_values('Date', kind='stable').reset_index(drop=True)
gold.head()

---

## **Crude Oil**

In [None]:
# Raw crude-oil price history; df2 stays pristine, `crude` is the working copy.
df2 = pd.read_csv(filepath_or_buffer="MacroData/Crudeoil.csv")
crude = df2.copy(deep=True)
crude.head(5)

Dropping unnecessary columns

In [None]:
# Keep only the columns used later; .copy() detaches the selection so the
# column assignments that follow do not raise SettingWithCopyWarning.
crude = crude[["Date", "CrudePrice", "CrudeChange %"]].copy()

Changing columns to right datatype

In [None]:
# Normalise dtypes in one step: Date -> datetime, price and change -> float
# after removing the ',' and '%' characters from their text form.
crude = crude.assign(**{
    'Date': pd.to_datetime(crude['Date']),
    'CrudePrice': crude['CrudePrice'].astype(str).str.replace(',', '').astype(float),
    'CrudeChange %': crude['CrudeChange %'].astype(str).str.replace("%", "").astype(float),
})

crude.info()
crude

---

## **USDINR**

In [None]:
# Raw USD/INR history; df3 stays pristine, `usdinr` is the working copy.
df3 = pd.read_csv(filepath_or_buffer="MacroData/usdinr.csv")
usdinr = df3.copy(deep=True)
usdinr.head(5)

Dropping unnecessary columns

In [None]:
# Keep only the columns used later; .copy() detaches the selection so the
# column assignments that follow do not raise SettingWithCopyWarning.
usdinr = usdinr[["Date", "inrPrice", "inrChange %"]].copy()

Changing columns to right datatype

In [None]:
# Normalise dtypes in one step: Date -> datetime, price and change -> float
# after removing the ',' and '%' characters from their text form.
usdinr = usdinr.assign(**{
    'Date': pd.to_datetime(usdinr['Date']),
    'inrPrice': usdinr['inrPrice'].astype(str).str.replace(',', '').astype(float),
    'inrChange %': usdinr['inrChange %'].astype(str).str.replace("%", "").astype(float),
})

usdinr.info()
usdinr

The dates are not in ascending order

In [None]:
# Put rows in ascending Date order. Sorting gives the same result as reversing
# a strictly descending file, but is safe to re-run (iloc[::-1] would flip the
# frame back) and does not depend on the file's original order.
usdinr = usdinr.sort_values('Date', kind='stable').reset_index(drop=True)
usdinr.head()

---

## **"US Dollar Index"**

In [None]:
# Raw US Dollar Index history; df4 stays pristine, `usdindex` is the working copy.
df4 = pd.read_csv(filepath_or_buffer="MacroData/USdollarindex.csv")
usdindex = df4.copy(deep=True)
usdindex.head(5)

Dropping unnecessary columns

In [None]:
# Keep only the columns used later; .copy() detaches the selection so the
# column assignments that follow do not raise SettingWithCopyWarning.
usdindex = usdindex[["Date", "diPrice", "diChange %"]].copy()

Changing columns to right datatype

In [None]:
# Normalise dtypes in one step: Date -> datetime, price and change -> float
# after removing the ',' and '%' characters from their text form.
usdindex = usdindex.assign(**{
    'Date': pd.to_datetime(usdindex['Date']),
    'diPrice': usdindex['diPrice'].astype(str).str.replace(',', '').astype(float),
    'diChange %': usdindex['diChange %'].astype(str).str.replace("%", "").astype(float),
})

usdindex.info()
usdindex

The dates are not in ascending order

In [None]:
# Put rows in ascending Date order. Sorting gives the same result as reversing
# a strictly descending file, but is safe to re-run (iloc[::-1] would flip the
# frame back) and does not depend on the file's original order.
usdindex = usdindex.sort_values('Date', kind='stable').reset_index(drop=True)
usdindex.head()

---

## **10-2 year bond yield**

In [None]:
# Read the 10-2 year spread file (the cells that follow use its
# "Date" and "T10Y2Y" columns).
df5 = pd.read_csv(filepath_or_buffer="MacroData/10-2year.csv")

In [None]:
# Convert "Date" from text to datetime, then look at the dtypes and the first
# 15 rows.
df5 = df5.assign(Date=pd.to_datetime(df5['Date']))
df5.info()
df5.head(n=15)

converting **T10Y2Y** column from object type to float

In [None]:
# Kept commented out on purpose: the direct cast to float fails on this column
# (the error message is quoted in the text right after this cell).
# df5["T10Y2Y"] = df5['T10Y2Y'].astype(float)

# df5.info()

> it gives an error 

ValueError: could not convert string to float: "."

There may be multiple '.' values in the column; let us convert them to NaN


In [None]:
# Blank out the "." placeholders: every cell equal to "." becomes NaN, all
# other values are left as they are.
df5['T10Y2Y'] = df5['T10Y2Y'].mask(df5['T10Y2Y'].eq("."))

"." is converted to NaN successfully. 



In [None]:
# Spot-check rows 12 and 13 (presumably rows that originally held "." — confirm).
df5[12:14]

Now we will fill the NaN values with preceding values and check if it's working

In [None]:
# Forward-fill: each NaN takes the last valid value that precedes it.
# Series.ffill() replaces fillna(method='ffill'), whose `method` argument is
# deprecated in recent pandas; it also matches the .ffill()/.bfill() calls
# used in the rest of this notebook.
df5['T10Y2Y'] = df5['T10Y2Y'].ffill()

Checking if any NaN values are left

In [None]:
# Number of NaN values still present in T10Y2Y after the forward fill.
print(df5['T10Y2Y'].isna().sum())


All NaN values are filled with preceding values successfully

In [None]:
# With the "." placeholders gone, the column can be cast to float.
df5 = df5.astype({"T10Y2Y": float})

In [None]:
# Same two rows as the earlier spot-check, now after the fill and float cast.
df5[12:14]

Adding **"percent change"** as a feature

In [None]:
# Row-over-row percentage change of the spread.
# fill_method=None: the column was already forward-filled, and pandas' implicit
# padding default for pct_change is deprecated.
# NOTE(review): pct_change divides by the previous row's value; if T10Y2Y is
# ever 0 the result is +/-inf, and a negative previous value flips the sign —
# confirm the feature is meaningful for this series.
df5['T10Y2Y%chng'] = df5['T10Y2Y'].pct_change(fill_method=None) * 100

# Working copy for the merge. "Date" is already datetime (converted right
# after df5 was loaded), so it is not parsed a second time.
T10Y2Ydf = df5.copy()
T10Y2Ydf.info()


In [None]:
# First rows of the working copy, including the new T10Y2Y%chng column.
T10Y2Ydf.head()

The first row of `T10Y2Y%chng` is NaN (there is no previous value to compare with), so we set it to zero

In [None]:
# Replace the remaining NaN (the first row has no previous value) with 0.
# A numeric 0.0 is used instead of the string '0', so the column never has to
# be upcast to object and parsed back to float.
T10Y2Ydf["T10Y2Y%chng"] = T10Y2Ydf['T10Y2Y%chng'].fillna(0.0).astype(float)
T10Y2Ydf

In [None]:
# Confirm dtypes and non-null counts before merging.
T10Y2Ydf.info()

---
---

# Now we will join all the above DataFrames together, prioritising the dates of the "niftyvix" DataFrame

Merge each DataFrame one by one with a left join on the 'Date' column, so that every "niftyvix" date is kept

In [None]:
# Start the combined frame from an independent copy of niftyvix, so that
# in-place operations on daily_df can never reach back into niftyvix
# (plain assignment would only create a second name for the same object).
daily_df = niftyvix.copy()
daily_df.head()

In [None]:
# Left join: every row of daily_df is kept; gold columns are NaN where the
# date has no match in `gold`.
# NOTE(review): re-running this cell merges the same columns again (pandas
# then appends _x/_y suffixes).
daily_df = daily_df.merge(gold, on='Date', how='left')
daily_df.head(n=1)

In [None]:
# Left join of the crude-oil columns onto the daily frame, matched on Date.
daily_df = daily_df.merge(crude, on='Date', how='left')

In [None]:
# Left join of the USD/INR columns onto the daily frame, matched on Date.
daily_df = daily_df.merge(usdinr, on='Date', how='left')

In [None]:
# Left join of the US Dollar Index columns onto the daily frame, matched on Date.
daily_df = daily_df.merge(usdindex, on='Date', how='left')

In [None]:
# Left join of the 10-2 year spread columns onto the daily frame, matched on Date.
daily_df = daily_df.merge(T10Y2Ydf, on='Date', how='left')

In [None]:
# Quick look at the first two rows of the combined frame.
daily_df.head(2)

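The left joins leave nulls wherever a "niftyvix" date has no matching row (e.g. in the Gold columns), so we forward-fill the whole frame with the last known values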

In [None]:
# Forward-fill every column: a NaN takes the last valid value above it.
# Leading NaNs (nothing above them) are left as they are.
daily_df = daily_df.ffill()


In [None]:
# Check non-null counts per column after the forward fill.
daily_df.info()

---

All the DataFrames above had daily frequency. Below we deal with the DataFrames that do not have daily frequency.

## **Fed Interest Rate**

In [None]:
# df6 holds the raw file; `fedinterest` is a copy with "Date" parsed to datetime.
df6 = pd.read_csv(filepath_or_buffer="MacroData/Fedinterest.csv")
fedinterest = df6.assign(Date=pd.to_datetime(df6['Date']))
fedinterest.info()
fedinterest.head(n=2)

In [None]:
# Left join on exact Date, then forward-fill so each daily row carries the
# last value merged above it.
# NOTE(review): rows of `fedinterest` whose Date does not appear in daily_df
# are dropped by the exact-match join, so that update never reaches the daily
# frame — confirm against the data; pd.merge_asof(direction='backward') would
# match each daily row to the latest earlier observation instead.
daily_df = daily_df.merge(fedinterest, on='Date', how='left').ffill()
daily_df.info()

---

## **RBI Interest Rate**

In [None]:
# df7 holds the raw file; `rbiinterest` is a copy with "Date" parsed to datetime.
df7 = pd.read_csv(filepath_or_buffer="MacroData/Rbiinterest.csv")
rbiinterest = df7.assign(Date=pd.to_datetime(df7['Date']))
rbiinterest.info()
rbiinterest.head(n=2)

In [None]:
# Left join on exact Date, then forward-fill so each daily row carries the
# last value merged above it.
# NOTE(review): rows of `rbiinterest` whose Date is absent from daily_df are
# dropped by the exact-match join — confirm against the data.
daily_df = daily_df.merge(rbiinterest, on='Date', how='left').ffill()
daily_df.info()

---

## **US Inflation**

In [None]:
# df8 holds the raw file; `USinflation` is a copy with "Date" parsed to datetime.
df8 = pd.read_csv(filepath_or_buffer="MacroData/USinflationdata.csv")
USinflation = df8.assign(Date=pd.to_datetime(df8['Date']))
USinflation.head(5)

adding a column that shows percentage change

In [None]:
# Row-over-row percentage change of the inflation rate (first row is NaN).
# NOTE(review): a previous value of 0 would produce +/-inf here — confirm.
USinflation['USInflationRate%chng'] = (
    USinflation['USInflation Rate (%)'].pct_change().mul(100)
)

In [None]:
# Replace NaN (the first row has no previous value) with 0. A numeric 0.0 is
# used instead of the string '0', so the float column is never upcast to
# object and parsed back.
USinflation['USInflationRate%chng'] = (
    USinflation['USInflationRate%chng'].fillna(0.0).astype(float)
)

In [None]:
# Left join on exact Date, then forward-fill so each daily row carries the
# last value merged above it.
# NOTE(review): rows of `USinflation` whose Date is absent from daily_df are
# dropped by the exact-match join — confirm against the data.
daily_df = daily_df.merge(USinflation, on='Date', how='left').ffill()
daily_df.info()

In [None]:
# Inspect the source frame itself (dtypes, then the first two rows).
USinflation.info()
USinflation.head(2)

---

## **India Inflation Rate**

In [None]:
# df9 holds the raw file; `IndiaInflation` is a copy with "Date" parsed to datetime.
df9 = pd.read_csv(filepath_or_buffer="MacroData/IndiaInflation.csv")
IndiaInflation = df9.assign(Date=pd.to_datetime(df9['Date']))
IndiaInflation.head(5)

In [None]:
# Row-over-row percentage change of the inflation rate (first row is NaN).
# NOTE(review): a previous value of 0 would produce +/-inf here — confirm.
IndiaInflation['IndiaInflationRate(%)chng'] = (
    IndiaInflation['IndiaInflationRate(%)'].pct_change().mul(100)
)

In [None]:
# Replace NaN (the first row has no previous value) with 0. A numeric 0.0 is
# used instead of the string '0', so the float column is never upcast to
# object and parsed back.
IndiaInflation['IndiaInflationRate(%)chng'] = (
    IndiaInflation['IndiaInflationRate(%)chng'].fillna(0.0).astype(float)
)

In [None]:
# Inspect the source frame (dtypes, then the first two rows) before merging.
IndiaInflation.info()
IndiaInflation.head(2)

In [None]:
# Left join on exact Date, then forward-fill so each daily row carries the
# last value merged above it.
# NOTE(review): rows of `IndiaInflation` whose Date is absent from daily_df
# are dropped by the exact-match join — confirm against the data.
daily_df = daily_df.merge(IndiaInflation, on='Date', how='left').ffill()
daily_df.info()

In [None]:
# Shows the LAST 20 rows.
# NOTE(review): the text after this cell discusses NaNs at the beginning of
# the data; head(20) may have been intended — confirm.
daily_df.tail(20)

### We can observe that about **17** rows at the beginning of the data (out of **4015** entries) are NaN, so we back-fill them with the next available entry

Columns with NaN values are as below

**Fedinterest,	Fed%change,	Rbiinterest,	Rbi%change,	USInflation Rate (%),	USInflationRate%chng,	IndiaInflationRate(%),	IndiaInflationRate(%)chng**

In [None]:
# Back-fill: leading NaNs take the first valid value that follows them.
daily_df = daily_df.assign(Fedinterest=daily_df['Fedinterest'].bfill())

In [None]:
# Back-fill: leading NaNs take the first valid value that follows them.
daily_df = daily_df.assign(**{'Fed%change': daily_df['Fed%change'].bfill()})

In [None]:
# Back-fill: leading NaNs take the first valid value that follows them.
daily_df = daily_df.assign(Rbiinterest=daily_df['Rbiinterest'].bfill())

In [None]:
# Back-fill: leading NaNs take the first valid value that follows them.
daily_df = daily_df.assign(**{'Rbi%change': daily_df['Rbi%change'].bfill()})

In [None]:
# Back-fill: leading NaNs take the first valid value that follows them.
daily_df = daily_df.assign(
    **{'USInflation Rate (%)': daily_df['USInflation Rate (%)'].bfill()}
)

In [None]:
# Back-fill: leading NaNs take the first valid value that follows them.
daily_df = daily_df.assign(
    **{'USInflationRate%chng': daily_df['USInflationRate%chng'].bfill()}
)

In [None]:
# Back-fill: leading NaNs take the first valid value that follows them.
daily_df = daily_df.assign(
    **{'IndiaInflationRate(%)': daily_df['IndiaInflationRate(%)'].bfill()}
)

In [None]:
# Back-fill: leading NaNs take the first valid value that follows them.
daily_df = daily_df.assign(
    **{'IndiaInflationRate(%)chng': daily_df['IndiaInflationRate(%)chng'].bfill()}
)

In [None]:
# One more forward fill over the whole frame, then verify the non-null counts.
daily_df = daily_df.ffill()
daily_df.info()

In [None]:
# First rows of the frame after the back-fill / forward-fill steps.
daily_df.head()

---

## **IndianBudgetDates**

In [None]:
# df10 holds the raw file; `IndianBudgetDates` is a copy with "Date" parsed to datetime.
df10 = pd.read_csv(filepath_or_buffer="MacroData/IndianBudgetDates.csv")
IndianBudgetDates = df10.assign(Date=pd.to_datetime(df10['Date']))
IndianBudgetDates.head(5)

Adding the above DataFrame to daily_df and filling NaN values with 0

In [None]:
# Attach the budget-day marker; rows without a matching date get NaN from the
# left join.
# NOTE(review): only budget dates that also occur in daily_df['Date'] survive
# an exact-match left join — confirm none fall on a date missing from daily_df.
daily_df = pd.merge(daily_df, IndianBudgetDates, how='left', on='Date')

# fillna returns a new Series; assign it back so the zeros are actually stored
# (the original call discarded the result and left the NaNs in place).
daily_df['IndiaBudgetDatesMarker'] = daily_df['IndiaBudgetDatesMarker'].fillna(0)
daily_df['IndiaBudgetDatesMarker'].head(4)

Function to create a **feature** that helps the model anticipate the Indian Budget: it counts the rows elapsed since the most recent budget date

In [None]:
# Inline prototype of the counter loop, left commented out; the same logic is
# defined as the function calculate_anticipation() in a later cell.
# # Initialize the counter and output list
# counter = 0
# anticipation = []

# # Loop through the column values
# for value in daily_df["IndiaBudgetDatesMarker"]:
#     if value == 1:
#         counter = 0  # Reset counter when marker is 1
#     anticipation.append(counter)
#     counter += 1

# # Assign the output list to a new column
# daily_df["IndiaBudgetDatesAnticipation"] = anticipation



In [None]:

def calculate_anticipation(df, marker_column, output_column):
    """Add a rows-since-last-event counter column to ``df``.

    For every row, the counter is the number of rows elapsed since the most
    recent row whose ``marker_column`` value equals 1. It is 0 on the marker
    row itself. Rows before the first marker count up from 0 at the start of
    the frame. NaN (or any value other than 1) is treated as "no event".

    Note that, despite the name, this counts rows *since* the previous event,
    not rows remaining until the next one.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to annotate; it is modified in place.
    marker_column : str
        Name of the column holding the event marker (1 on event rows).
    output_column : str
        Name of the column to create or overwrite with the counter.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``output_column`` set.
    """
    # True exactly where the original loop reset its counter.
    is_event = df[marker_column].eq(1)
    # Each event starts a new segment; rows before the first event are segment 0.
    segment_id = is_event.cumsum().to_numpy()
    # Position of each row inside its segment == rows since the last event.
    # Converted to a plain array so the assignment is positional, exactly like
    # assigning the list the loop version built.
    df[output_column] = is_event.groupby(segment_id).cumcount().to_numpy()
    return df

In [None]:
# Adds "IndiaBudgetDatesAnticipation" to daily_df in place; the returned frame
# is this cell's displayed output.
calculate_anticipation(daily_df, "IndiaBudgetDatesMarker", "IndiaBudgetDatesAnticipation")

The values of **"IndiaBudgetDatesAnticipation" reset** at each new budget event, so the function is working; **now we can drop "IndiaBudgetDatesMarker"**

In [None]:
# The marker is no longer needed once the counter column exists.
daily_df = daily_df.drop(columns=["IndiaBudgetDatesMarker"])
daily_df.info()
# Display everything except the first row and the last 136 rows.
daily_df.iloc[1:-136]

Changing columns to right datatype

---

## **IndiaElectionDates**

In [None]:
# df11 holds the raw file; `IndiaElectionDates` is a copy with "Date" parsed to datetime.
df11 = pd.read_csv(filepath_or_buffer="MacroData/IndiaElectionDates.csv")
IndiaElectionDates = df11.assign(Date=pd.to_datetime(df11['Date']))
IndiaElectionDates.head(5)

Adding the above DataFrame to daily_df and filling NaN values with 0

In [None]:
# Attach the election-day marker; rows without a matching date get NaN from
# the left join.
# NOTE(review): only election dates that also occur in daily_df['Date']
# survive an exact-match left join — confirm none are lost.
daily_df = pd.merge(daily_df, IndiaElectionDates, how='left', on='Date')

# fillna returns a new Series; assign it back so the zeros are actually stored
# (the original call discarded the result and left the NaNs in place).
daily_df['IndiaElectionDatesMarker'] = daily_df['IndiaElectionDatesMarker'].fillna(0)
daily_df['IndiaElectionDatesMarker'].head(4)

Creating and adding the **"anticipation"** feature

In [None]:
# Adds "IndiaElectionDatesAnticipation" to daily_df in place; the returned
# frame is this cell's displayed output.
calculate_anticipation(daily_df, "IndiaElectionDatesMarker", "IndiaElectionDatesAnticipation")

Checking if the new feature is added correctly and dropping the marker column

In [None]:
# The marker is no longer needed once the counter column exists.
daily_df = daily_df.drop(columns=["IndiaElectionDatesMarker"])
daily_df.info()
# Display everything except the first row and the last 136 rows.
daily_df.iloc[1:-136]

---

## **UsElectionDates**

In [None]:
# df12 holds the raw file; `UsElectionDates` is a copy with "Date" parsed to datetime.
df12 = pd.read_csv(filepath_or_buffer="MacroData/UsElectionDates.csv")
UsElectionDates = df12.assign(Date=pd.to_datetime(df12['Date']))
UsElectionDates.head(5)

Adding the above DataFrame to daily_df and filling NaN values with 0

In [None]:
# Left join of the US election marker onto the daily frame, matched on Date.
# NOTE(review): only election dates that also occur in daily_df['Date']
# survive an exact-match left join — confirm none are lost.
daily_df = daily_df.merge(UsElectionDates, on='Date', how='left')


In [None]:
# fillna returns a new Series; assign it back so the zeros are actually stored
# (the original only displayed the filled values and left NaNs in daily_df).
daily_df['UsElectionDatesMarker'] = daily_df['UsElectionDatesMarker'].fillna(0)
daily_df['UsElectionDatesMarker']


In [None]:
# Show the first 107 rows (presumably enough to reach the first US election
# marker — confirm).
daily_df.head(107)

Creating and adding the **"anticipation"** feature

In [None]:
# Adds the rows-since-last-US-election counter to daily_df in place.
# NOTE(review): the output column is spelled "...Anticepation" here but
# "...Anticipation" for the two Indian event columns. Confirm which name later
# cells expect before renaming.
calculate_anticipation(daily_df, "UsElectionDatesMarker", "UsElectionDatesAnticepation")