In [None]:
!pip install monthdelta
!pip install yfinance

```
Stock universe: Nifty 200
The Nifty 200 constituent list is updated twice a year.
```

In [None]:
import numpy as np
import pandas as pd
import yfinance as yfin
from datetime import date,timedelta
from monthdelta import monthdelta
from tqdm import tqdm

# Since the universe is the top 200 companies in the Indian indices, every company must be listed on either
# the Bombay Stock Exchange (BSE) or the National Stock Exchange (NSE).



```
t=yfin.download('JSWINFRA.BO',start= date(2023, 4, 3), end= date(2023, 4, 4))
returns the stock's daily open and close prices as a pandas DataFrame
```




**Filter out companies that are not listed on any stock exchange as of the start date**






In [6]:
def filter(nifty200,start):
  """
  Keep only the companies that have price data on the start date.

  Every symbol is looked up on the Bombay Stock Exchange first (".BO" suffix)
  and, when that download comes back empty, on the National Stock Exchange
  (".NS" suffix). A company is left out when both downloads are empty.

  NOTE: this function shadows the built-in `filter`; the name is kept because
  other cells call it.

  @params nifty200: pandas dataframe built from nifty200 csv file
                    (a 'Symbol' column is required)
  @params start : start date; a single day of data, from start up to
                  start + 1 day, is requested for each lookup

  @returns a new dataframe with the rows of nifty200 whose symbol returned
           data; nifty200 itself is not modified
  """
  keep=[]
  for i in tqdm(range(len(nifty200))):
    symbol=nifty200['Symbol'].iloc[i]
    # the end date is kept one day ahead of the last date that is wanted
    data=yfin.download(symbol+".BO",start=start,end=start+timedelta(1))

    if len(data)==0: # stock is not listed on BSE
      data=yfin.download(symbol+".NS",start=start,end=start+timedelta(1)) # try from NSE

    # the company is kept as soon as one of the two exchanges has data for it
    if len(data)!=0:
      keep.append(i)

  # `i` is a row position, not an index label, so the rows are selected with
  # iloc; dropping by label would fail (or drop the wrong rows) whenever the
  # dataframe does not carry the default 0..n-1 index.
  niftyfilter=nifty200.iloc[keep]

  return niftyfilter



In [None]:
def perc_return(nifty200,companies,start,end):
  """
  Percentage return of every company over the window [start, end).

  The return is (close of the last downloaded row / open of the first
  downloaded row) - 1. Prices are requested from the Bombay Stock Exchange
  (".BO") first and from the National Stock Exchange (".NS") when the first
  download is empty.

  @params nifty200  :list of symbols for the top 200 nifty companies.
  @params companies :list of company names, in the same order as nifty200.
  @params start     :initial date of the holding period.
  @params end       :final date of the holding period.

  @returns dataframe indexed by company name with the columns
           'return', 'Symbol' and 'exchange' (the ticker, including the
           exchange suffix, that supplied the prices)
  @raises AssertionError when neither exchange returns any data for a symbol
  """
  gains=[]
  tickers=[]

  for position in tqdm(range(len(nifty200))):
    symbol=nifty200[position]

    ticker=symbol+".BO"
    prices=yfin.download(ticker,start=start,end=end)
    if len(prices)==0:
      # nothing on BSE, fall back to the national stock exchange
      ticker=symbol+".NS"
      prices=yfin.download(ticker,start=start,end=end)

    # neither exchange knows the symbol: stop instead of producing a bogus row
    assert len(prices)!=0,"check if the name is correct or company is  listed or not"

    # rows come back ordered by trading date: last row -> close, first row -> open
    closing=prices["Close"].iloc[-1]
    opening=prices["Open"].iloc[0]
    gains.append(closing/opening-1)
    # remember which exchange-qualified ticker provided the data
    tickers.append(ticker)

  table=pd.DataFrame(gains,companies,["return"])
  table['Symbol']=nifty200
  table['exchange']=tickers
  return table

**Volatility in the daily stock price**

In [None]:

def variance_perday(returnDF,exchange,start,end):
  """
  Standard deviation of the daily returns of every ticker.

  A daily return is close / previous close - 1. The first trading day has no
  previous close, so its own open price is used as the denominator instead.

  @params returnDF :the dataframe provided by perc_return function; a
                    'volatile' column is added to it in place.
  @params exchange :list of tickers (with exchange suffix) whose past price
                    data is fetched, one entry per row of returnDF.
  @params start    :initial date of the holding period.
  @params end      :final date of the holding period.

  @returns the same returnDF object, now carrying the 'volatile' column
  @raises AssertionError when a ticker shows a standard deviation of zero
  """
  spreads=[]
  for position in tqdm(range(len(exchange))):
    prices=yfin.download(exchange[position],start=start,end=end)

    closes=prices["Close"].to_list()
    # shift the closes one day forward and put the first open in front of them
    previous=pd.Series([prices["Open"].iloc[0]]+closes[:-1],index=prices.index)
    perday=prices["Close"]/previous-1

    spread=np.std(perday.to_numpy())
    spreads.append(spread)

    assert spread !=0 , "variance in stock price can't be zero"

  returnDF['volatile']=spreads
  return returnDF

In [None]:
def finalset(  data,param,nifty100,size=15  ):
  """
  Gives final subset of companies !!

  For every sort column the `size` rows with the highest values are selected,
  and every selected company is labelled "largecap" when it is a member of
  nifty100 and "midcap" otherwise. Only these two labels are used because the
  base universe does not include smallcaps.

  @params :data is the pandas dataframe containing the return and volatility for each company,
           indexed by company name
  @params :param is the list of column names based on which we will sort the data;
           a dict mapping column name -> result key is accepted as well
  @params :nifty100 is the list of all companies which are in nifty100
           (any iterable of names; a pandas Series is matched by its values)
  @params :size is the number of top rows kept for each sort column

  @returns dict with one entry per sort column (keyed by the column name, or by
           the mapped key when param is a dict); each value is a new dataframe
           with an extra 'label' column. `data` is not modified.
  """
  # A list used to crash here (`param[i]` indexed the list with a column
  # name); both accepted shapes are normalised to (column, key) pairs.
  if isinstance(param,dict):
    pairs=list(param.items())
  else:
    pairs=[(column,column) for column in param]

  largecaps=set(nifty100)

  Df={}
  for column,key in pairs:
    # copy() so the label is written to an independent frame, not to a slice
    top=data.sort_values(column,ascending=False).head(size).copy()
    top["label"]=["largecap" if company in largecaps else "midcap" for company in top.index]
    Df[key]=top

  return Df



```
Backtest implementation
```



In [None]:
def together(start,  nifty200,companies,holding_period=6, returns =1.0,size=15):
  """
  first part of backtest implementation: pick the best performers of the
  look-back window and measure them over the month that follows `start`.

  The look-back window runs from `start - holding_period months` to
  `start - 1 month`. The `size` companies with the highest return in that
  window form an equally weighted portfolio, which is valued from the first
  open on/after `start` to the last open up to `start + 1 month`.

  @params start  :refers to initial date of holding period.
  @params nifty200 :provides list of symbols for top 200 nifty companies.
  @params companies :provides list of top 200 nifty companies.
  @params holding_period  :observation time period (months) for past data for building the current subset
  @params returns  :return till now (floating datatype); currently not used in the computation
  @params size  :no of companies in the final subset

  @returns average of (last open / first open) over the selected companies,
           i.e. a growth factor where 1.0 means "no change"
  @raises ValueError when no company is available to build the portfolio
  """

  #holding period-1 months of data is analysed
  lookback_start=start-monthdelta(holding_period)
  lookback_end=start-monthdelta(1)
  data=perc_return(nifty200,companies,lookback_start,lookback_end)
  # if volatility data is wanted as well, pass `data` and data['exchange'].to_list()
  # through variance_perday for the same look-back window before ranking.

  picks=data.sort_values("return",ascending=False).head(size)
  print(picks)

  # Fewer than `size` companies may be available; the portfolio is built from
  # the rows that were actually selected instead of indexing past the end.
  count=len(picks)
  if count==0:
    raise ValueError("no companies available to build the portfolio")

  month_end=start+monthdelta(1)
  total=0.0
  for ticker in tqdm(picks['exchange'].to_list()):
    # end is one day past month_end so that month_end itself is included
    take=yfin.download(ticker,start=start,end=month_end+timedelta(1))
    first=take['Open'].iloc[0]
    last=take['Open'].iloc[-1]
    total=total+(last/first)

  # currently all stocks are assumed to be equiweighted
  return total/count

In [None]:
def backtest(start,data,holding_period=6,investment_period=12,returns=1.0,size=15):
  """
  Final part of backtest implementation (total return )

  The portfolio is rebuilt once per month with `together`, and the monthly
  growth factors are compounded.

  @params start :date of the first monthly rebalance.
  @params data :pandas dataframe built from nifty200 csv file
                ('Symbol' and 'Company Name' columns are read).
  @params holding_period :look-back length in months, forwarded to together.
  @params investment_period :time period for backtesting the strategy, in months.
  @params returns :forwarded to together.
  @params size :number of companies held each month, forwarded to together.

  @returns compounded return over the whole investment period, in decimal
           (0.10 means +10%)
  """

  # The universe is fixed once, up front: only companies that already had
  # price data 12 months before the first start date are considered.
  # `filter` here is the notebook's own function, which shadows the built-in.
  filterdata=filter(data,start-monthdelta(12))
  nifty200=filterdata['Symbol'].to_list()
  companies=filterdata['Company Name'].to_list()

  mul=1.0
  for _ in range(investment_period):
    # size/returns used to be dropped here, so the portfolio always held 15 names
    monthly=together(start,nifty200,companies,holding_period,returns=returns,size=size)
    start=start+monthdelta(1)
    mul=mul*monthly

  return mul-1.0

In [None]:
#nifty200=pd.read_csv("/content/nifty200list.csv")
#get=backtest(date(2023, 10, 3),nifty200,6,6,1.0,15)