# Company Baselines Calculation Script

## Import required modules

In [2]:
import pandas as pd
import os



## Calculate company baselines

### Get data on all companies

In [3]:
# Load the company master list and pull the ticker symbols out as a plain Python list
all_companies = pd.read_csv('../STORAGE/COMPANIES.csv')
all_tickers = all_companies['ticker'].tolist()
print(all_tickers)


['ADANIPORTS.NS', 'ASIANPAINT.NS', 'AXISBANK.NS', 'BAJAJ-AUTO.NS', 'BAJFINANCE.NS', 'BAJAJFINSV.NS', 'BPCL.NS', 'BHARTIARTL.NS', 'BRITANNIA.NS', 'CIPLA.NS', 'COALINDIA.NS', 'DIVISLAB.NS', 'DRREDDY.NS', 'EICHERMOT.NS', 'GRASIM.NS', 'HCLTECH.NS', 'HDFCBANK.NS', 'HDFCLIFE.NS', 'HEROMOTOCO.NS', 'HINDALCO.NS', 'HINDUNILVR.NS', 'HDFC.NS', 'ICICIBANK.NS', 'ITC.NS', 'IOC.NS', 'INDUSINDBK.NS', 'INFY.NS', 'JSWSTEEL.NS', 'KOTAKBANK.NS', 'LT.NS', 'M&M.NS', 'MARUTI.NS', 'NTPC.NS', 'NESTLEIND.NS', 'ONGC.NS', 'POWERGRID.NS', 'RELIANCE.NS', 'SBILIFE.NS', 'SHREECEM.NS', 'SBIN.NS', 'SUNPHARMA.NS', 'TCS.NS', 'TATACONSUM.NS', 'TATAMOTORS.NS', 'TATASTEEL.NS', 'TECHM.NS', 'TITAN.NS', 'UPL.NS', 'ULTRACEMCO.NS', 'WIPRO.NS']


### Initialize dictionary to store company baselines data

In [3]:
company_baselines_data = {}

keys = ['ticker', '2017-2020']+[f'{2017 + yr}' for yr in range(4)] \
+[f'{2017 + yr}_Q{qtr}' for qtr in range(1, 5) for yr in range(4)] \
+[f'{2017 + yr}_M{mth}' for mth in range(1, 13) for yr in range(4)]

for key in keys:
	company_baselines_data[key] = []


### Function to calculate baseline percentages

In [4]:
def calc_baseline_percent(ticker_data, start_index, end_index):
	"""Return the percentage change of the 'close' column between two rows.

	The values are read as ticker_data['close'][start_index] and
	ticker_data['close'][end_index]; the result is
	(end - start) / start * 100, rounded to two decimal places.
	"""
	closes = ticker_data['close']
	final_close = closes[end_index]
	initial_close = closes[start_index]
	relative_change = (final_close - initial_close) / initial_close
	return round(relative_change * 100, 2)


### Calculate baselines

In [5]:
def _month_start(year, month):
	"""Return 'YYYY-MM-01' for the given month; month 13 rolls over to January of year + 1."""
	if month > 12:
		year, month = year + 1, month - 12
	return f'{year}-{month:02d}-01'


def _period_baseline(ticker_data, start_date, end_date):
	"""Return the % change in close over rows with start_date <= time_stamp < end_date.

	Returns 0.0 when no rows fall inside the window, so a missing period never
	raises. Note that 0.0 is therefore indistinguishable from "price unchanged".
	"""
	mask = (ticker_data['time_stamp'] >= start_date) & (ticker_data['time_stamp'] < end_date)
	period_rows = ticker_data.loc[mask]
	if period_rows.empty:
		return 0.0
	return calc_baseline_percent(ticker_data, period_rows.index[0], period_rows.index[-1])


# Reset the accumulators so that re-running this cell does not append duplicate rows
for column_values in company_baselines_data.values():
	column_values.clear()

# For each company
for ticker in all_tickers:

	# Add company ticker
	company_baselines_data['ticker'].append(ticker)

	# Get data.
	# NOTE(review): parse_dates=True without index_col does not parse 'time_stamp',
	# so the window filters below compare it against 'YYYY-MM-DD' strings. This
	# presumably relies on ISO-formatted text sorting chronologically - confirm
	# against the CSVs before switching to real datetimes.
	ticker_data = pd.read_csv(f"../STORAGE/TICKER_DATA/{ticker}.csv", parse_dates=True)

	# 4-year baseline %: first row to last row of the file (0.0 if the file has no rows)
	if ticker_data.empty:
		baseline_4_yr = 0.0
	else:
		baseline_4_yr = calc_baseline_percent(ticker_data, 0, len(ticker_data) - 1)
	company_baselines_data['2017-2020'].append(baseline_4_yr)

	for year in range(2017, 2021):

		# Yearly baseline: 1 Jan (inclusive) to 1 Jan of the next year (exclusive)
		company_baselines_data[f'{year}'].append(
			_period_baseline(ticker_data, _month_start(year, 1), _month_start(year, 13)))

		# Quarterly baselines: quarter q covers months 3q-2 .. 3q
		for quarter in range(1, 5):
			first_month = 3 * (quarter - 1) + 1
			company_baselines_data[f'{year}_Q{quarter}'].append(
				_period_baseline(ticker_data, _month_start(year, first_month), _month_start(year, first_month + 3)))

		# Monthly baselines
		for month in range(1, 13):
			company_baselines_data[f'{year}_M{month}'].append(
				_period_baseline(ticker_data, _month_start(year, month), _month_start(year, month + 1)))


## Convert data to a dataframe

In [6]:
# Each dictionary key becomes a column; each company contributes one row
companies_baseline_df = pd.DataFrame(company_baselines_data)
companies_baseline_df.head()


Unnamed: 0,ticker,2017-2020,2017,2018,2019,2020,2017_Q1,2018_Q1,2019_Q1,2020_Q1,...,2019_M10,2020_M10,2017_M11,2018_M11,2019_M11,2020_M11,2017_M12,2018_M12,2019_M12,2020_M12
0,ADANIPORTS.NS,76.68,45.96,-2.11,-0.96,26.26,24.03,-10.9,0.46,-36.11,...,-3.5,2.71,-11.55,9.61,-2.29,18.74,2.96,3.55,-0.03,10.39
1,ASIANPAINT.NS,205.59,26.42,20.02,29.63,54.39,18.66,-1.96,7.91,-10.47,...,0.99,9.05,-3.79,7.94,-4.47,2.63,2.02,2.45,3.16,19.37
2,AXISBANK.NS,38.57,26.3,12.12,20.75,-18.03,9.61,-8.76,23.47,-52.62,...,11.95,10.89,0.32,2.47,-0.48,15.5,5.71,0.4,0.59,2.03
3,BAJAJ-AUTO.NS,32.54,26.58,-17.24,17.02,10.35,7.96,-16.69,8.31,-34.28,...,11.55,-2.22,-0.75,3.31,-1.12,14.06,2.83,-0.34,-0.29,3.17
4,BAJFINANCE.NS,508.61,98.34,54.05,61.87,24.71,34.6,2.49,14.82,-47.73,...,2.78,-2.18,-5.45,6.48,-0.94,42.15,2.97,5.91,7.11,9.77


## Store data in a CSV file

In [7]:
# Write the baselines table without the row index.
# to_csv does not create missing folders, so make sure the target directory exists first.
baselines_csv_path = '../baselines/COMPANIES_BASELINES.csv'
os.makedirs(os.path.dirname(baselines_csv_path), exist_ok=True)
companies_baseline_df.to_csv(baselines_csv_path, index=False)
