# Company Baselines Calculation Script

## Import required modules

In [177]:
import pandas as pd
import os


## Calculate company baselines

### Get data on all companies

In [178]:
# Load the master list of companies and pull out their ticker symbols
all_companies = pd.read_csv('../STORAGE/COMPANIES.csv')
all_tickers = all_companies['ticker'].tolist()


### Initialize dictionary to store company baselines data

In [179]:
# Column names in output order: the ticker, the whole 2017-2020 span, each
# year, then quarters and months grouped by period number across the years
# (2017_Q1, 2018_Q1, ..., 2020_Q4 and 2017_M1, 2018_M1, ..., 2020_M12).
keys = ['ticker', '2017-2020']
keys += [f'{yr}' for yr in range(2017, 2021)]
keys += [f'{yr}_Q{qtr}' for qtr in range(1, 5) for yr in range(2017, 2021)]
keys += [f'{yr}_M{mth}' for mth in range(1, 13) for yr in range(2017, 2021)]

# Every column starts out as its own empty list
company_baselines_data = {key: [] for key in keys}


### Function to calculate baseline percents

In [180]:
def calc_baseline_percent(start_index, end_index, data=None):
	"""Return the change in closing price between two rows, as a percentage.

	The difference ``close[end_index] - close[start_index]`` is divided by the
	close at index label 0 (the first row of the frame), multiplied by 100 and
	rounded to two decimal places.

	NOTE(review): the denominator is always row 0, not ``close[start_index]``,
	so values for sub-periods are percentages of the very first close rather
	than returns over the period itself (which is how a single quarter can
	come out below -100%). Confirm this is intended before changing it.

	Args:
		start_index: index label of the first row of the period.
		end_index: index label of the last row of the period.
		data: DataFrame with a 'close' column. Defaults to the module-level
			``ticker_data`` so existing two-argument calls keep working.

	Returns:
		The percentage change, rounded to two decimals.
	"""
	if data is None:
		# Fall back to the frame most recently loaded by the calling loop
		data = ticker_data

	close = data['close']
	# The indices passed in are index labels, so select with .loc explicitly
	price_change = close.loc[end_index] - close.loc[start_index]
	return round((price_change / close.loc[0]) * 100, 2)


### Calculate baselines

In [181]:
def _month_start(year, month):
	"""Return 'YYYY-MM-01' for the given month; month 13 rolls over to January of the next year."""
	if month > 12:
		year, month = year + 1, 1
	return f'{year}-{month:02d}-01'


def _period_baseline(start_date, end_date):
	"""Return the baseline % over rows with start_date <= time_stamp < end_date.

	Reads the module-level ``ticker_data`` set by the loop below. Returns 0.0
	when the ticker has no rows in the period.
	"""
	mask = (ticker_data['time_stamp'] >= start_date) & (ticker_data['time_stamp'] < end_date)
	period_index = ticker_data.loc[mask].index
	if period_index.empty:
		return 0.0
	return calc_baseline_percent(period_index[0], period_index[-1])


# For each company
for ticker in all_tickers:

	# Add company ticker
	company_baselines_data['ticker'].append(ticker)

	# Get data
	# NOTE(review): parse_dates=True without index_col only asks pandas to parse
	# the index, so 'time_stamp' presumably stays a string column and the date
	# comparisons are lexicographic. That works for ISO 'YYYY-MM-DD...' values;
	# confirm the file format.
	ticker_data = pd.read_csv(f"../STORAGE/TICKER_DATA/{ticker}.csv", parse_dates=True)
	ticker_data_len = len(ticker_data)

	# Baseline % over the full 4 years (first row to last row)
	company_baselines_data['2017-2020'].append(calc_baseline_percent(0, ticker_data_len - 1))

	for year in range(2017, 2021):

		# Yearly baseline; like quarters and months, a year without any rows
		# records 0.0 instead of raising IndexError
		company_baselines_data[f'{year}'].append(
			_period_baseline(_month_start(year, 1), _month_start(year + 1, 1)))

		# Quarterly baselines: quarter q runs from month 3q-2 up to (not including) month 3q+1
		for quarter in range(1, 5):
			first_month = 3 * quarter - 2
			company_baselines_data[f'{year}_Q{quarter}'].append(
				_period_baseline(_month_start(year, first_month), _month_start(year, first_month + 3)))

		# Monthly baselines
		for month in range(1, 13):
			company_baselines_data[f'{year}_M{month}'].append(
				_period_baseline(_month_start(year, month), _month_start(year, month + 1)))


## Convert data to a dataframe

In [182]:
# Each key of company_baselines_data becomes a column, giving one row per company
companies_baseline_df = pd.DataFrame(data=company_baselines_data)

# Preview the first rows
companies_baseline_df.head()


Unnamed: 0,ticker,2017-2020,2017,2018,2019,2020,2017_Q1,2018_Q1,2019_Q1,2020_Q1,...,2019_M10,2020_M10,2017_M11,2018_M11,2019_M11,2020_M11,2017_M12,2018_M12,2019_M12,2020_M12
0,ADANIPORTS.NS,76.68,45.96,-3.07,-1.33,36.74,24.03,-15.81,0.64,-50.53,...,-5.17,3.47,-18.44,11.71,-3.27,24.43,4.2,4.88,-0.04,16.64
1,ASIANPAINT.NS,205.59,26.42,25.29,45.31,107.65,18.66,-2.47,12.1,-20.72,...,1.92,20.28,-4.94,10.94,-8.82,6.32,2.5,3.63,6.07,49.59
2,AXISBANK.NS,38.57,26.3,15.14,28.73,-30.49,9.61,-10.94,32.51,-88.96,...,17.84,10.8,0.38,3.37,-0.79,18.09,6.82,0.56,0.98,2.76
3,BAJAJ-AUTO.NS,32.54,26.58,-21.86,17.63,12.43,7.96,-21.17,8.61,-41.17,...,12.88,-2.52,-0.93,3.38,-1.39,15.39,3.48,-0.36,-0.35,4.08
4,BAJFINANCE.NS,508.61,98.34,107.14,185.88,120.59,34.6,4.93,44.53,-232.94,...,12.65,-8.49,-11.33,17.75,-4.46,165.66,5.71,17.03,32.28,54.17


## Store data in a csv

In [183]:
# Make sure the output directory exists first: to_csv does not create missing
# directories, and failing here would discard the baselines computed above.
output_path = '../baselines/COMPANIES_BASELINES.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
companies_baseline_df.to_csv(output_path, index=False)