### Load libraries

In [1]:
import os, sys, copy, time
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import statsmodels.api as sm

### Load the data

In [2]:
# Read the base results table from disk and show its (rows, columns).
final_result = pd.read_csv(filepath_or_buffer='./../data/final_table.csv')
final_result.shape

(4521, 16)

### Merge with other company information

In [2]:
# Load the first auxiliary table; the file is pipe-delimited text.
information1 = pd.read_csv(
    './../map_data/76692_1_70_20221207_125235_dat.txt',
    sep='|',
)
print(information1.shape)

(87263, 8)


In [3]:
# Preview the first five rows of the first auxiliary table.
information1.head(n=5)

Unnamed: 0,sa_finance1_cocode,sa_company_name,sa_finance1_year,sa_ann_rep_months,sa_fs_format,sa_pat_net_of_pe_pc_avg_tot_asset_net_menow_reval,sa_total_assets,sa_debt_equity
0,100044,INDUCTO STEEL LTD.,31-03-1995,12.0,IGAAP,,41.3,0.2135
1,100044,INDUCTO STEEL LTD.,31-03-1996,12.0,IGAAP,11.6349,59.0,0.5325
2,100044,INDUCTO STEEL LTD.,31-03-1997,12.0,IGAAP,0.1667,62.2,0.0554
3,100044,INDUCTO STEEL LTD.,31-03-1998,12.0,IGAAP,0.0,63.7,0.0751
4,100044,INDUCTO STEEL LTD.,31-03-1999,12.0,IGAAP,-0.9288,70.1,0.238


In [4]:
# Load the second auxiliary table (pipe-delimited text), print its shape
# and preview the first rows.
information2 = pd.read_csv(
    './../map_data/76694_1_5_20221207_131853_dat.txt',
    sep='|',
)
print(information2.shape)
information2.head()

(3626, 8)


Unnamed: 0,co_code,company_name,co_industry_gp_code,co_industry_name,co_nic_code,nic_prod_code,nic_name,incorporation_year
0,100044,INDUCTO STEEL LTD.,101040401000000,Wholesale trading,460901090000,46909,Other non-specialised wholesale trade n.e.c.,1988.0
1,100257,INDUSTRIAL & PRUDENTIAL INVST. CO. LTD.,102040000000000,Other fund based financial services,640301010000,64300,"Trusts, funds and other financial vehicles",1913.0
2,100315,INDUSTRIAL INVESTMENT TRUST LTD.,102040000000000,Other fund based financial services,640909010000,64990,"Other financial service activities, except ins...",1933.0
3,100644,CYIENT LTD.,101040801000000,Computer software,620101030000,62013,Providing software support and maintenance to ...,1991.0
4,100709,I D F C LTD.,102040000000000,Other fund based financial services,640301010000,64300,"Trusts, funds and other financial vehicles",1997.0


In [5]:
# Return on assets at (t-1): keep only the rows whose financial year ends on
# 31-03-2018, retain the company name and the ROA column, and rename them to
# the short names used when merging.
roa = (
    information1
    .loc[
        information1['sa_finance1_year'] == '31-03-2018',
        ['sa_company_name', 'sa_pat_net_of_pe_pc_avg_tot_asset_net_menow_reval'],
    ]
    .rename(columns={
        'sa_company_name': 'company_name',
        'sa_pat_net_of_pe_pc_avg_tot_asset_net_menow_reval': 'roa',
    })
    .reset_index(drop=True)
)
roa.head()

Unnamed: 0,company_name,roa
0,INDUCTO STEEL LTD.,0.4102
1,INDUSTRIAL & PRUDENTIAL INVST. CO. LTD.,21.325
2,INDUSTRIAL INVESTMENT TRUST LTD.,-6.0075
3,CYIENT LTD.,13.3033
4,I D F C LTD.,1.6832


In [6]:
# Leverage (debt/equity) at (t-1): keep only the rows whose financial year
# ends on 31-03-2018, retain the company name and the debt-equity column, and
# rename them to the short names used when merging.
leverage = (
    information1
    .loc[
        information1['sa_finance1_year'] == '31-03-2018',
        ['sa_company_name', 'sa_debt_equity'],
    ]
    .rename(columns={
        'sa_company_name': 'company_name',
        'sa_debt_equity': 'leverage',
    })
    .reset_index(drop=True)
)
leverage.head()

Unnamed: 0,company_name,leverage
0,INDUCTO STEEL LTD.,0.0
1,INDUSTRIAL & PRUDENTIAL INVST. CO. LTD.,0.0
2,INDUSTRIAL INVESTMENT TRUST LTD.,0.0
3,CYIENT LTD.,0.0
4,I D F C LTD.,0.0


In [5]:
# Reduce nic_prod_code to its first two characters (overwrites the column on
# information2), then keep the company name, the shortened code and the
# industry name as the lookup table used in the merge.
# NOTE(review): if the codes were parsed as integers, any leading zero is
# already gone before slicing — confirm against the raw file.
information2['nic_prod_code'] = information2['nic_prod_code'].map(lambda code: str(code)[:2])
nic_prod_code = (
    information2
    .loc[:, ['company_name', 'nic_prod_code', 'co_industry_name']]
    .reset_index(drop=True)
)
nic_prod_code.head()

Unnamed: 0,company_name,nic_prod_code,co_industry_name
0,INDUCTO STEEL LTD.,46,Wholesale trading
1,INDUSTRIAL & PRUDENTIAL INVST. CO. LTD.,64,Other fund based financial services
2,INDUSTRIAL INVESTMENT TRUST LTD.,64,Other fund based financial services
3,CYIENT LTD.,62,Computer software
4,I D F C LTD.,64,Other fund based financial services


In [10]:
# List the distinct industry names present in the lookup table.
nic_prod_code.loc[:, 'co_industry_name'].unique()

array(['Wholesale trading', 'Other fund based financial services',
       'Computer software', 'General purpose machinery',
       'Other consumer goods', 'Plastic packaging goods', 'Tyres & tubes',
       'Exhibition of films', 'Pesticides',
       'Generators, transformers & switchgears',
       'Computers, peripherals & storage devices',
       'Other fee based financial services',
       'Business services & consultancy', 'Diversified',
       'Road transport services', 'Castings & forgings',
       'Mining & construction equipment',
       'Plastic furniture, floorings & miscellaneous items',
       'Other construction & allied activities', 'Securities broking',
       'Telecommunication services', 'Industrial machinery',
       'Drugs & pharmaceuticals', 'Industrial construction', 'ITES',
       'Dyes & pigments', 'Hotels & restaurants', 'Steel',
       'Man-made filaments & fibres', 'Other automobile ancillaries',
       'Other textiles', 'Cement', 'Dairy products', 'Paper & new

In [8]:
# Attach ROA, leverage and the industry lookup to the results table, one merge
# at a time, matching on company_name. Each merge uses pandas' default inner
# join, so rows without a match in any of the three tables are dropped.
final_result2 = (
    final_result
    .merge(roa, on='company_name')
    .merge(leverage, on='company_name')
    .merge(nic_prod_code, on='company_name')
)
print(final_result2.shape)
final_result2.head()

(4474, 19)


Unnamed: 0,symbol,banker_name,co_code,company_name,exchange,beta,mes,vulnerable,type,time,count,nskew,duvol,ic_count,ic_nskew,ic_duvol,roa,leverage,nic_prod_code
0,SBIN.NS,STATE BANK OF INDIA,11,20 MICRONS LTD.,NSE,0.247401,0.001617,1,Public,0,0,-0.626466,-0.6558,1,0.270789,-0.229834,4.3457,0.982,8
1,SBIN.NS,STATE BANK OF INDIA,11,20 MICRONS LTD.,BSE,0.247401,0.001617,1,Public,1,-1,-0.814185,-0.664436,1,0.270789,-0.229834,4.3457,0.982,8
2,SBIN.NS,STATE BANK OF INDIA,576805,A & M JUMBO BAGS LTD.,BSE,0.247401,0.001617,1,Public,1,0,0.187605,0.257741,0,-0.227228,-0.010065,6.8627,0.0737,22
3,SBIN.NS,STATE BANK OF INDIA,576805,A & M JUMBO BAGS LTD.,NSE,0.247401,0.001617,1,Public,0,0,0.136015,0.074015,0,-0.227228,-0.010065,6.8627,0.0737,22
4,SBIN.NS,STATE BANK OF INDIA,389178,A 2 Z INFRA ENGG. LTD.,BSE,0.247401,0.001617,1,Public,1,-3,-0.763392,-0.171274,0,-0.059138,-0.183658,-6.1368,1.4573,71


In [9]:
# Remove, in place, every row that has a missing value in any column, then
# write the cleaned table to disk without the index column.
final_result2.dropna(axis=0, how='any', inplace=True)
final_result2.to_csv(path_or_buf='./../data/final_table2.csv', index=False)

In [10]:
# (rows, columns) of the merged table after dropping incomplete rows.
(len(final_result2.index), len(final_result2.columns))

(4204, 19)

In [12]:
# Count infinite entries in each column of final_result.
# The alias `np.Inf` was removed in NumPy 2.0, so the constant is spelled
# `np.inf`; `isin` also counts negative infinity, which the previous
# `== np.Inf` comparison never matched.
# NOTE(review): this inspects final_result, not the merged final_result2 —
# confirm that is the intended table.
final_result.isin([np.inf, -np.inf]).sum()

symbol          0
banker_name     0
co_code         0
company_name    0
exchange        0
beta            0
mes             0
vulnerable      0
type            0
time            0
count           0
nskew           0
duvol           0
ic_count        0
ic_nskew        0
ic_duvol        0
dtype: int64