In [118]:
import numpy as np
import pandas as pd
import os

In [119]:
# Locations of the local data set. BASE_PATH keeps its trailing slash, so the
# derived paths are built by plain concatenation without adding another one
# (the previous f'{BASE_PATH}/SCRIP/' produced 'Datasets//SCRIP/').
BASE_PATH    = 'Datasets/'
DATA_PATH    = f'{BASE_PATH}SCRIP/'            # per-ticker CSVs: <DATA_PATH><ticker>.csv
INDEX_PATH   = f'{BASE_PATH}INDEX/'            # presumably index-level data; not read in the cells shown here
TICKERS_PATH = f'{BASE_PATH}NSE Symbols.CSV'   # table of tickers (scrip symbol, company name)

## 1 - data collection

In [120]:
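# One-off download step; uncomment to (re)fetch the raw price files.
# IPython substitutes Python variables written as {name}; the bare word TICKERS_PATH
# would be passed to the script literally. The path contains a space, hence the quotes.
# !python3 downloader.py --tickers "{TICKERS_PATH}" --start 2019-01-01 --end 2020-01-01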

### read tickers

In [121]:
# Load the ticker table (scrip symbol + company name) and display it.
tickers = pd.read_csv(filepath_or_buffer=TICKERS_PATH)
tickers

Unnamed: 0,Scrip,Company Name
0,20MICRONS,20 Microns Limited
1,21STCENMGM,21st Century Management Services Limited
2,3IINFOTECH,3i Infotech Limited
3,3MINDIA,3M India Limited
4,5PAISA,5Paisa Capital Limited
...,...,...
231,CONTROLPR,Control Print Limited
232,CORALFINAC,Coral India Finance & Housing Limited
233,CORDSCABLE,Cords Cable Industries Limited
234,COROMANDEL,Coromandel International Limited


### filter by date & number of entries

In [122]:
# Keep only the tickers whose price file has enough rows in the target year.
#
# WARNING: this cell is destructive. For every ticker it either rewrites the
# per-ticker CSV in place with only the target-year rows, or deletes the file
# when it has too few rows; afterwards it overwrites the tickers CSV with the
# surviving tickers. Set need_filter = False to skip all of that.
need_filter = True
entries_threshold = 200   # minimum number of target-year rows required to keep a ticker
target_year = '2019'      # rows are selected by this prefix of the 'Date' string

valid_indices = []        # index labels (in `tickers`) of the tickers that are kept
entry_records = []        # one {'ticker', 'entries'} record per ticker that could be read
skipped_tickers = {}      # ticker -> reason its data file could not be processed

if need_filter:
    for index, (ticker, _name) in tickers.iterrows():
        ticker_file = f'{DATA_PATH}{ticker}.csv'
        try:
            data = pd.read_csv(ticker_file)
            # na=False: a row with a missing date counts as "not in the target
            # year" instead of putting NaN into the boolean mask.
            data = data.loc[data['Date'].str.startswith(target_year, na=False)]
        except (FileNotFoundError, KeyError, AttributeError,
                pd.errors.EmptyDataError, pd.errors.ParserError) as err:
            # Best effort, as before: a ticker without a usable data file is
            # simply dropped. Unlike a bare `except: pass`, the reason is kept
            # and unexpected errors (or Ctrl-C) are no longer swallowed.
            skipped_tickers[ticker] = repr(err)
            continue

        if len(data) >= entries_threshold:
            data.to_csv(ticker_file, index=False)
            valid_indices.append(index)
        else:
            os.remove(ticker_file)
        entry_records.append({'ticker': ticker, 'entries': len(data)})

    tickers = tickers.loc[tickers.index.isin(valid_indices)]
    tickers.to_csv(TICKERS_PATH, index=False)

    if skipped_tickers:
        print(f'skipped {len(skipped_tickers)} ticker(s) with a missing or unreadable '
              f'data file (details in `skipped_tickers`)')

# Build the summary once from the collected records. DataFrame.append no longer
# exists in pandas >= 2.0, and growing a frame row by row is quadratic anyway.
entries_info = pd.DataFrame(entry_records, columns=['ticker', 'entries'])
entries_info = entries_info.sort_values(by='entries', ascending=False)
entries_info

Unnamed: 0,ticker,entries
0,20MICRONS,245
161,BIOCON,245
148,BGRENERGY,245
149,BHAGERIA,245
150,BHAGYANGR,245
...,...,...
61,ANIKINDS,223
185,CALSOFT,220
212,CHALET,218
112,BAGFILMS,217


In [123]:
# Read every remaining ticker's price file into memory, keyed by scrip symbol.
# Each row of `tickers` is unpacked as a (symbol, company name) pair.
data_for_ticker = {
    symbol: pd.read_csv(f'{DATA_PATH}{symbol}.csv')
    for symbol, _company in tickers.itertuples(index=False, name=None)
}

## 2 - assets map

### compute log return

In [124]:
# Add a `logret` column to every price table: the row-over-row difference of
# log(Close). The first row of each table has no predecessor, so it gets NaN.
for symbol in list(data_for_ticker):
    prices = data_for_ticker[symbol]
    log_close = np.log(prices['Close'])
    data_for_ticker[symbol] = prices.assign(logret=log_close.diff())

In [125]:
# Spot-check one ticker's table, including the `logret` column added by the
# log-return cell (NaN in the first row, where diff() has nothing to subtract).
data_for_ticker['BAGFILMS']

Unnamed: 0,Date,Symbol,Series,Prev Close,Open,High,Low,Last,Close,VWAP,Volume,Turnover,Trades,Deliverable Volume,%Deliverble,logret
0,2019-01-01,BAGFILMS,EQ,4.95,5.00,5.15,4.85,5.00,4.95,5.01,202343,1.013573e+11,254.0,147939.0,0.7311,
1,2019-01-02,BAGFILMS,EQ,4.95,4.90,5.00,4.85,4.95,4.90,4.92,66516,3.272127e+10,162.0,58430.0,0.8784,-0.010152
2,2019-01-03,BAGFILMS,EQ,4.90,4.90,5.00,4.85,4.90,4.85,4.92,52702,2.590396e+10,109.0,38203.0,0.7249,-0.010257
3,2019-01-04,BAGFILMS,EQ,4.85,4.80,4.90,4.80,4.80,4.85,4.84,18077,8.758105e+09,76.0,15872.0,0.8780,0.000000
4,2019-01-07,BAGFILMS,EQ,4.85,4.85,4.95,4.85,4.90,4.85,4.88,49464,2.413514e+10,102.0,37488.0,0.7579,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
212,2019-11-14,BAGFILMS,EQ,2.20,2.30,2.30,2.10,2.15,2.20,2.24,693312,1.554927e+11,565.0,395392.0,0.5703,0.000000
213,2019-11-15,BAGFILMS,EQ,2.20,2.10,2.10,2.10,2.10,2.10,2.10,35325,7.418250e+09,93.0,35325.0,1.0000,-0.046520
214,2019-11-18,BAGFILMS,EQ,2.10,2.00,2.00,2.00,2.00,2.00,2.00,87810,1.756200e+10,129.0,87790.0,0.9998,-0.048790
215,2019-11-19,BAGFILMS,EQ,2.00,1.90,2.05,1.90,2.05,2.00,1.94,201290,3.899737e+10,286.0,157775.0,0.7838,0.000000


### compute logret_mean, logret_std

In [126]:
# Per-ticker estimates of the log-return distribution: sample mean and standard
# deviation of the `logret` column (pandas defaults, NaN rows are skipped).
# The frame is built once from a list of records instead of being enlarged row
# by row through .loc, and without inplace mutation, so the cell is cheap and
# gives the same result every time it is re-run.
estim_records = [
    {
        'ticker': ticker,
        'logret_mean': data['logret'].mean(),
        'logret_std': data['logret'].std(),
    }
    for ticker, data in data_for_ticker.items()
]
estims = (
    pd.DataFrame(estim_records, columns=['ticker', 'logret_mean', 'logret_std'])
      .set_index('ticker')
)

estims

Unnamed: 0_level_0,logret_mean,logret_std
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
20MICRONS,-0.000938,0.023810
21STCENMGM,-0.001896,0.014912
3IINFOTECH,-0.002569,0.039587
3MINDIA,0.000125,0.018969
5PAISA,-0.001199,0.043249
...,...,...
CONTROLPR,-0.001061,0.026090
CORALFINAC,-0.001317,0.034008
CORDSCABLE,-0.001831,0.024574
COROMANDEL,0.000677,0.015572


### plot the map

In [127]:
import plotly.express as px
from sklearn.preprocessing import normalize

# Colour encodes each ticker's mean log return divided by its standard deviation.
ticker_colour = estims['logret_mean'] / estims['logret_std']

# Marker size is the same ratio min-max scaled into [0, 1]. The divisor must be
# the range (max - min): dividing by max alone does not bound the result by 1
# and flips the sign when the largest ratio is negative, while marker sizes
# have to be non-negative.
colour_min = ticker_colour.min()
colour_range = ticker_colour.max() - colour_min
if colour_range > 0:
    ticker_size = (ticker_colour - colour_min) / colour_range
else:
    # Single ticker, identical ratios, or nothing comparable: use equal sizes
    # rather than dividing by zero.
    ticker_size = pd.Series(1.0, index=ticker_colour.index)

# Assets map: one marker per ticker, risk (std) on x, reward (mean) on y.
fig = px.scatter(estims, x='logret_std', y='logret_mean',
                 hover_name=estims.index,
                 color=ticker_colour,
                 size=ticker_size,
                 title='Assets map: mean vs. standard deviation of log returns',
                 labels={'logret_std': 'std of log return',
                         'logret_mean': 'mean of log return'})
fig.show()

## 3 - uniform portfolio

In [128]:
import plotly.graph_objects as go

# Equal-weight ("uniform") portfolio: every ticker carries the same weight.
#
# The mean is linear, so the portfolio's mean log return is the average of the
# per-ticker means.
uniform_logret_mean = estims['logret_mean'].mean()

# The standard deviation is NOT linear: averaging the per-ticker stds ignores
# the correlations between tickers and only gives an upper bound. Instead,
# build the portfolio's own return series (for each date, the average logret of
# all tickers that have a value on that date) and take the std of that series.
# NOTE: averaging log returns across tickers is the same small-return
# approximation the mean above already relies on.
uniform_logret_by_date = (
    pd.concat([prices[['Date', 'logret']] for prices in data_for_ticker.values()])
      .groupby('Date')['logret']
      .mean()
)
uniform_logret_std = uniform_logret_by_date.std()

# `fig` is the assets map created in the plotting cell. Drop a marker left by a
# previous run of this cell first, so re-running does not stack duplicates.
UNIFORM_TRACE_NAME = 'uniform portfolio'
fig.data = tuple(trace for trace in fig.data if trace.name != UNIFORM_TRACE_NAME)
fig.add_scatter(x=[uniform_logret_std], y=[uniform_logret_mean], mode="markers",
                name=UNIFORM_TRACE_NAME,
                marker=dict(size=20, color="LightSeaGreen"))

