In [208]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import urllib.request as urllib2
from io import StringIO
import datetime as dt
from datetime import timedelta
from dateutil.relativedelta import relativedelta
import hgutils
import time
stopwatch = hgutils.timer("ALM Algo")
import threading
import concurrent.futures
import fastparquet
import pyarrow
import numpy as np
import plotly.express as px
import plotly.io as pio
pio.renderers
pio.renderers.default = "notebook_connected"

import warnings
warnings.filterwarnings('ignore')

In [209]:
goal_amount = 1_00_000
startDate = dt.date(2015,1,1)
# goal_length = ['1m','2m','3m','6m','1y','2y','3y','5y']
goal_length = ['6m','1y','3y','5y']

# collection_window = ['1d','3d','7d','1m','2m','3m','6m','1y']
collection_window = ['1d','7d','1m']

goal_id = ['startDate','length','collection_window']

In [250]:
def emi_days_range(emi_count):
    return [(i+1)*30 for i in range(emi_count)]

def add_loose_timeline(no_of_days):
    return [d for d in range(no_of_days)]

def date_range(start_date,end_date,frequency=1):
    current_date = start_date
    yield current_date
    current_date = current_date + dt.timedelta(days=frequency)
    while current_date<=end_date:
        yield current_date
        current_date = current_date + dt.timedelta(days=frequency)

def date_range_list(start_date,end_date,frequency=1):
    return [d for d in date_range(start_date=start_date,end_date=end_date)]

v_date_range_list = np.vectorize(date_range_list)

def to_set (l):
    return set(l)

def generate_amfi_url (date,base_url = 'https://portal.amfiindia.com/DownloadNAVHistoryReport_Po.aspx?frmdt='):
    date_str = date.strftime("%d-%b-%Y")
    return base_url+date_str

def get_day_df_recursive (url, break_minutes = 5, selected_schemes = None):
    try:
        page = urllib2.urlopen(url)
        soup = BeautifulSoup(page.read())
        df = pd.read_csv(StringIO(soup.get_text()),sep=';')

        df['Scheme'] = df['Scheme Code']
        df['AMC'] = df['Scheme Code']

        df.loc[~df['Scheme Code'].str.contains("Schemes"),"Scheme"] = None
        df.loc[~df['Net Asset Value'].isnull(),"AMC"] = None

        df[['Scheme','AMC']] = df[['Scheme','AMC']].fillna(method='ffill')

        df['Scheme Type'] = df['Scheme'].str.split(pat="(",expand=True)[1].str.replace(")","").str.strip()
        df['Scheme'] = df['Scheme'].str.split(pat="(",expand=True)[0].str.strip()   

        df.dropna(subset='Scheme Name',inplace=True)

        if selected_schemes is not None:
            df = df[df['Scheme Name'].isin(selected_schemes)]

        
    except:
        stopwatch.start("Break Time")
        time.sleep(break_minutes*60)
        df = get_day_df_recursive(url,break_minutes)
        stopwatch.stop()
    return df

def download_daily_amfi_data(date,selected_schemes=None):
    df = get_day_df_recursive(generate_amfi_url(date),break_minutes=5,selected_schemes=required_schemes)
    df.astype(str).to_parquet("amfi_data_hdfc/"+date.strftime("%Y%b%d")+'.parquet')
    if date.day == 1:
        print(date.strftime("%Y%b%d"))

def get_current_value (date,startDate,length,collection_window):
    s = goal[(goal['startDate']==startDate)
        &(goal['length']==length)
        &(goal['collection_window']==collection_window)
        &(goal['date']<=date)]

    s = s[['Scheme Code','units_bought']].groupby('Scheme Code',as_index=False).sum()
    s['date'] = date
    s = s.merge(fund[['Scheme Code','date','NAV']],on=['Scheme Code','date'],how='left')
    return (s['NAV'] * s['units_bought']).sum()

v_get_current_value = np.vectorize(get_current_value)

In [211]:
cpi = pd.read_excel("cpi.xlsx")
cpi['date'] = pd.to_datetime(cpi['Period'])
cpi['Period'] = cpi['date'].dt.strftime("%Y-%m")

In [212]:
base = dt.date.today()
base = dt.datetime(base.year,base.month,base.day)

date_list = list(pd.date_range(startDate,end=dt.date.today()).date)

goal = pd.DataFrame({'startDate':[date_list],'length':[goal_length],'collection_window':[collection_window]})
goal = (goal
    .explode('startDate')
    .explode('length')
    .explode('collection_window')
    .sort_values(by = 'startDate'))

goal['length_days'] = goal['length'].str[:-1].astype(int) * goal['length'].str[-1:].map({'d':1,'m':30,'y':360})
goal['endDate'] = goal['startDate'] + pd.to_timedelta(goal['length_days'], unit='D')

goal = goal[goal['endDate']<=dt.date.today()]

goal['collection_days'] = (goal['collection_window']
    .str[:-1].astype(int) * goal['collection_window'].str[-1:]
    .map({'d':1,'m':30,'y':360})
    .astype(int))

goal['collection_startDate'] = goal['endDate'] - pd.to_timedelta(goal['collection_days'], unit='D')

goal['emi_count'] = (goal['length_days']/30).astype(int)

goal['goal_startDate'] = goal_amount

goal['startDate_period'] = goal['startDate'].apply(lambda x: x.strftime("%Y-%m"))
goal['endDate_period'] = goal['endDate'].apply(lambda x: x.strftime("%Y-%m"))

goal = (goal
    .merge(cpi[['Period','Index']],left_on='startDate_period',right_on='Period')
    .drop(columns='Period')
    .rename(columns={'Index':'startDate_index'})
    .merge(cpi[['Period','Index']],left_on='endDate_period',right_on='Period')
    .drop(columns='Period')
    .rename(columns={'Index':'endDate_index'}))

goal['goal_endDate'] = goal['endDate_index'] * goal['goal_startDate'] / goal['startDate_index']

goal['emi'] = goal['goal_startDate'] / goal['emi_count']

goal['emi_days_range'] = goal['emi_count'].apply(emi_days_range)
goal['coverage_days_range'] = goal['collection_days'].apply(add_loose_timeline)
goal['days_to_maturity'] = (goal['emi_days_range'] + goal['coverage_days_range']).apply(to_set)

goal = (goal
    .drop(columns=['emi_days_range','coverage_days_range'])
    .explode('days_to_maturity'))

goal['date'] = goal['endDate'] - pd.to_timedelta(goal['days_to_maturity'], unit='D')


goal['pending_investment'] = ((goal['days_to_maturity']-1) / 30).apply(np.floor) * goal['emi']
goal.loc[goal['days_to_maturity']==0,'pending_investment'] = 0

goal['is_payDay'] = False
goal.loc[(goal['days_to_maturity'] % 30 == 0) &(goal['days_to_maturity']!=0), 'is_payDay'] = True

goal['is_collectionPeriod'] = False
goal.loc[goal['date'] >= goal['collection_startDate'], 'is_collectionPeriod'] = True


goal_required_cols = ['date','startDate', 'length', 'collection_window','endDate',
        'goal_endDate','pending_investment',
        'emi','days_to_maturity','is_payDay','is_collectionPeriod']

goal = goal[goal_required_cols]

goal.head(2)

Unnamed: 0,date,startDate,length,collection_window,endDate,goal_endDate,pending_investment,emi,days_to_maturity,is_payDay,is_collectionPeriod
0,2015-06-30,2015-01-01,6m,1d,2015-06-30,102700.421941,0.0,16666.666667,0,False,True
0,2015-01-01,2015-01-01,6m,1d,2015-06-30,102700.421941,83333.333333,16666.666667,180,True,False


In [213]:
read_parquet = False
update_csv = True

if read_parquet:
    stopwatch.start("Reading parquet files")
    fund = pd.read_parquet('amfi_data/',use_nullable_dtypes=True, engine='pyarrow')
    stopwatch.stop(print=True, verbose=0)

    stopwatch.start("Only required schemes")
    scheme = pd.read_excel("schemes.xlsx",sheet_name='Use')
    # required_schemes = list(scheme['Scheme Name'].unique())
    # fund = fund[fund['Scheme Name'].isin(required_schemes)]
    stopwatch.stop()

    stopwatch.start("Saving parquet of required funds")
    fund = (fund
        .merge(scheme[['Scheme Name','Duration Days']],
            on='Scheme Name')
        )
    fund.to_csv("Funds.csv",index=False)
    stopwatch.stop()

if (not read_parquet) & (update_csv):
    stopwatch.start("Updating Schemes Data")
    scheme = pd.read_excel("schemes.xlsx",sheet_name='Use')
    fund = pd.read_csv("Fund.csv")
    fund = (fund
        .drop(columns=['Duration Days'])
        .merge(scheme[['Scheme Name','Duration Days']],
            on='Scheme Name')
        )
    stopwatch.stop()

if read_parquet | update_csv:
    stopwatch.start("Saving Fund")
    fund = fund[['Scheme Code',
        'Scheme Name','Net Asset Value',
        'Date','Duration Days']]
    fund.to_csv("Fund.csv",index=False)
    stopwatch.stop()


stopwatch.start("Preparing Funds df")

fund['date'] = pd.to_datetime(fund['Date']).dt.date
fund.drop(columns=['Date'],inplace=True)
expanded_funds = fund[['Scheme Code','Scheme Name','Duration Days']].drop_duplicates()
expanded_funds['date'] = [list(pd.date_range(dt.date(2010,1,1),end=dt.date.today()).date) for _ in range(expanded_funds.shape[0])]
fund = (expanded_funds
    .explode('date')
    .merge(fund,on=['Scheme Code','Scheme Name','Duration Days','date'],how='left')
    .sort_values(by=['Scheme Name','date']))

fund['Net Asset Value'] = (fund['Net Asset Value']
        .fillna(method='ffill')
        .astype(float))

fund['Scheme Code'] = fund['Scheme Code'].astype(str)
fund.rename(columns={'Net Asset Value':'NAV'},inplace=True)

stopwatch.stop()

In [214]:
min_days_to_maturity = goal[goal['is_payDay']]['days_to_maturity'].min()
max_days_to_maturity = goal[goal['is_payDay']]['days_to_maturity'].max()
available_durations = list(fund['Duration Days'].unique())
available_durations.sort()

duration_mapping = {0:0}
current_pos = 1
for d in range(min_days_to_maturity,max_days_to_maturity+1):
    if d >= available_durations[current_pos]:
        if current_pos < len(available_durations)-1:
            current_pos += 1
    duration_mapping[d] = int(available_durations[current_pos-1])

goal['matched_duration'] = (goal['days_to_maturity']
                    .map(duration_mapping)
                    .fillna(-1)
                    .astype(int)
                    )


In [215]:
goal = goal.merge(fund,left_on=['date','matched_duration'], right_on=['date','Duration Days'],how='left')
goal['units_bought'] = goal['emi'] / goal['NAV']
goal = goal.merge(fund[['Scheme Code','date','NAV']],
    left_on=['endDate','Scheme Code'],right_on=['date','Scheme Code'],suffixes=("","_endDate"),how='left')

goal['value_endDate'] = goal['units_bought'] * goal['NAV_endDate']



In [216]:
goal.groupby(['startDate','length','collection_window'],as_index=False).agg({'goal_endDate':'mean','value_endDate':'sum'})

Unnamed: 0,startDate,length,collection_window,goal_endDate,value_endDate
0,2015-01-01,1y,1d,104641.350211,7.033079e+06
1,2015-01-01,1y,1m,104641.350211,7.033079e+06
2,2015-01-01,1y,7d,104641.350211,7.033079e+06
3,2015-01-01,3y,1d,113164.556962,1.139665e+05
4,2015-01-01,3y,1m,113164.556962,1.139665e+05
...,...,...,...,...,...
23335,2022-03-03,6m,1m,103963.963964,1.013302e+05
23336,2022-03-03,6m,7d,103963.963964,1.013302e+05
23337,2022-03-04,6m,1d,103963.963964,1.013319e+05
23338,2022-03-04,6m,1m,103963.963964,1.013319e+05


In [251]:
goal_collection = goal[goal['is_collectionPeriod']]

goal_collection['current_value'] = v_get_current_value(goal_collection['date'].to_numpy(),
                                goal_collection['startDate'].to_numpy(),
                                goal_collection['length'].to_numpy(),
                                goal_collection['collection_window'].to_numpy())

In [253]:
goal_collection.to_csv("goal_collection.csv",index=False)

In [246]:
startDate = dt.date(2021,9,4)
length = '1y'
collection_window = '7d'
date = dt.date(2022,2,25)

k = goal_collection[(goal_collection['startDate']==startDate)&(goal_collection['length']==length)&(goal_collection['collection_window']==collection_window)]



In [247]:
k

Unnamed: 0,date,startDate,length,collection_window,endDate,goal_endDate,pending_investment,emi,days_to_maturity,is_payDay,...,matched_duration,Scheme Code,Scheme Name,Duration Days,NAV,units_bought,date_endDate,NAV_endDate,value_endDate,current_value
794516,2022-08-30,2021-09-04,1y,7d,2022-08-30,106654.343808,0.0,8333.333333,0,False,...,0,,,,,,,,,102227.099888
794517,2022-08-29,2021-09-04,1y,7d,2022-08-30,106654.343808,0.0,8333.333333,1,False,...,-1,,,,,,,,,102211.759544
794518,2022-08-28,2021-09-04,1y,7d,2022-08-30,106654.343808,0.0,8333.333333,2,False,...,-1,,,,,,,,,102187.704667
794519,2022-08-27,2021-09-04,1y,7d,2022-08-30,106654.343808,0.0,8333.333333,3,False,...,-1,,,,,,,,,102173.864672
794520,2022-08-26,2021-09-04,1y,7d,2022-08-30,106654.343808,0.0,8333.333333,4,False,...,-1,,,,,,,,,102160.037854
794521,2022-08-25,2021-09-04,1y,7d,2022-08-30,106654.343808,0.0,8333.333333,5,False,...,-1,,,,,,,,,102141.88201
794522,2022-08-24,2021-09-04,1y,7d,2022-08-30,106654.343808,0.0,8333.333333,6,False,...,-1,,,,,,,,,102126.368782


In [249]:
startDate = dt.date(2021,9,4)
length = '1y'
collection_window = '7d'
date = dt.date(2022,8,27)

s = goal[(goal['startDate']==startDate)
        &(goal['length']==length)
        &(goal['collection_window']==collection_window)
        &(goal['date']<=date)]

s = s[['Scheme Code','units_bought']].groupby('Scheme Code',as_index=False).sum()
s['date'] = date
s = s.merge(fund[['Scheme Code','date','NAV']],on=['Scheme Code','date'],how='left')
print((s['NAV'] * s['units_bought']).sum())
s

102173.86467225672


Unnamed: 0,Scheme Code,units_bought,date,NAV
0,118961,211.656338,2022-08-27,40.7166
1,119091,21.961274,2022-08-27,4260.0415
