In [6]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from scipy.stats import norm
import datetime as dt
%matplotlib inline

In [7]:
# Read the sector workbook with its DATE column (parsed as dates) as the index,
# then add a 'Year' column taken from that index so later cells can slice by year.
# NOTE(review): the path below is absolute and machine-specific.
sectordata = pd.read_excel(
    r'C:\Users\hoang\Desktop\Research Paper\sector.xlsx',
    index_col='DATE',
    parse_dates=True,
).assign(Year=lambda frame: frame.index.year)

In [8]:
# Read the fund workbook with its DATE column (parsed as dates) as the index,
# then add a 'Year' column taken from that index so later cells can slice by year.
# NOTE(review): the path below is absolute and machine-specific.
funddata = pd.read_excel(
    r'C:\Users\hoang\Desktop\Research Paper\fund.xlsx',
    index_col='DATE',
    parse_dates=True,
).assign(Year=lambda frame: frame.index.year)

In [9]:
# Names used to select columns of the sector table.
sector_name_list = [
    'S5TELS',
    'S5COND',
    'S5CONS',
    'S5ENRS',
    'S5FINL',
    'S5HLTH',
    'S5INDU',
    'S5INFT',
    'S5MATR',
    'S5RLST',
    'S5UTIL',
]

# Names used to select columns of the fund table.
fund_name_list = [
    'OLGAX',
    'VSCOX',
    'RSEGX',
    'ALSAX',
    'WMICX',
    'IALAX',
    'FKSAX',
    'MSEGX',
    'WAMCX',
    'BMGAX',
]

In [10]:
# One series per (fund, year): key is '<fund name>_<year>', value is that fund's
# column restricted to the rows whose 'Year' equals the year.
fund_dict = {
    '{}_{}'.format(fund_name, year): funddata.loc[funddata['Year'] == year, fund_name]
    for fund_name in fund_name_list
    for year in range(2002, 2019)
}

# One series per (sector, year): key is '<sector name>_<year>', built the same way
# from the sector table.
sector_dict = {
    '{}_{}'.format(sector_name, year): sectordata.loc[sectordata['Year'] == year, sector_name]
    for sector_name in sector_name_list
    for year in range(2002, 2019)
}

In [11]:
# Per-year slices of the full sector table: the key is the year as a string and
# the value holds every row (all columns) whose 'Year' equals that year.
sectordata_dict = {
    str(year): sectordata[sectordata['Year'] == year]
    for year in range(2002, 2019)
}

# Per-year slices of the full fund table, keyed the same way (not split by fund).
funddata_dict = {
    str(year): funddata[funddata['Year'] == year]
    for year in range(2002, 2019)
}

In [12]:
# Summary statistics for every fund/year series in fund_dict: mean, standard
# deviation, and the 5th and 1st percentiles (stored as 'var95' and 'var99').

unique_fund_name_list = list(fund_dict)

mean_fund_list = [np.mean(fund_dict[key]) for key in unique_fund_name_list]
std_fund_list = [np.std(fund_dict[key]) for key in unique_fund_name_list]
var95_fund_list = [np.percentile(fund_dict[key], 5) for key in unique_fund_name_list]
var99_fund_list = [np.percentile(fund_dict[key], 1) for key in unique_fund_name_list]

# Column-oriented layout: each key is a column, rows follow fund_dict's key order.
fund_dict_summary = {
    'name': unique_fund_name_list,
    'mean': mean_fund_list,
    'std': std_fund_list,
    'var95': var95_fund_list,
    'var99': var99_fund_list,
}

In [13]:
# Summary statistics for every sector/year series in sector_dict: mean, standard
# deviation, and the 5th and 1st percentiles (stored as 'var95' and 'var99').

unique_sector_name_list = list(sector_dict)

mean_sector_list = [np.mean(sector_dict[key]) for key in unique_sector_name_list]
std_sector_list = [np.std(sector_dict[key]) for key in unique_sector_name_list]
var95_sector_list = [np.percentile(sector_dict[key], 5) for key in unique_sector_name_list]
var99_sector_list = [np.percentile(sector_dict[key], 1) for key in unique_sector_name_list]

# Column-oriented layout: each key is a column, rows follow sector_dict's key order.
sector_dict_summary = {
    'name': unique_sector_name_list,
    'mean': mean_sector_list,
    'std': std_sector_list,
    'var95': var95_sector_list,
    'var99': var99_sector_list,
}

In [14]:
# Per-year summary statistics of the whole fund table: for each year key, one
# value per fund column for the mean, standard deviation, 5th percentile
# ('var95') and 1st percentile ('var99').
#
# The helper 'Year' column is dropped first so it cannot leak into the numbers,
# and every statistic is taken column-wise with explicit pandas reducers.
# np.percentile on a whole DataFrame flattens it (Year included) into a single
# pooled value, and np.mean / np.std on a DataFrame do not reduce the same way
# across pandas versions.

mean_funddata_dict = {}
std_funddata_dict = {}
var95_funddata_dict = {}
var99_funddata_dict = {}

for year_key, year_frame in funddata_dict.items():
    fund_values = year_frame.drop(columns='Year')
    mean_funddata_dict[year_key] = fund_values.mean(axis=0)
    # ddof=0 keeps the population standard deviation that np.std computes.
    std_funddata_dict[year_key] = fund_values.std(axis=0, ddof=0)
    var95_funddata_dict[year_key] = fund_values.quantile(0.05)
    var99_funddata_dict[year_key] = fund_values.quantile(0.01)

In [15]:
# Per-year summary statistics of the whole sector table: for each year key, one
# value per sector column for the mean, standard deviation, 5th percentile
# ('var95') and 1st percentile ('var99').
#
# The helper 'Year' column is dropped first so it cannot leak into the numbers,
# and every statistic is taken column-wise with explicit pandas reducers.
# np.percentile on a whole DataFrame flattens it (Year included) into a single
# pooled value, and np.mean / np.std on a DataFrame do not reduce the same way
# across pandas versions.

mean_sectordata_dict = {}
std_sectordata_dict = {}
var95_sectordata_dict = {}
var99_sectordata_dict = {}

for year_key, year_frame in sectordata_dict.items():
    sector_values = year_frame.drop(columns='Year')
    mean_sectordata_dict[year_key] = sector_values.mean(axis=0)
    # ddof=0 keeps the population standard deviation that np.std computes.
    std_sectordata_dict[year_key] = sector_values.std(axis=0, ddof=0)
    var95_sectordata_dict[year_key] = sector_values.quantile(0.05)
    var99_sectordata_dict[year_key] = sector_values.quantile(0.01)

In [16]:
def stats(data):
    """Return the descriptive-statistics table produced by ``data.describe()``."""
    return data.describe()

In [17]:
def historical_var(data, confident):
    """
    Historical value at risk: the empirical tail percentile of the sample.

    Parameters
    ----------
    data : array-like
        Sample of observations (the notebook passes return series).
    confident : float
        Confidence level in percent, e.g. 95 or 99.

    Returns
    -------
    float
        Absolute value of the (100 - confident)-th percentile of ``data``.
    """
    # Only the empirical percentile matters here; no mean or standard
    # deviation estimate enters the historical method.
    tail_percentile = np.percentile(data, 100 - confident)
    return abs(tail_percentile)

def parametric_var(data, confident):
    """
    Parametric value at risk under a normal assumption.

    Step 1: Estimate the standard deviation of the sample (population form, ddof=0)
    Step 2: Take the standard-normal quantile z at probability 1 - confident/100
    Step 3: Return |z * std|

    The sample mean does not enter this estimate.

    Parameters
    ----------
    data : array-like
        Sample of observations (the notebook passes return series).
    confident : float
        Confidence level in percent, e.g. 95 or 99.

    Returns
    -------
    float
        Absolute value of z * std.
    """
    std = np.std(data)
    # Dividing by 100.0 keeps true division regardless of the operand types.
    z = norm.ppf(1 - confident / 100.0)
    return abs(z * std)
    
def montecarlo_var(data,confident, n_sim=100000, seed=0):
    """
    Monte Carlo value at risk under a normal assumption.

    Step 1: Estimate the mean and standard deviation (population form) of the sample
    Step 2: Draw ``n_sim`` normal variates with those parameters
    Step 3: Return the absolute value of their (100 - confident)-th percentile

    The draws come from a private ``RandomState(seed)``, which yields the same
    stream as seeding the global generator with ``seed`` but leaves NumPy's
    global random state untouched. Sampling the normal directly replaces the
    earlier lognormal-then-log round trip; results agree up to floating-point
    rounding.

    Parameters
    ----------
    data : array-like
        Sample of observations (the notebook passes return series).
    confident : float
        Confidence level in percent, e.g. 95 or 99.
    n_sim : int, optional
        Number of simulated draws (default 100000).
    seed : int, optional
        Seed of the private generator (default 0).

    Returns
    -------
    float
        Absolute value of the simulated tail percentile.
    """
    mean = np.mean(data)
    std = np.std(data)
    rng = np.random.RandomState(seed)
    sim_ret = rng.normal(mean, std, n_sim)
    sim_var = np.percentile(sim_ret, 100 - confident)
    return abs(sim_var)

def var_cov_var(data,confident):
    """
    Variance-covariance value at risk, assuming normally distributed data.

    Step 1: Estimate the mean of the sample
    Step 2: Estimate the standard deviation of the sample
    Step 3: Take the quantile of Normal(mean, std) at probability
            1 - confident/100 and return its absolute value
    """
    location = np.mean(data)
    scale = np.std(data)
    tail_probability = 1-confident/100

    return abs(norm.ppf(tail_probability, loc=location, scale=scale))

In [18]:
def do_all(fund_name,year):
    """
    Fit one fund on the sector data for one year and collect its VaR figures.

    y is the fund's series for ``year`` (looked up in ``fund_dict``) and x is
    that year's sector table without its 'Year' column (from
    ``sectordata_dict``), converted to a NumPy array. A linear regression of
    y on x is fitted; the sector data weighted by the fitted coefficients
    (the intercept is not added) forms the replicating portfolio ``rep_port``.

    Parameters
    ----------
    fund_name : str
        Fund name as used in the keys of ``fund_dict``.
    year : int
        Year as used in the keys of ``fund_dict`` / ``sectordata_dict``.

    Returns
    -------
    dict
        'score' (regression score of the fit), the historical, parametric,
        Monte Carlo and variance-covariance VaR of y at 95 and 99, and
        'decom_var95' / 'decom_var99', the Monte Carlo VaR of ``rep_port``.
    """
    y = fund_dict[fund_name+'_'+str(year)]
    x = np.array(sectordata_dict[str(year)].drop(columns = 'Year'))

    reg = LinearRegression().fit(x,y)

    # Coefficient-weighted sector data; reg.intercept_ is deliberately left out
    # here, matching how the decomposition figure has always been computed.
    rep_port = np.matmul(x,reg.coef_)

    parameters = {
        'score': reg.score(x,y),
        'hist_var95': historical_var(y,95),
        'hist_var99': historical_var(y,99),
        'para_var95': parametric_var(y,95),
        'para_var99': parametric_var(y,99),
        'monte_var95': montecarlo_var(y,95),
        'monte_var99': montecarlo_var(y,99),
        'cov_var95': var_cov_var(y,95),
        'cov_var99': var_cov_var(y,99),
        'decom_var95': montecarlo_var(rep_port,95),
        'decom_var99': montecarlo_var(rep_port,99),
    }

    return parameters

In [19]:
# Run do_all for every fund and every year; keys follow '<fund name>_<year>'.

var_dict = {
    fund_name + '_' + str(year): do_all(fund_name, year)
    for fund_name in fund_name_list
    for year in range(2002, 2019)
}


In [20]:
# Turn the result dictionaries into DataFrames (dictionary keys become columns).

df_var_calculation = pd.DataFrame(var_dict)
df_fund_summary = pd.DataFrame(fund_dict_summary)
df_sector_summary = pd.DataFrame(sector_dict_summary)

In [20]:
# Write each result table to its own CSV file.
# NOTE(review): the output paths are absolute and machine-specific.

for frame, target in (
    (df_var_calculation, r'C:\Users\hoang\Desktop\Research Paper\Output\var_calculation.csv'),
    (df_fund_summary, r'C:\Users\hoang\Desktop\Research Paper\Output\df_fund_summary.csv'),
    (df_sector_summary, r'C:\Users\hoang\Desktop\Research Paper\Output\df_sector_summary.csv'),
):
    frame.to_csv(target)