# Basic Documentation

In [5]:
# This module was written for the External Managers group at UNJSPF.
# It provides various statistics used for portfolio construction.

# The input return data is assumed to be monthly.
# The input data file should be laid out as shown below. read_data() uses the
# column named 'dates' as the index, so the header must be spelled exactly so:
# --------------------------------------------------
# dates        Funds1   Funds2 ...   Funds 10
# YYYY-MM-DD   return   return       return
# --------------------------------------------------

# You can select the start/end date of the analysis.
# You can select the name of the fund.

# Select Start Date

In [None]:
# First date of the analysis window, kept as a plain string.
# NOTE(review): the format is ambiguous (M/D/YYYY vs D/M/YYYY) and differs from
# the YYYY-MM-DD layout described for the input file -- TODO confirm.
# NOTE(review): no function in the visible code reads this name yet.
Start_Date = '3/1/2018'

# Select End Date

In [None]:
# Last date of the analysis window, kept as a plain string.
# NOTE(review): the format is ambiguous (M/D/YYYY vs D/M/YYYY) and differs from
# the YYYY-MM-DD layout described for the input file -- TODO confirm.
# NOTE(review): no function in the visible code reads this name yet.
End_Date = '2/1/2019'

# Select Name of the Fund or Select 'All'

In [None]:
# Name of the single fund to analyse, or 'All' for every fund.
# NOTE(review): no function in the visible code reads this name yet.
Fund_Name = 'All'

# Imports required libraries

In [7]:
import pandas as pd
import numpy as np
import seaborn
import matplotlib.pyplot as plt

# Reads Data from csv file 

def read_data(file_path): 
    """
    Load a returns table from a csv file and index it by its 'dates' column.

    *** Input of this function : file_path
    ex) file_path = 'C:/Users/intern9/Desktop/returns.csv'

    The file must contain a column named 'dates'; every other column is
    treated as one fund:
    --------------------------------------------------
        dates        Funds1   Funds2 ...   Funds 10
        YYYY-MM-DD   return   return       return
    --------------------------------------------------

    No cleaning happens here: missing values are kept exactly as pandas
    reads them, and the 'dates' values are not converted to datetimes.

    *** Output of this function : (raw_data, name_funds)
        raw_data   : DataFrame indexed by the 'dates' column
        name_funds : list with the names of the remaining (fund) columns
    """
    indexed_returns = pd.read_csv(file_path).set_index('dates', drop=True)
    fund_columns = list(indexed_returns.columns)

    return indexed_returns, fund_columns

def main_output(start_date = False, end_date = False):
    """
    Skeleton of the report builder: loads the returns and returns an empty
    DataFrame that is meant to collect the per-fund results.

    start_date, end_date : accepted but not used yet (see the TODO notes).

    Relies on a module-level name ``file_path`` being defined before the
    call; it is not a parameter of this function.

    Returns an empty pandas DataFrame for now.
    """
    # Simple manipulation of the data    
    # Pair the inputs by funds and its benchmarks
    data, name_funds = read_data(file_path)
    paired_names = [] # list of list (TODO: fund / benchmark pairs)
    input_data_fund = []  # TODO: per-fund input frames

    # TODO: Run output by each fund and store the results in one tab
    
    # TODO: if start date and end date are not given, then generate the
    # remaining part

    # TODO: Gather all tabs and output this in a spreadsheet
 
    # The pandas class is spelled ``DataFrame``; ``pd.dataFrame`` does not
    # exist and raised AttributeError on every call.
    final_output = pd.DataFrame()
    
    return final_output


# the input is one single fund and its benchmark fund (if given)
def output_contents(data, manager_type = 'Investments',
                    fund_name = 'asset_a',
                    benchmark_name = 'benchmark_asset_a'):
    """
    Build the dictionary of summary statistics for one fund.

    data           : table of returns that can be indexed by column name and
                     contains both ``fund_name`` and ``benchmark_name``
    manager_type   : label stored unchanged under 'Manager Type'
    fund_name      : column holding the fund returns (default 'asset_a',
                     the name that used to be hard-coded)
    benchmark_name : column holding the benchmark returns (default
                     'benchmark_asset_a', formerly hard-coded)

    Returns a dict keyed by the entries of ``key_outputs``.

    Side effect: calls main_max_dd_single_run, which draws the draw-down
    chart and the return histogram for the fund.

    NOTE: several entries are still placeholders (marked below) and do not
    hold the statistic their key names.
    """
    key_outputs = ['Number of Points', 'Annualized Return' , 'Std' , 'Beta' , 
                   'Sharp Ratio' , 'Skewness' , 'Min' , 'Max', 'Max DrawDown',
                   'Recovery Time', 'Correlation /w Benchmark', 'Tracking Err',
                   'Active Return', 'Cumulative Return', 'Up Market', 'Dn Market', 
                   'Manager Type' ]

    # Pre-fill every key so the dictionary order follows key_outputs.
    dict_outputs = {el:0 for el in key_outputs}
    max_dd, rec_per = main_max_dd_single_run(data, fund_name)
    fund_data = data[fund_name]
    benchmark_data = data[benchmark_name]
    
    # generate inputs 
    dict_outputs['Number of Points'] = fund_data.count()
    # TODO: placeholder, currently the number of observations
    dict_outputs['Annualized Return'] = fund_data.count()
    dict_outputs['Std'] = fund_data.std()
    # TODO: placeholder, currently the number of observations
    dict_outputs['Beta'] = fund_data.count()
    # TODO: placeholder, currently the number of observations
    dict_outputs['Sharp Ratio'] = fund_data.count()
    dict_outputs['Skewness'] = fund_data.skew()
    dict_outputs['Min'] = fund_data.min()
    dict_outputs['Max'] = fund_data.max()
    dict_outputs['Max DrawDown'] = max_dd
    dict_outputs['Recovery Time'] = rec_per
    dict_outputs['Correlation /w Benchmark'] = fund_data.corr(benchmark_data)
    # TODO: placeholder, fixed at 0
    dict_outputs['Tracking Err'] = 0
    # TODO: placeholder, fixed at 0
    dict_outputs['Active Return'] = 0
    # TODO: placeholders, currently the number of observations
    dict_outputs['Cumulative Return'] = fund_data.count()
    dict_outputs['Up Market'] = fund_data.count()
    dict_outputs['Dn Market'] = fund_data.count()
    dict_outputs['Manager Type'] = manager_type
    
    return dict_outputs


def calc_annualized_return():
    """
    Stub: nothing is computed yet and the call returns None.

    Presumably meant to hold the annualized-return calculation -- TODO
    implement and decide on the inputs.
    """
    return None


def calc_beta():
    """
    Stub: nothing is computed yet and the call returns None.

    Presumably meant to hold the beta calculation -- TODO implement and
    decide on the inputs.
    """
    return None


def calc_sharp_ratio():
    """
    Stub: nothing is computed yet and the call returns None.

    Presumably meant to hold the Sharpe-ratio calculation -- TODO implement
    and decide on the inputs.
    """
    return None


def calc_trackingError():
    """
    Stub: nothing is computed yet and the call returns None.

    Presumably meant to hold the tracking-error calculation -- TODO
    implement and decide on the inputs.
    """
    return None


def calc_activeRet():
    """
    Stub: nothing is computed yet and the call returns None.

    Presumably meant to hold the active-return calculation -- TODO
    implement and decide on the inputs.
    """
    return None


def calc_CumRet():
    """
    Stub: nothing is computed yet and the call returns None.

    Presumably meant to hold the cumulative-return calculation -- TODO
    implement and decide on the inputs.
    """
    return None


def calc_upMarket():
    """
    Stub: nothing is computed yet and the call returns None.

    Presumably meant to hold the up-market statistic -- TODO implement and
    decide on the inputs.
    """
    return None


def calc_dnMarket():
    """
    Stub: nothing is computed yet and the call returns None.

    Presumably meant to hold the down-market statistic -- TODO implement
    and decide on the inputs.
    """
    return None


def data_length(data):
    """
    Count the non-missing observations of every fund.

    Iterates over ``data`` (one item per column name) and, for each one,
    takes the length of the series returned by cleared_fund_data.

    Returns a list of lengths in iteration order.
    """
    return [len(cleared_fund_data(data, column)) for column in data]


def cleared_fund_data(data, fund_name):
    """
    Return the ``fund_name`` column of ``data`` with missing values dropped.

    ``data`` itself is left untouched; dropna() hands back a new series.
    """
    return data[fund_name].dropna()

def return_to_level(return_data):
    """
    Turn a series of periodic returns into a compounded level series.

    return_data : pandas Series of returns (e.g. 0.02 for +2%)

    Missing values are dropped before compounding. The result is a list
    that starts at 1 and holds one extra level per remaining return, so its
    length is (number of non-missing returns) + 1.
    """
    # dropna() returns a new series; the original call discarded it, so
    # NaN values leaked into every later level.
    clean_returns = return_data.dropna()

    level_data = [1]
    # Iterate over the values directly: indexing the series with 0..n-1 is
    # label-based and breaks on an integer index with gaps.
    for period_return in clean_returns:
        level_data.append((1 + period_return) * level_data[-1])
                     
    return level_data


def max_drawdown(level_data):
    """
    Locate the largest draw-down of a level series.

    level_data : sequence of levels (list, array or series)

    Returns (max_dd, points):
        max_dd : (trough - peak) / peak, i.e. zero or negative
        points : [[i, j], [level at i, level at j]] where i is the position
                 of the trough and j the position of the preceding peak

    When the series never falls below an earlier peak there is no
    draw-down: max_dd is 0.0 and both points are the first observation.

    Raises ValueError when level_data is empty.

    NOTE(review): the trough is chosen by the largest *absolute* drop from
    the running peak, which need not be the largest percentage drop --
    confirm that this is the intended definition.
    """
    levels = np.asarray(level_data, dtype=float)
    if levels.size == 0:
        raise ValueError("level_data must contain at least one value")

    drop_from_peak = np.maximum.accumulate(levels) - levels
    i = np.argmax(drop_from_peak) # end of the period

    if i == 0:
        # No draw-down at all; levels[:0] would be empty and argmax on an
        # empty slice raises ValueError.
        return 0.0, [[0, 0], [levels[0], levels[0]]]

    j = np.argmax(levels[:i]) # start of period

    max_dd = (levels[i] - levels[j]) / levels[j]
    points = [[i, j], [levels[i], levels[j]]]
    
    return max_dd, points
    
def draw_max_dd(level_data, dates, points, fund_name):
    '''
    Plot the level series and mark the two draw-down points in red.

    level_data : levels to plot; drawn against their position
    dates      : accepted but not used by the plot
    points     : 2-dim list, first list holds the x values and second
                 list the y values of the markers
    fund_name  : appended to the chart title

    Shows the figure with plt.show(); nothing is returned.
    '''
    marker_x = points[0]
    marker_y = points[1]

    plt.plot(level_data)
    plt.plot(marker_x, marker_y, 'o', color='Red', markersize=10)

    plt.title('Maximum Draw Down: ' + fund_name)
    plt.xlabel('Dates')
    plt.ylabel('Time Series - normalized')

    plt.show()



def recovery_period(level_data, fund_name):
    """
    Count the periods from the deepest draw-down back to the prior peak.

    level_data : sequence of levels
    fund_name  : only used in the message printed when no recovery is found

    The trough is the position with the largest gap below the running
    maximum. The result is the offset from the trough to the first
    position at or after it where that gap is exactly zero. If the gap
    never closes, a message is printed and the number of observations from
    the trough to the end (trough included) is returned instead.
    """
    running_peak = np.maximum.accumulate(level_data)
    gap_to_peak = running_peak - level_data
    trough = np.argmax(gap_to_peak)
    gaps_from_trough = gap_to_peak[trough:].tolist()

    if 0.0 in gaps_from_trough:
        return gaps_from_trough.index(0.0)

    print("Recovery did not happen for " + fund_name)
    return len(gap_to_peak) - trough
    

def draw_hist(return_data, fund_name, num_bins = 5):
    """
    Draw, save and show a histogram of the returns.

    return_data : values to bin
    fund_name   : used in the chart title and in the output file name
    num_bins    : passed to plt.hist as the bins argument

    The figure is written to 'hist_<fund_name>.png' relative to the
    current working directory before it is shown. Always returns 0.
    """
    plt.hist(return_data, num_bins)

    plt.title('Return Histogram: ' + fund_name)
    plt.ylabel('frequency')
    plt.xlabel('returns')

    image_name = 'hist_' + fund_name + '.png'
    plt.savefig(image_name)
    plt.show()

    return 0



def main_max_dd_single_run(data, fund_name, draw_hist_flag = True):
    """
    Run the draw-down analysis for one fund.

    data           : table of returns indexed by column name
    fund_name      : column to analyse
    draw_hist_flag : when truthy, also draw and save the return histogram
                     (with 25 bins)

    Steps: drop missing returns, compound them into levels, find the
    maximum draw-down, plot it, and measure the recovery period.

    Returns (max_dd, rec_per).
    """
    return_data = cleared_fund_data(data, fund_name)
    level_data = return_to_level(return_data)
    max_dd, point = max_drawdown(level_data)
    draw_max_dd(level_data, return_data.index, point, fund_name)
    rec_per = recovery_period(level_data, fund_name)

    # Plain truthiness test instead of comparing against True.
    if draw_hist_flag:
        draw_hist(return_data, fund_name, 25)

    return max_dd, rec_per



def main_max_dd_batch_run(data):
    """
    Run main_max_dd_single_run for every column of ``data``.

    Returns (max_dds, rec_pers): two lists, in column order, holding the
    maximum draw-down and the recovery period of each fund.
    """
    per_fund = [main_max_dd_single_run(data, column) for column in data]

    max_dds = [drawdown for drawdown, _ in per_fund]
    rec_pers = [recovery for _, recovery in per_fund]

    return max_dds, rec_pers


# Script entry point: load the returns file and build the statistics
# dictionary for the default fund.
if __name__=="__main__":
    # Must stay a module-level name: main_output() reads ``file_path`` as a
    # global instead of taking it as an argument.
    # NOTE(review): machine-specific absolute path -- adjust before running.
    file_path = 'C:/Users/intern9/Desktop/1yr_return.csv'
    data, name_funds = read_data(file_path)
    #maximum_dd, recovery_periods = main_max_dd_batch_run(data)
    # The result is only kept for inspection; nothing is written out here.
    dummy = output_contents(data)