# analyze evestment dat

# Startup

## imports

In [1]:
# imports from core
import statistics
import os
import re
import math
import operator
from functools import partial
import inspect
import logging

# imports regarding stats
import pandas as pd
from pandas.tseries.offsets import *
from pandas.plotting import table
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.offsetbox import AnchoredText
from matplotlib.ticker import FuncFormatter
from matplotlib.gridspec import GridSpec
from datetime import datetime
from scipy import stats


import pdb

## set logger

In [2]:
# get logger
logger = logging.getLogger(__name__)

# set logging level
logger.setLevel(logging.DEBUG)

# set up log level in file
file_handler = logging.FileHandler('.log')

# set logging level on file handler
file_handler.setLevel(logging.DEBUG)

# create a logging format
formatter = logging.Formatter('%(asctime)s;%(levelname)s;%(message)s;')

# set the formatter
file_handler.setFormatter(formatter)

# add the file handler to logger
logger.addHandler(file_handler)

## initialize variables
# set initial comment
logger.info('Starting....')

## set parameters

In [3]:
# set style
plt.style.use('mccm-1')
# annotate plots
annot=False
# set audience type (internal=True; external=False)
audience_internal=True
# set whether files will be saved
save_files=True
# set default folder name
prefix='images'
# rename print function
_p=print
# function to show plots, and save if asked
def show(filename=None, folder='images', display=True, tight_layout=True):
    # make figure tighter
    fig.tight_layout() if tight_layout else None
    # show the plot
    plt.show() if display else None 
    # has a filename been passed
    if filename:
        # yes, save the image
        fig.savefig(folder+'/'+filename)
    # clear the figure to release memory
    plt.clf()

# NCF portfolio diversification charts

In [None]:
# absolute file path
file_path = '/Users/darpanbiswas/Dropbox/01_MCCM/89_DATA/ncf_input_data_01.xlsx'

# load xlsx sheet
ncf_holdings_level_2 = pd.read_excel(file_path, 'Sum')

# drop any columns with errors in all cells
ncf_holdings_level_2=ncf_holdings_level_2.dropna(how='all', axis=1)

In [None]:
def redistribute_sectors(grouped_srs):

    redistributed_sums=[]

    for index, row in grouped_srs.iteritems():

        if '/' in index:

            split_indices = index.split('/')

            num_of_split_indices = len(split_indices)

            for _t_index in split_indices:

                redistributed_sums += [[_t_index, row/num_of_split_indices]]

        else:

            redistributed_sums += [[index, row]]
            
#     pdb.set_trace()
            
    redistributed_df = pd.DataFrame(redistributed_sums)
        
    redistributed_df.columns = ['Sector0PPT', 'NCF cost']
        
    _t_group = redistributed_df.groupby('Sector0PPT').sum().loc[:, 'NCF cost']
    
    return _t_group
        

In [None]:
def pie_label(pct, *args, **kwargs):
    
    return "{:.1f}%".format(pct)

In [None]:
pie_chart_params_set = [
    {'title':'By Asset Class', 'group_by_field':'Strategy'},
    {'title':'By Industry', 'group_by_field':'Sector0PPT'},
    {'title':'By Liquidity', 'group_by_field':'Stage'},
    {'title':'By Geography', 'group_by_field':'Geography'},
]
    
fig, axes = plt.subplots(4, 2, figsize=(20,15), gridspec_kw={'width_ratios':[0.5, 0.5], 'height_ratios':[0.05, 0.45, 0.05, 0.45]})

axes = [
    (axes[0,0], axes[1,0]), 
    (axes[0,1], axes[1,1]), 
    (axes[2,0], axes[3,0]), 
    (axes[2,1], axes[3,1]),
]

for pie_chart_param, (title_axis, chart_axis) in zip(pie_chart_params_set, axes):
        
    _t_grouped_data = ncf_holdings_level_2.groupby(pie_chart_param['group_by_field']).sum().loc[:, 'NCF cost']
    
    if (pie_chart_param['group_by_field'] == 'Sector0PPT'):
    
        _t_grouped_data = redistribute_sectors(_t_grouped_data)
        
    _t_grouped_data = _t_grouped_data.sort_values(0, ascending=False)
    
    _t_grouped_data = _t_grouped_data.loc[_t_grouped_data/_t_grouped_data.sum() > 0.01]
        
    labels = ['{}'.format(index) for index, row in _t_grouped_data.iteritems()]

    patches, texts, autotexts = chart_axis.pie(
        _t_grouped_data, 
        labels=labels,autopct=pie_label, 
        pctdistance=0.75,
        startangle=90,
        counterclock=False)
    
    for text, autotext in zip(texts, autotexts):
        
        text.set_fontsize(15)
        
        autotext.set_fontsize(12)       
    
    chart_axis.axis('equal')
    
    title_axis.get_xaxis().set_visible(False)

    title_axis.get_yaxis().set_visible(False)

    title_axis.set_facecolor('C1')
    
    at = AnchoredText(
        pie_chart_param['title'], 
        loc=10, 
        prop=dict(
            backgroundcolor='C1',
            size=20, 
            color='white',
            weight='bold'
        ))

    title_axis.add_artist(at)
                                                                                      
#     title = axis.set_title(pie_chart_param['title'], fontdict={'fontsize':20, 'fontweight':'bold', 'color':'white'})

#     title.set_bbox(dict(facecolor='C1', alpha=1.0, edgecolor='black', boxstyle='square', pad=0.4))
    
#     bbox_patch = title.get_bbox_patch()
    
#     bbox_patch.set_boxstyle("square", pad=0.0, width=axis.get_window_extent().width )
#     
#     bbox_patch.set_width(10)
    
#     bbox_patch.update()

    
fig.tight_layout()
    
plt.show()

# Handle Sample Excel

In [4]:
# absolute file path
file_path = '/Users/darpanbiswas/Dropbox/01_MCCM/89_DATA/sample_excel.xlsx'



In [5]:
manager_names = [
    'Quan', 
    'AlphaX'
]

In [6]:
combined_manager_holdings_df = pd.DataFrame(
    [],
    columns=[
                            'Realization Status', 
                            'Portfolio Company',
                            'Date',
                            'Sector',
                            'Strategy',
                            'Cost',
                            'Stake',
                            'Realized',
                            'Unrealized',
                            'Multiple',
                            'IRR',
                            'NCF cost',
                            'NCF Realized', 
                            'NCF Unrealized', 
                            'NCF Value', 
                            'Exit Strategy',
                            'Status', 
                            'Fund',
                            'NCF Committed Capital',
                            'Fund Size',
                            'FMV',
                        ])

combined_manager_funds_df = pd.DataFrame(
    [],
    columns=[
        'manager',
        'sector',
        'ncf_committed_capital',
        'manager_fund_size',
    ]
)

In [7]:
def get_manager_parameter(*args, manager_df, parameter_name, **kwargs):

    parameter_locators = {
        'sector':[0,0],
        'ncf_committed_capital':[0,5],
        'manager_fund_size':[0,6],
    }
    
    try:
        
        parameter_value = manager_df.iloc[
            parameter_locators[parameter_name][0], 
            parameter_locators[parameter_name][1],
        ]
        
        return parameter_value
        
    except Exception as e:
    
        logger.debug('Could not find parameter:{0};{1}'.format(parameter_name, e))
        
        return 'error'
    

In [8]:
def return_realized_unrealized_string_array(*args, manager_df, **kwargs):
    
    re_unreal = re.compile('Unrealized.*')
    
    re_total = re.compile('Total.*')
    
    string_array = manager_df.iloc[:,0].values.tolist()
    
    reached_total_row = False
    
    current_group = ''
    
    for index in range(len(string_array)):
        
        _temp = string_array[index]
        
        if reached_total_row:
            
            string_array[index] = 'drop'

        else:

            if type(string_array[index]) == str:
                
                if re_unreal.match(current_group) and re_total.match(string_array[index]):
                
                    reached_total_row = True
                    
                current_group = string_array[index]

                string_array[index] = 'drop'

            else:

                string_array[index] = current_group

        print(_temp, string_array[index])

    return string_array        

### Questions for China Team
- How does the NCF Committed Capital and Fund Size show up; it is not in the Quan tab in the NCF Combined xlsx?
- Why do some "Sector" values map to multiple "Sector0PPT" values?
- What is the "sector" under the Fund name in the Sample Excel file?
- How does Geography and Year show up?

In [9]:
# function to parse manager name
def parse_manager_tab(*args, manager_name, **kwargs):
    
    # read the excel file
    manager_df = pd.read_excel(file_path, sheet_name=manager_name, skiprows=[0]).iloc[:, 1:]

    # get sector, committed capital, and find size data
    sector, ncf_committed_capital, manager_fund_size = \
        [get_manager_parameter(
            manager_df = manager_df, 
            parameter_name = parameter_name,
        ) for parameter_name in ['sector', 'ncf_committed_capital', 'manager_fund_size']]

    # remove first two rows
    manager_df = manager_df.iloc[2:, :]

    # set columns
    manager_df.columns = manager_df.iloc[0]
    
    # rename the first column
    manager_df.columns = ['Realization Status'] + manager_df.columns[1:].values.tolist()

    # drop row that was set as column
    manager_df = manager_df.iloc[1:, :]

    # update first column
    manager_df.iloc[:, 0] = return_realized_unrealized_string_array(manager_df=manager_df)

    # drop rows using the realized / unrealized column
    manager_df = manager_df.loc[manager_df.iloc[:,0] != 'drop',:]

    # add column to indicate manager name
    manager_df.loc[:,'Fund'] = None; manager_df.loc[:,'Fund']=manager_name
    
    # add column to show NCF Committed Capital
    manager_df.loc[:, 'NCF Committed Capital'] = None; manager_df.loc[:, 'NCF Committed Capital'] = ncf_committed_capital
    
    # add column to show Fund Size
    manager_df.loc[:, 'Fund Size'] = None; manager_df.loc[:, 'Fund Size'] = manager_fund_size
    
    # add column to compute FMV at manager level
    manager_df.loc[:, 'FMV'] = None; manager_df.loc[:, 'FMV'] = manager_df.loc[:, 'Realized'] + manager_df.loc[:,'Unrealized']

    # return the update df
    return manager_df

In [10]:
for manager_name in manager_names:

    response = parse_manager_tab(manager_name=manager_name)

    combined_manager_holdings_df = combined_manager_holdings_df.append(response)
    
#     combined_manager_funds_df = combined_manager_funds_df.append(pd.DataFrame(
#         data=[[
#             manager_name,
#             response['sector'],
#             response['ncf_committed_capital'],
#             response['manager_fund_size'],
#         ]],
#         columns=[
#             'manager',
#             'sector',
#             'ncf_committed_capital',
#             'manager_fund_size',
#         ]
#     ))

Realized Investments drop
1 Realized Investments
Realized Investments drop
Unrealized Investments drop
1 Unrealized Investments
2 Unrealized Investments
3 Unrealized Investments
3 Unrealized Investments
4 Unrealized Investments
5 Unrealized Investments
5 Unrealized Investments
6 Unrealized Investments
7 Unrealized Investments
8 Unrealized Investments
9 Unrealized Investments
10 Unrealized Investments
Unrealized Investments drop
Total drop
Unrealized Investments drop
1 Unrealized Investments
2 Unrealized Investments
3 Unrealized Investments
4 Unrealized Investments
5 Unrealized Investments
6 Unrealized Investments
7 Unrealized Investments
5 Unrealized Investments
6 Unrealized Investments
7 Unrealized Investments
8 Unrealized Investments
9 Unrealized Investments
10 Unrealized Investments
Unrealized Investments drop
Total drop
nan drop
* convertible Loans drop


In [11]:
    
combined_manager_holdings_df = combined_manager_holdings_df.reset_index(drop=True)

combined_manager_holdings_df.index.name = 'row_id'
    
combined_manager_funds_df = combined_manager_funds_df.reset_index(drop=True)

combined_manager_funds_df.index.name = 'row_id'

In [12]:
combined_manager_holdings_df.loc[:, 'Geography']='Where do I find this?'

combined_manager_holdings_df.loc[:, 'Year']='Where do I fund this?'

combined_manager_holdings_df.loc[:, 'Sector0PPT']='What is the mapping of Sector to this field?'

combined_manager_holdings_df = combined_manager_holdings_df.rename(
    {
        'NCF cost':'NCF Cost',
        'NCF Value':'NCF FMV',
        'Status':'Stage',
    }, 
    axis=1)

In [13]:
combined_manager_holdings_df = combined_manager_holdings_df.loc[:,[
            'Fund',
            'Geography',
            'Portfolio Company',
            'Date',
            'Year',
            'Sector0PPT',
            'Sector',
            'Strategy',
            'Cost',
            'Fund Size',
            'NCF Committed Capital',
            'NCF Cost',
            'Stake',
            'Realized',
            'NCF Realized',
            'Unrealized',
            'NCF Unrealized',
            'FMV',
            'NCF FMV',
            'Multiple',
            'IRR',
            'Exit Strategy',
            'Stage',
            'Realization Status',
]]

In [15]:
excel_writer = pd.ExcelWriter('combined_quan_alphax.xlsx')
combined_manager_holdings_df.to_excel(excel_writer,'Sum')
excel_writer.save()