In [1]:
import pdb
from datetime import date, timedelta
import seaborn as sns
import matplotlib.pyplot as plt
from copy import deepcopy
from functools import reduce
import datetime
import pandas as pd
import numpy as np
from re import sub


def dollar_string_to_float(money):
    """
    Converts a formatted money string (for eg "$1,234.56") into a float.

    money : string holding the amount. Every character which is not a digit or a '.' is removed
        before parsing, so currency symbols, thousands separators and also minus signs are dropped

    Returns the remaining characters parsed as a float
    """
    numeric_text = sub(r'[^\d.]', '', money)
    return float(numeric_text)


### Company Correlation

##### We compute a correlation matrix for all companies (or other entities) that the credit funds borrowing money from Goldfinch invest in. The sheet used is company_info.csv, which contains a factor analysis for every company. For each pair of companies, we calculate the correlation with the following formula.

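###### c(i,j) = number of factors which have the same value / total number of factors, where the multi-valued Customer Geography factor contributes the fraction of geographies the two companies share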

In [2]:
def get_company_correlation(company_info_df):
    """
    Calculates correlation matrix between companies using the formula above. The factors are currently hardcoded-
    Sector, Company Geography and Customer Geography. For adding more factors, add them in the sheet, and will have
    to add logic to take those new factors into account

    Sector and Company Geography each contribute 1 when both companies have exactly the same value and 0
    otherwise. Customer Geography is a comma separated list, it contributes the number of shared geographies
    divided by the number of distinct geographies of both companies. The correlation is the average of the
    three contributions.

    company_info_df : pandas.Dataframe of the company information. Must contain the columns 'Company Name',
        'Customer Geography', 'Sector' and 'Company Geography'

    Returns corr_matrix which is a dict of dict where corr_matrix[company1][company2] is a number which represents
    the correlation. Company names are used stripped and lower-cased as keys
    """
    columns = ['Company Name', 'Customer Geography',
               'Sector', 'Company Geography']
    df = company_info_df[columns]

    def parse_geographies(raw):
        # Strip every entry so that "US, India" and "India,US" describe the same set,
        # and ignore empty entries produced by stray commas
        return {entry.strip() for entry in raw.split(',') if entry.strip()}

    # Normalize every company once instead of re-parsing inside the pairwise loop
    companies = [
        (name.strip().lower(), sector, company_geography,
         parse_geographies(customer_geography))
        for name, customer_geography, sector, company_geography
        in df.itertuples(index=False, name=None)
    ]

    corr_matrix = dict()
    for name1, sector1, geography1, customers1 in companies:
        correlations = corr_matrix.setdefault(name1, dict())
        for name2, sector2, geography2, customers2 in companies:
            same_sector = int(sector1 == sector2)
            same_geography = int(geography1 == geography2)
            all_customers = customers1 | customers2
            if all_customers:
                customer_overlap = len(
                    customers1 & customers2)/len(all_customers)
            else:
                # Two empty lists are identical, which also keeps the correlation
                # of a company with itself equal to 1
                customer_overlap = 1.0
            correlations[name2] = (
                same_sector + same_geography + customer_overlap)/3
    return corr_matrix


## Pool correlation

#### We compute a correlation matrix for all lending pools. The sheet used is pool_info.csv. For every lending pool, we look at the portfolio of companies it invests in. For simplicity, we assume that a lending pool puts an equal amount of money into each of its companies (this can be changed if we have the information). The correlation is computed in the following way.

#### Consider two lending pools - pool a invests in companies (a_1,a_2,a_3,...,a_i) and pool b invests in companies (b_1,b_2,b_3....., b_j). The correlation between pool a and pool b will be computed as 

$$ \frac{\sum_{n=1}^{i} \sum_{m=1}^{j} c(a_n, b_m)}{i \cdot j} $$

In [3]:
def get_pool_correlation(pool_info_df, company_correlation_dict):
    """
    Calculates correlation matrix between pools using the formula above. It takes into company correlation calculated
    earlier. 

    The correlation between two pools is the average of the company correlations over every pair made of one
    company from each pool. A pool without any borrower company keeps a correlation of 0 with every pool.

    pool_info_df : pandas.Dataframe of the pool information. Must contain the column 'Borrower Companies'
        holding a comma separated list of company names
    company_correlation_dict : dict returned from get_company_correlation function

    Returns pool_corr_matrix which is a matrix where pool_corr_matrix[i][j] is a number which represents
    the correlation between pool i and j. i and j are the positions of the rows in pool_info_df

    Raises KeyError if a borrower company is missing from company_correlation_dict
    """
    def parse_companies(raw):
        # A missing cell is not a string (pandas reads it as NaN), treat it as an empty portfolio
        if not isinstance(raw, str):
            return []
        # Same normalization as the keys of company_correlation_dict
        return [name.strip().lower() for name in raw.split(',') if name.strip() != ""]

    # Parse every portfolio once, it does not depend on the other pool of the pair
    portfolios = [parse_companies(raw)
                  for raw in pool_info_df['Borrower Companies']]
    number_of_pools = len(portfolios)
    pool_corr_matrix = np.zeros((number_of_pools, number_of_pools))
    # Positions are used instead of the dataframe index labels, so a filtered or
    # re-indexed dataframe still fills the right cells
    for position1, companies1 in enumerate(portfolios):
        if not companies1:
            continue
        for position2, companies2 in enumerate(portfolios):
            if not companies2:
                continue
            corr = 0
            for company1 in companies1:
                for company2 in companies2:
                    corr += company_correlation_dict[company1][company2]
            pool_corr_matrix[position1][position2] = corr / \
                (len(companies1)*len(companies2))
    return pool_corr_matrix


#### The LendingPool class simulates the payments and risk factors for a lending pool based on the data provided. Each function is explained in its docstring.

In [4]:
class LendingPool:
    def __init__(self, pool_info: dict):
        """
        Initializes a lending pool given pool info dict

        pool_info : dict containing various information about the pool. Every key becomes an attribute
            of the pool. The keys read by this class are rate, maturity_date, payment_period and
            amortization ("Bullet" or "Equal")
        """
        for key in pool_info.keys():
            # Assign all the values in the dict
            setattr(self, key, pool_info[key])
        # Get all payment values and dates given the info
        self.payments = self.get_payments(self.amortization)
        # Get the implied risk factor based on financing rate
        self.risk_factor = self.get_implied_risk_factor()

    def get_payments(self, amortization):
        """
        Computes the payments for a lending pool, expressed per unit of principal (the principal is 1).

        The interest accrued from today till maturity is rate/365 per day. Payment dates are counted
        backwards from the maturity date, one every payment_period days, as long as they fit between
        today and maturity. If maturity is closer than one payment period, there is a single payment
        on the maturity date. If the maturity date is not in the future, there is no payment at all.

        With "Equal" amortization the principal and the interest are divided into equal payments.
        For eg with a principal of 100 dollars, an interest rate of 15%, then total amount to be paid over 
        one year maturity is 115. 115 will be divided into equal payments. 
        With "Bullet" amortization the principal is paid entirely in the last payment. Hence the 15 dollars
        will be divided into equal interest payments and on the last day 100 dollars will also be repaid.

        amortization : "Bullet" or "Equal"

        Returns a dict with date as keys and payment values on that date as values.

        Raises ValueError if amortization is neither "Bullet" nor "Equal"
        """
        if amortization not in ("Bullet", "Equal"):
            # Fail here instead of silently storing None as the payments of the pool
            raise ValueError(
                "Unknown amortization %r, expected 'Bullet' or 'Equal'" % (amortization,))
        edate = self.maturity_date
        sdate = date.today()  # Read once so that all computations use the same day
        days_till_maturity = (edate-sdate).days
        daily_rate = self.rate/365
        total_interest = days_till_maturity*daily_rate
        number_of_payments = int(days_till_maturity/self.payment_period)
        if number_of_payments == 0 and days_till_maturity > 0:
            # Maturity is closer than one payment period, everything is paid at maturity
            number_of_payments = 1
        payment_dates = [
            edate - timedelta(days=self.payment_period*i) for i in range(number_of_payments)]
        if not payment_dates:
            return {}
        if amortization == "Bullet":
            interest_payment = total_interest/len(payment_dates)
            payment_dict = {
                payment_date: interest_payment for payment_date in payment_dates}
            payment_dict[edate] = payment_dict[edate] + 1  # Add principal to last date
            return payment_dict
        # Equal amortization, divide principal and interest equally over all dates
        equal_payment = (total_interest + 1)/len(payment_dates)
        return {payment_date: equal_payment for payment_date in payment_dates}

    def get_implied_risk_factor(self):
        """
        Computes the implied risk factor of the pool given financing rate. Uses the Carapace premium formula, 
        equates 1-e^(-lambda * T) = r where r is the financing rate, T is the number of days till expiry and lambda
        is the risk factor.

        Returns the implied risk factor, that is -ln(1-r)/T
        """
        days_till_maturity = (self.maturity_date-date.today()).days
        return -1*np.log(1-self.rate)/days_till_maturity


### The simulation engine contains all the simulation logic. The documentation is in each function below

In [5]:
class SimulationEngine:
    def __init__(self, pool_array, corr_matrix):
        """
        Initializes a simulation engine given an array of lending pools and a correlation matrix for the pools

        pool_array : array of lending pools to simulate. Each pool must expose payments (dict keyed by date),
            risk_factor and maturity_date
        corr_matrix : correlation matrix for the lending pool, corr_matrix[i][j] is the correlation between
            pool i and pool j
        """
        self.pool_array = pool_array[:]
        self.corr_matrix = corr_matrix
        self.simulation_dates = self.get_simulation_dates()  # Get simulation dates
        self.initialize_state()  # Initialize the state

    def get_simulation_dates(self):
        """
        Calculate simulation dates given lending pools.
        The logic is to take the union of all payment dates for each lending pools

        Returns a sorted numpy array of the simulation dates        
        """
        payment_dates = [list(pool.payments.keys())
                         for pool in self.pool_array]
        return np.sort(reduce(np.union1d, payment_dates))

    def initialize_state(self):
        """
        Initializes the state of the engine.
        State contains 3 things - current risk factors of the pool, the information about which pool are defaulted
        and the dates on which the pool have defaulted        
        """
        number_of_pools = len(self.pool_array)
        # Current risk factors
        curr_risk_factors = np.array(
            [pool.risk_factor for pool in self.pool_array])
        # Current default state, the ith index is 1 if pool i has defaulted, at the start everything is 0
        curr_default_state = np.zeros(number_of_pools)
        # No pool has defaulted so assigning -1 for each
        default_date = [-1 for _ in range(number_of_pools)]
        self.state = {"curr_risk_factors": curr_risk_factors, "curr_default_state": curr_default_state,
                      "default_date": default_date}

    def simulate_paths(self, end_date, num_paths):
        """
        Simulates the portfolio till the end date repeating it for num_paths times.
        The final state of every path is stored in self.paths_info, and the engine state is reset
        to the initial state after each path.

        end_date : Date till simulation happens
        num_paths : Number of times to simualate        
        """
        self.paths_info = []  # Initialize a list to store info
        self.end_date = end_date
        # Keep only the simulation dates which are not after the end date
        relevant_simulation_dates = [
            simulation_date for simulation_date in self.simulation_dates if simulation_date <= end_date]
        # Assign maturity dates to an array
        maturity_dates = [pool.maturity_date for pool in self.pool_array]
        # Copy the initial state for reset
        initial_state = deepcopy(self.state)
        for _ in range(num_paths):
            self.simulate_one_path(
                relevant_simulation_dates, maturity_dates)  # Simulate one path
            # Append the result into our container
            self.paths_info.append(deepcopy(self.state))
            self.state = deepcopy(initial_state)  # Reset the engine

    def _expire_matured_pools(self, maturity_dates, as_of_date):
        """
        Assigns a risk factor of 0 to every pool whose maturity date is on or before as_of_date,
        a matured pool can not default anymore
        """
        matured = [i for i, maturity_date in enumerate(
            maturity_dates) if maturity_date <= as_of_date]
        if matured:
            self.state['curr_risk_factors'][matured] = 0

    def simulate_one_path(self, simulation_dates, maturity_dates):
        """
        Contains the logic for simulating one path. The result is written into self.state.

        We loop through all dates, starting from the first date. For each (prev_date,curr_date) pair, the probability
        of survival of a pool is given by e^(-risk factor* (curr_date-prev_date)), given by the Poisson distribution
        If the pool has defaulted, we assign curr_date to the date of its default, and change its state to 1.
        And then we increase the risk factors of other pools based on the correaltion matrix. The increase 
        is defined as - risk_factor(i) = risk_factor(i)*(1+c(i,j)) where j is the defautled pool and c(i,j) is the
        correlation between i and j. If several pools default on the same date, the increases are multiplied.
        The risk factor of a pool is set to 0 once it has defaulted or matured.

        If simulation_dates is empty, nothing is simulated and the state is left unchanged.

        simulation_dates : array containing dates to simulate
        maturity_dates : array containing maturity dates of the loans
        """
        if len(simulation_dates) == 0:
            # The end date is before the first payment date, there is no period to simulate
            return
        prev_date = simulation_dates[0]  # First date
        # A pool maturing on the first date is never live during the simulation
        self._expire_matured_pools(maturity_dates, prev_date)
        for curr_date in simulation_dates[1:]:  # loop through the array
            # survival probability vector, where ith index represents the probability
            # of lending pool i survive
            survival_probability = np.exp(
                -1*self.state['curr_risk_factors']*(curr_date-prev_date).days)
            # Sample from the survival probability
            default_today = np.random.binomial(1, 1-survival_probability)
            # Update the default state
            self.state['curr_default_state'] = np.maximum(
                default_today, self.state['curr_default_state'])
            # Compute the risk factor jump for each pool, the ith value is the product
            # of 1+c(i,j) over all pools j which have defaulted today
            risk_factor_jump = np.prod(
                1+self.corr_matrix*default_today, axis=1)
            # Multiply the risk factor jump
            self.state['curr_risk_factors'] = self.state['curr_risk_factors'] * \
                risk_factor_jump
            # Check which pool has defaulted today
            for i in np.where(default_today == 1)[0]:
                self.state['default_date'][i] = curr_date  # Assign todays date
                # Assign risk factor to 0 for future as it is already done
                self.state['curr_risk_factors'][i] = 0

            prev_date = curr_date
            # If pool has matured, assign risk factor to 0
            self._expire_matured_pools(maturity_dates, curr_date)


#### The Analysis Engine contains the logic for analysing the results of the simulations. The code for visualising and interpreting various measures from the data is written here. It can also be extended further with more analytics.

In [6]:
class AnalysisEngine:
    def __init__(self, simulation_engine):
        """
        Initialise the engine.

        simulation_engine : A SimulationEngine object which has already been ran, its pool_array and
            paths_info are read by the functions below
        """
        self.simulation_engine = simulation_engine

    def default_probabilities_pool(self):
        """
        Computes the probability of a pool defaulting given simulation results.

        The probability for a pool i is given by number of paths pool i defaults in divided by total paths

        Returns a array with ith index representing the probability for pool i

        Raises ValueError if the simulation engine holds no simulated path
        """
        paths_info = self.simulation_engine.paths_info
        if len(paths_info) == 0:
            raise ValueError(
                "No simulated paths available, run simulate_paths with num_paths > 0 first")
        default_states = [path_info['curr_default_state']
                          for path_info in paths_info]
        return np.sum(default_states, axis=0)/len(paths_info)

    def capital_loss(self, initial_investment):
        """
        Computes the capital loss for a given initial investment on each path

        For a pool which has defaulted on a path, the loss is the initial investment in that pool multiplied
        by the payments which are not received, that is every payment scheduled on or after the default date.
        A pool which has not defaulted contributes no loss.

        Initial investment should be constructed carefully.
        If a protection seller invests 100 dollars in Carapace pool, assuming the proportion of protection
        buyer demand for all the pools is uneven, the initial investment in each pool will have to be scaled 
        appropriately.

        For eg if the buyer demand proportion is [0.1,0.2,0.5,0.2] which means 10% for first pool, 20%
        for second pool, the somebody investing 100 dollars will have an initial investment of
        [10,20,50,20]. Very important to get this right so that the numbers you get are correct.    

        initial_investment : array where the ith index is the amount invested in pool i

        Returns capital_losses which is an array representign the capital loss pathwise
        """
        pool_array = self.simulation_engine.pool_array  # Load the pool array
        # The scheduled payments of a pool are the same on every path, so sum them once
        scheduled_totals = [sum(pool.payments.values()) for pool in pool_array]
        capital_losses = []  # Initialize the capital losses array
        for path_info in self.simulation_engine.paths_info:  # Loop through each path
            capital_loss = 0
            default_date = path_info['default_date']
            for i, pool in enumerate(pool_array):  # Iterate through each pool
                if default_date[i] == -1:  # -1 means the pool has not defaulted
                    continue
                # Only the payments strictly before the default date are received
                received = sum(amount for payment_date, amount in pool.payments.items()
                               if payment_date < default_date[i])
                capital_loss += initial_investment[i] * \
                    (scheduled_totals[i] - received)
            # Append the capital loss for this path
            capital_losses.append(capital_loss)
        return capital_losses

    def visualize_capital_losses(self, initial_investment):
        """
        Plot the distribution of capital loss as a fraction of the total initial investment, and print
        the 10, 25, 50, 75 and 99 percentiles and the mean of the capital loss

        Initial investment should be constructed carefully.
        If a protection seller invests 100 dollars in Carapace pool, assuming the proportion of protection
        buyer demand for all the pools is uneven, the initial investment in each pool will have to be scaled 
        appropriately.

        For eg if the buyer demand proportion is [0.1,0.2,0.5,0.2] which means 10% for first pool, 20%
        for second pool, the somebody investing 100 dollars will have an initial investment of
        [10,20,50,20]. Very important to get this right so that the numbers you get are correct. 

        initial_investment : array where the ith index is the amount invested in pool i
        """
        initial_investment_total = sum(initial_investment)
        capital_loss = self.capital_loss(initial_investment)
        capital_loss_percentage = [
            loss/initial_investment_total for loss in capital_loss]
        fig, ax = plt.subplots(figsize=(10, 10))
        ax.set(xlabel='Capital Loss Percentage',
               title='Capital Loss Percentage Probability')
        sns.histplot(capital_loss_percentage,
                     stat='probability', ax=ax, bins=50)
        percentiles = [10, 25, 50, 75, 99]
        print("Initial investment total is ", initial_investment_total)
        for percentile in percentiles:
            percentile_value = np.percentile(
                capital_loss_percentage, percentile)
            # Scale back to an amount for the report
            print("%s percentile capital loss is %s" %
                  (percentile, percentile_value*initial_investment_total))
        print("Mean capital loss is ", np.mean(
            capital_loss_percentage)*initial_investment_total)
