In [1]:
# the packages
import pandas as pd
import numpy as np
import networkx as nx

# import my own helper functions
from read import read_sims_result
from clean import cleanup_0IR_exp
from clean import cleanup_network

# Page Rank
from networkx.algorithms.link_analysis.pagerank_alg import pagerank_numpy

# logistic regression
from sklearn.linear_model import LogisticRegression

In [2]:
# ###########################
# Read OIR results, and fit the model
# ###########################
df0 = read_sims_result("/Users/xcheng/Documents/Oberlin/Summer2/DataAnalysis/data/0622/0IR300s", 32)
df0c = cleanup_0IR_exp(df0, numNode=32, numPeriod=15, numSim=100, balanced=True)

X = df0c[["deposits", "cash", "assets", "credit available", "wealth", "leverage", 
         "dummy-0-leverage",
         "wealth-lag", "deposits-lag", "cash-lag", "assets-lag", "leverage-lag", 
         "credit-available-lag", "credit-issued-lag", "dummy-0-leverage-lag",
         "over-leverage-frequency"]]
y = df0c["default-next"]

final = LogisticRegression(penalty="l1", C=0.007)
final.fit(X,y)

LogisticRegression(C=0.007, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l1', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [3]:
# make sure no defaults in 0IR
# sum(df0[df0["defaults due to interest"]
#     +df0["defaults due to negative wealth"]
#     +df0["defaults due to deposit shock"] == 0].loc[:,"dot0":"dot30"].values)

In [4]:
# ###########################
# Read & process positive IR results
# ###########################
df_1 = read_sims_result("/Users/xcheng/Documents/Oberlin/Summer2/DataAnalysis/data/0625/1IR", 32)
mx_1n = cleanup_network(df_1, numNode=32, numPeriod=15, numSim=50)
df_1c = cleanup_0IR_exp(df_1, numNode=32, numPeriod=15, numSim=50)

In [5]:
# independent variables
independent = ["deposits", "cash", "assets", "credit available", "wealth", "leverage", 
         "dummy-0-leverage",
         "wealth-lag", "deposits-lag", "cash-lag", "assets-lag", "leverage-lag", 
         "credit-available-lag", "credit-issued-lag", "dummy-0-leverage-lag",
         "over-leverage-frequency"]

In [8]:
def weigh_networks(N, model, variables):
    """
    Add weight to network
    Each debt is multiplied by lenders' predicted default probability
    
    Parameters
    ----------
    N: 4D numpy array [n_simulations, n_periods, n_borrowers, n_lenders]
        debt adjacency matrices 
    model: scikit learn LogisticRegression
        model for default probability
    variables: a list of strings
        independent variables for the model
    
    Returns
    ----------
    WN: 4D numpy array [n_simulations, n_periods, n_borrowers, n_lenders]
        new weighted debt adjacency matrices
    """
    
    WN = np.copy(N)
    simNum, periodNum, bankNum, _= N.shape
    
    for s in range(simNum):
        for p in range(1,periodNum-1):
            for b in range(bankNum):
                X = df_1c[np.array(df_1c["sim#"]==s) &
                          np.array(df_1c["period"]==p) & 
                          np.array(df_1c["bankID"]==b)
                         ][variables].values
                if X.any():
                    predicted_default_probability = model.predict_proba(X)[0][1]
                    WN[s, p-1, b] *= predicted_default_probability
                    
    return WN

In [9]:
def my_pagerank_numpy(G, alpha=0.85, personalization=None, weight='weight', dangling=None):
    """
    This is basically pagerank_numpy without normalization.
    """
    from networkx.algorithms.link_analysis.pagerank_alg import google_matrix
    
    if len(G) == 0:
        return {}
    M = google_matrix(G, alpha, personalization=personalization,
                      weight=weight, dangling=dangling)
    # use numpy LAPACK solver
    eigenvalues, eigenvectors = np.linalg.eig(M.T)
    ind = np.argmax(eigenvalues)
    # eigenvector of largest eigenvalue is at ind, normalized
    largest = np.array(eigenvectors[:, ind]).flatten().real
    return dict(zip(G, map(float, largest)))

In [10]:
def apply_to_networks(f, N):
    """
    Calculate Page Rank scores for all the networks 
    
    Parameters
    ----------
    f: function (2D numpy array -> matrix)
        the function to apply to each network (e.g. Page Rank)
    N: 4D numpy array [n_simulations, n_periods, n_borrowers, n_lenders]
        debt adjacency matrices (netowrks)
    
    Returns
    ----------
    PG: 3D numpy array [n_simulations, n_periods, n_banks]
        Page Rank scores
    """
    
    simNum, periodNum, bankNum, _= N.shape
    PG = np.empty((simNum, periodNum, bankNum))
    
    for s in range(simNum):
        for p in range(1,periodNum-1):
            PG[s, p] = np.array(list(f(nx.DiGraph(N[s, p])).values()))
            
    return PG

In [11]:
# ###########################
# Let's add the weight
# ###########################
weighted = weigh_networks(mx_1n, final, independent)

In [12]:
# ###########################
# Let's calculate pagerank
# ###########################
pg_norm = apply_to_networks(pagerank_numpy, weighted)
pg_not_norm = apply_to_networks(my_pagerank_numpy, weighted)

In [11]:
weighted[2,1]

array([[0.        , 0.        , 0.        , 0.39431682, 0.        ,
        0.26287819, 0.2253242 , 0.19715872, 0.17525223, 0.15772703,
        0.        , 0.        , 0.12132828, 0.        , 0.10515115,
        0.09857936, 0.09278048, 0.08762596, 0.08301429, 0.07510797,
        0.06857677, 0.06309069, 0.05841751, 0.05438869, 0.05087965,
        0.04779598, 0.04506478, 0.04262885, 0.04044266, 0.07169397,
        0.06571947],
       [0.84548998, 0.        , 0.        , 1.02956833, 0.        ,
        0.68637969, 0.58832556, 0.51478497, 0.45758673, 0.41182813,
        0.        , 0.        , 0.31679032, 0.29416238, 0.27455155,
        0.25739248, 0.24225151, 0.0228793 , 0.21675182, 0.19610825,
        0.17905519, 0.16473093, 0.15252918, 0.14200984, 0.13284768,
        0.12479616, 0.11766495, 0.11130468, 0.1055965 , 0.18719424,
        0.17159472],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.854701  , 0.732601  , 0.641026  , 0.569801  , 0.512821  ,
      

In [17]:
list(map(sum, pg_norm[2]))

[5e-324,
 0.9999999999999999,
 0.9999999999999997,
 1.0,
 1.0,
 0.9999999999999998,
 1.0,
 0.9999999999999994,
 0.9999999999999998,
 0.9999999999999999,
 0.9999999999999998,
 1.0,
 1.0000000000000004,
 0.9999999999999997,
 5e-324]

In [19]:
list(map(sum, pg_not_norm[2]))

[5e-324,
 -5.349312795049792,
 -5.5175783235522955,
 5.387735920975883,
 -5.455519363980492,
 -4.489524304219675,
 4.256502893411028,
 5.345194283759504,
 -4.035284248932145,
 4.820835260660966,
 -5.520366540145044,
 -4.444792152498663,
 -5.41165333853417,
 5.155045966659313,
 5e-324]

In [13]:
# G = nx.DiGraph(mx_1n[3,2])
# nx.draw(G)

In [14]:
# opq = {"d":3, 90:"pop"}
# np.array(list(opq.values()))

In [15]:
# pg = pagerank_numpy(G)
# print(type(pg))
# for k,v in pg.items():
#     print("{:<3}| {:.8}".format(k,v))