In [1]:
# the packages
import pandas as pd
import numpy as np
import networkx as nx

# import my own helper functions
from read import read_sims_result
from clean import cleanup_0IR_exp
from clean import cleanup_network

# Page Rank
from networkx.algorithms.link_analysis.pagerank_alg import pagerank_numpy

# logistic regression
from sklearn.linear_model import LogisticRegression

In [None]:

LogisticRegression

In [3]:
# ###########################
# Read 0IR results, and fit the model
# ###########################
# NOTE(review): absolute, machine-specific path -- point it at your own copy of the data.
df0 = read_sims_result("/Users/xcheng/Documents/Oberlin/Summer2/DataAnalysis/data/0622/0IR300s", 32)
df0c = cleanup_0IR_exp(df0, numNode=32, numPeriod=15, numSim=100, balanced=True)

# Feature columns of the default-probability model. The list is named here,
# next to the fit, because the prediction cells further down select the
# `independent` columns and must feed the model exactly these features.
independent = ["deposits", "cash", "assets", "credit available", "wealth", "leverage",
               "dummy-0-leverage",
               "wealth-lag", "deposits-lag", "cash-lag", "assets-lag", "leverage-lag",
               "credit-available-lag", "credit-issued-lag", "dummy-0-leverage-lag",
               "over-leverage-frequency"]
X = df0c[independent]
y = df0c["default-next"]  # target column of the fit

# An L1 penalty needs a solver that supports it. liblinear is what this
# notebook was originally fitted with (see the repr below); naming it
# explicitly keeps the cell working on scikit-learn releases whose default
# solver (lbfgs) rejects penalty="l1".
final = LogisticRegression(penalty="l1", C=0.007, solver="liblinear")
final.fit(X, y)

LogisticRegression(C=0.007, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l1', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [4]:
# make sure no defaults in 0IR
# sum(df0[df0["defaults due to interest"]
#     +df0["defaults due to negative wealth"]
#     +df0["defaults due to deposit shock"] == 0].loc[:,"dot0":"dot30"].values)

In [56]:
# ###########################
# Read & process positive IR results
# ###########################
# NOTE(review): absolute, machine-specific path -- point it at your own copy of the data.
df_1 = read_sims_result(
    "/Users/xcheng/Documents/Oberlin/Summer2/DataAnalysis/data/0625/1IR",
    32,
)

# Both cleanup helpers receive the same experiment dimensions.
_dims_1IR = dict(numNode=32, numPeriod=15, numSim=50)
mx_1n = cleanup_network(df_1, **_dims_1IR)   # network array built from the raw results
df_1c = cleanup_0IR_exp(df_1, **_dims_1IR)   # cleaned frame (no `balanced` flag passed here)

In [44]:
mx_1n.shape

(50, 15, 31, 31)

In [45]:
df_1c.size

456000

In [48]:
# Model inputs for a single observation: sim# 2, period 3, bankID 0.
# The result stays 2-D so it can be passed straight to final.predict_proba.
Q = (
    df_1c[(df_1c["sim#"] == 2) & (df_1c["period"] == 3) & (df_1c["bankID"] == 0)]
    [independent]
    .values
)
Q

array([[ 0.        ,  0.        , 17.0429    ,  0.102302  , 12.3833    ,
         0.10387791,  0.        , 11.3214    ,  6.65258   ,  0.        ,
        29.9801    ,  0.88825003,  3.95216   , 11.3214    ,  0.        ,
         0.        ]])

In [49]:
final.predict_proba(Q)

array([[0.91454575, 0.08545425]])

In [50]:
# Probability of the second class (column index 1) for the first -- and here
# only -- row of Q.
P = final.predict_proba(Q)[0, 1]
P

0.08545424735769748

In [51]:
# Columns "dot0" through "dot30" of the raw frame for the same observation
# (sim# 2, period 3, bankID 0), as a 2-D array.
A = (
    df_1[(df_1["sim#"] == 2) & (df_1["period"] == 3) & (df_1["bankID"] == 0)]
    .loc[:, "dot0":"dot30"]
    .values
)
A

array([[0.       , 0.211184 , 0.       , 0.125881 , 0.0418506, 0.       ,
        0.       , 0.126229 , 0.       , 0.114019 , 0.107985 , 0.       ,
        0.0970285, 0.0921673, 0.0876993, 0.0835951, 0.       , 0.0763501,
        0.0731479, 0.       , 0.0625411, 0.       , 0.0545463, 0.0512541,
        0.0483304, 0.0457176, 0.       , 0.0412483, 0.039323 , 0.       ,
        0.060337 ]])

In [52]:
# Scale every entry of A by P (the column-1 probability taken from `final`).
# NOTE(review): `*=` updates A in place, so running this cell a second time
# multiplies by P again -- re-run the cell that builds A before re-running this one.
A *= P
A

array([[0.        , 0.01804657, 0.        , 0.01075707, 0.00357631,
        0.        , 0.        , 0.0107868 , 0.        , 0.00974341,
        0.00922778, 0.        , 0.0082915 , 0.00787609, 0.00749428,
        0.00714356, 0.        , 0.00652444, 0.0062508 , 0.        ,
        0.0053444 , 0.        , 0.00466121, 0.00437988, 0.00413004,
        0.00390676, 0.        , 0.00352484, 0.00336032, 0.        ,
        0.00515605]])

In [47]:
mx_1n[2,2,0]

array([0.        , 0.00154216, 0.        , 0.00091924, 0.00030561,
       0.        , 0.        , 0.00092178, 0.        , 0.00083262,
       0.00078855, 0.        , 0.00070854, 0.00067305, 0.00064042,
       0.00061045, 0.        , 0.00055754, 0.00053416, 0.        ,
       0.0004567 , 0.        , 0.00039832, 0.00037428, 0.00035293,
       0.00033385, 0.        , 0.00030121, 0.00028715, 0.        ,
       0.00044061])

In [None]:
# independent variables
# Column names used as model inputs: the same sixteen columns, in the same
# order, that the fit cell selects into X. The `Q = ...` cell and the call to
# weighted_network both read this name.
# NOTE(review): this cell has no execution count and sits below the first
# cell that uses `independent`; it must be run before those cells.
independent = ["deposits", "cash", "assets", "credit available", "wealth", "leverage", 
         "dummy-0-leverage",
         "wealth-lag", "deposits-lag", "cash-lag", "assets-lag", "leverage-lag", 
         "credit-available-lag", "credit-issued-lag", "dummy-0-leverage-lag",
         "over-leverage-frequency"]

In [63]:
def weighted_network(N, model, variables, data=None):
    """
    Add weight to network
    Row ``b`` of every (simulation, period) slice is multiplied by the default
    probability the model predicts for bank ``b`` in that simulation and period
    (described by the original author as the lender's probability).

    Parameters
    ----------
    N: 4D numpy array
        debt adjacency matrix, indexed [simulation, period index, bank, bank]
    model: scikit learn LogisticRegression
        model for default probability; only ``predict_proba`` is called and
        column 1 of its result is used
    variables: a list of strings
        independent variables for the model (column names of ``data``)
    data: pandas DataFrame, optional
        bank-level observations holding the columns "sim#", "period" and
        "bankID" plus every name in ``variables``. When omitted, the
        notebook-level frame ``df_1c`` is used, which is what this function
        always read before the parameter existed.

    Returns
    ----------
    WN: 4D numpy array
        new weighted debt adjacency matrix; ``N`` itself is left untouched
    """
    if data is None:
        # Backward-compatible default: fall back to the notebook global.
        data = df_1c

    WN = np.copy(N)
    simNum, periodNum, bankNum, _ = N.shape

    # Build the feature matrix once and remember, for each (sim, period, bank)
    # key, the position of its first row. This replaces a full-frame boolean
    # scan per loop iteration and keeps the "first matching row wins" rule.
    features = data[variables].values
    first_row = {}
    keys = zip(data["sim#"].tolist(), data["period"].tolist(), data["bankID"].tolist())
    for position, key in enumerate(keys):
        first_row.setdefault(key, position)

    for s in range(simNum):
        # Period p of `data` is written to index p-1 of the network array.
        # Only periods 1 .. periodNum-2 are weighted; the remaining slices are
        # returned unchanged.
        # NOTE(review): confirm that leaving the last two slices unweighted is intended.
        for p in range(1, periodNum - 1):
            for b in range(bankNum):
                position = first_row.get((s, p, b))
                if position is None:
                    # No observation for this bank in this period: keep raw debts.
                    continue
                # Presence of a row is what matters here. The previous
                # `X.any()` check also skipped banks whose features were all
                # zero, leaving their debts unweighted.
                predicted_default_probability = model.predict_proba(
                    features[position:position + 1]
                )[0][1]
                WN[s, p - 1, b] *= predicted_default_probability

    return WN

In [64]:
# ###########################
# Let's add the weight
# ###########################

# Show rows 5 and 15 of slice [2, 2] before weighting, build the weighted
# network, then show the same two rows from the weighted copy.
print(mx_1n[2, 2, 5], mx_1n[2, 2, 15], sep="\n")
weighted = weighted_network(mx_1n, final, independent)
print(weighted[2, 2, 5], weighted[2, 2, 15], sep="\n")

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0.]
[0.       0.753316 0.       0.449028 0.       0.       0.       0.450272
 0.       0.406718 0.385193 0.       0.34611  0.32877  0.312832 0.
 0.       0.272349 0.260926 0.       0.22309  0.       0.194572 0.182829
 0.172399 0.163079 0.       0.147137 0.140269 0.       0.215228]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0.]
[0.         0.24504744 0.         0.14606508 0.         0.
 0.         0.14646974 0.         0.13230199 0.12530008 0.
 0.11258671 0.10694615 0.10176165 0.         0.         0.08859287
 0.08487706 0.         0.07256932 0.         0.06329266 0.05947276
 0.05607996 0.05304824 0.         0.04786244 0.04562834 0.
 0.07001188]


In [12]:
# Scratch check: multiplying an array by a scalar scales each element.
opq = np.array((2, 5, 6))
opq * 0.4

array([0.8, 2. , 2.4])

In [13]:
# Directed graph built from one 2-D slice of the network array
# (first index 3, second index 2), drawn with networkx's default settings.
G = nx.DiGraph(mx_1n[3][2])
nx.draw(G)

In [14]:
# PageRank score of every node in G, printed one "node | score" line per node.
# NOTE(review): pagerank_numpy is not available in newer NetworkX releases --
# confirm the installed version before re-running.
pg = pagerank_numpy(G)
row_format = "{:<3}| {:.8}".format
for k, v in pg.items():
    print(row_format(k, v))

0  | 0.035259265
1  | 0.030456189
2  | 0.030456189
3  | 0.030456189
4  | 0.030456189
5  | 0.030456189
6  | 0.030456189
7  | 0.030456189
8  | 0.035648913
9  | 0.035380293
10 | 0.035119624
11 | 0.033111168
12 | 0.034646382
13 | 0.034436413
14 | 0.030456189
15 | 0.030456189
16 | 0.03390325
17 | 0.030456189
18 | 0.033614979
19 | 0.030456189
20 | 0.033156904
21 | 0.03297282
22 | 0.032811638
23 | 0.030456189
24 | 0.032543206
25 | 0.032430377
26 | 0.032328977
27 | 0.032237372
28 | 0.03140625
29 | 0.030456189
30 | 0.033061711
