# Time snapshots for network analysis

This notebook creates time snapshots of the given portfolio, in order to study how network structure could influence impairments/overdues diffusion.
In the previous steps, impairments and overdues has been calculated using as report date the date in which the data was received.
Using snapshots, it is possible to perform this analysis overtime observing diffusion.

## Data import

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import os

from features_utils import *

In [2]:
#Instruments dataset
#import data
user = os.environ["USERNAME"]

#from home
#filename = "instrumentsdf.pkl"
#datafolder = "C:/Users/{}/Dropbox/University/MscDataScience_Birkbeck/thesis_project/data/".format(user)

#from work
filename = "09272018_instruments.pkl"
datafolder = "C:/Users/{}/Tradeteq Dropbox/Tradeteq Team/Clients/#GoFactoring/data analysis/".format(user)

inst = pd.read_pickle(datafolder+filename)
inst.head().transpose()

uid,2744:79/231,2861:79/232,2932:79/233,1472:489/688,2042:512/645
customer_id,2004008,2004008,2004008,2004009,2004009
customer_name_1,jobs united GmbH,jobs united GmbH,jobs united GmbH,PM Personal GmbH,PM Personal GmbH
debtor_id,79,79,79,489,512
debtor_name_1,Quadroni Linard,Quadroni Linard,Quadroni Linard,Style Interiors,Elektropartner AG
invoice_number,2744,2861,2932,1472,2042
invoice_date,2013-07-23 00:00:00,2013-07-30 00:00:00,2013-08-06 00:00:00,2013-08-13 00:00:00,2013-08-13 00:00:00
due_date,2013-08-02 00:00:00,2013-08-09 00:00:00,2013-08-16 00:00:00,2013-08-23 00:00:00,2013-08-23 00:00:00
invoice_amount,913.7,2233.45,1370.5,9195.1,4594.6
purchase_amount,0,0,0,0,0
purchase_amount_open,0,0,0,0,0


In [3]:
inst[inst['has_prosecution']][[inst.columns[c] for c in range(len(inst.columns)) if c<50]].transpose()

uid,2042:512/645,2043:512/646,2044:512/647,2045:512/648,2046:512/649,2047:512/650,1063:INTER715/11390,1108:717/1153,1109:717/1154,1110:717/1155,...,101516:101790/62383,101517:101790/62384,101518:101790/62385,101659:101786/62958,101660:101786/62959,101685:101790/63380,101794:101786/63686,101795:101786/63687,101970:101786/64436,102031:101786/65082
customer_id,2004009,2004009,2004009,2004009,2004009,2004009,2004019,2004016,2004016,2004016,...,2004078,2004078,2004078,2004078,2004078,2004078,2004078,2004078,2004078,2004078
customer_name_1,PM Personal GmbH,PM Personal GmbH,PM Personal GmbH,PM Personal GmbH,PM Personal GmbH,PM Personal GmbH,United Personal Management AG,inter personal GmbH,inter personal GmbH,inter personal GmbH,...,PS Schweiz AG,PS Schweiz AG,PS Schweiz AG,PS Schweiz AG,PS Schweiz AG,PS Schweiz AG,PS Schweiz AG,PS Schweiz AG,PS Schweiz AG,PS Schweiz AG
debtor_id,512,512,512,512,512,512,INTER715,717,717,717,...,101790,101790,101790,101786,101786,101790,101786,101786,101786,101786
debtor_name_1,Elektropartner AG,Elektropartner AG,Elektropartner AG,Elektropartner AG,Elektropartner AG,Elektropartner AG,Pergola Design AG,Team Fortis GmbH,Team Fortis GmbH,Team Fortis GmbH,...,Malergeschäft Ferati GmbH,Malergeschäft Ferati GmbH,Malergeschäft Ferati GmbH,Wood Living AG,Wood Living AG,Malergeschäft Ferati GmbH,Wood Living AG,Wood Living AG,Wood Living AG,Wood Living AG
invoice_number,2042,2043,2044,2045,2046,2047,1063,1108,1109,1110,...,101516,101517,101518,101659,101660,101685,101794,101795,101970,102031
invoice_date,2013-08-13 00:00:00,2013-09-10 00:00:00,2013-09-17 00:00:00,2013-09-24 00:00:00,2013-09-30 00:00:00,2013-10-08 00:00:00,2014-04-16 00:00:00,2014-05-14 00:00:00,2014-05-14 00:00:00,2014-05-14 00:00:00,...,2018-06-25 00:00:00,2018-06-25 00:00:00,2018-06-25 00:00:00,2018-07-02 00:00:00,2018-07-02 00:00:00,2018-07-03 00:00:00,2018-07-09 00:00:00,2018-07-09 00:00:00,2018-07-17 00:00:00,2018-07-23 00:00:00
due_date,2013-08-23 00:00:00,2013-09-20 00:00:00,2013-09-27 00:00:00,2013-10-04 00:00:00,2013-10-10 00:00:00,2013-10-18 00:00:00,2014-04-26 00:00:00,2014-05-24 00:00:00,2014-05-24 00:00:00,2014-05-24 00:00:00,...,2018-07-05 00:00:00,2018-07-05 00:00:00,2018-07-05 00:00:00,2018-07-12 00:00:00,2018-07-12 00:00:00,2018-07-13 00:00:00,2018-07-19 00:00:00,2018-07-19 00:00:00,2018-07-27 00:00:00,2018-08-02 00:00:00
invoice_amount,4594.6,2751.85,2801,2850.1,3120.4,2555.3,2257.2,1542.25,8655.3,1542.25,...,1277.05,1277.05,1277.05,1933.75,1933.75,1277.05,1890.8,1890.8,1117.3,1001.25
purchase_amount,0,0,0,0,0,0,0,1542.25,8655.3,1542.25,...,0,0,0,0,0,0,0,0,0,0
purchase_amount_open,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
pamount = inst.payment_amount.apply(lambda x:len(x))
pdate = inst.payment_amount.apply(lambda x:len(x))

## 1. Defining snapshot slices

In order to create snapshots of different time frames, the report date will be progressively changed and used to slice the dataframe.  

In [5]:
ReportDate = datetime.datetime(2018, 9, 28) #date data was received

daterange = pd.date_range(start=inst.invoice_date.min(), end=ReportDate, freq='M')

In [6]:
len(daterange)

62

In [7]:
pd.to_datetime(str(daterange[0]).split(' ')[0], yearfirst=True)

Timestamp('2013-07-31 00:00:00')

In [8]:
daterange[0]<ReportDate

True

In [None]:
#this is very slow
for snap in range(len(daterange)):
    label = "sshot_"+str(snap)+'_'
    repdate = pd.to_datetime(str(daterange[snap]).split(' ')[0], yearfirst=True)
    inst[label]=False
    inst.loc[inst.invoice_date<repdate, label]=True
    add_main_features(inst, repdate, prefix=label)
    

In [54]:
selnum=48
print(daterange[selnum])
selector = 'sshot_'+str(selnum)+'_'
inst[inst[selector] & inst['has_purchase']][[c for c in inst.columns if selector in c]+ \
                     ['invoice_date','payment_date','dates_to_count', 
                      'purchase_amount', 'has_purchase', 'due_date']].iloc[1000:1010].transpose()

2017-07-31 00:00:00


uid,40012220216:0260002/1942,40013220216:0260002/1943,15385:4024/1944,660:8/1945,661:8/1946,662:8/1947,663:8/1948,160039:10012/1965,160042:10012/1968,160040:10019/1966
sshot_48_,True,True,True,True,True,True,True,True,True,True
sshot_48_discharge_loss,0,0,0,0,0,0,0,0,0,0
sshot_48_has_impairment1,False,False,False,False,False,False,False,False,False,False
sshot_48_has_impairment2,False,False,False,False,False,False,False,False,False,False
sshot_48_total_impairment,0,0,0,0,0,0,0,0,0,0
sshot_48_has_prosecution,False,False,False,False,False,False,False,False,False,False
sshot_48_has_purchase,True,True,True,True,True,True,True,True,True,True
sshot_48_has_deduction,False,False,False,False,False,False,False,False,False,False
sshot_48_has_discharge,False,False,False,False,False,False,False,False,False,False
sshot_48_payment_date,"[2016-06-09 00:00:00, 2016-06-09 00:00:00, 201...","[2016-06-09 00:00:00, 2016-06-09 00:00:00, 201...","[2016-03-04 00:00:00, 2016-03-04 00:00:00, 201...","[2016-04-28 00:00:00, 2016-04-28 00:00:00, 201...","[2016-04-28 00:00:00, 2016-04-28 00:00:00, 201...","[2016-04-28 00:00:00, 2016-04-28 00:00:00, 201...","[2016-05-11 00:00:00, 2016-05-11 00:00:00, 201...","[2016-03-31 00:00:00, 2016-03-31 00:00:00]","[2016-03-31 00:00:00, 2016-03-31 00:00:00]","[2016-05-11 00:00:00, 2016-05-11 00:00:00, 201..."


In [11]:

print(daterange[4])
inst[inst['sshot_5_']][[c for c in inst.columns if 'sshot_5_' in c] + ['dates_to_count']].iloc[10:25].transpose()

2013-11-30 00:00:00


KeyError: 'sshot_5_'

In [32]:
inst.loc['2043:512/646'].due_date

Timestamp('2013-09-20 00:00:00')

In [24]:
inst[inst.payment_amount.apply(lambda x:len(x))>1].head().transpose()

uid,2744:79/231,2861:79/232,2932:79/233,1472:489/688,2042:512/645
customer_id,2004008,2004008,2004008,2004009,2004009
customer_name_1,jobs united GmbH,jobs united GmbH,jobs united GmbH,PM Personal GmbH,PM Personal GmbH
debtor_id,79,79,79,489,512
debtor_name_1,Quadroni Linard,Quadroni Linard,Quadroni Linard,Style Interiors,Elektropartner AG
invoice_number,2744,2861,2932,1472,2042
invoice_date,2013-07-23 00:00:00,2013-07-30 00:00:00,2013-08-06 00:00:00,2013-08-13 00:00:00,2013-08-13 00:00:00
due_date,2013-08-02 00:00:00,2013-08-09 00:00:00,2013-08-16 00:00:00,2013-08-23 00:00:00,2013-08-23 00:00:00
invoice_amount,913.7,2233.45,1370.5,9195.1,4594.6
purchase_amount,0,0,0,0,0
purchase_amount_open,0,0,0,0,0


In [9]:
def select_payment(x):
    payments = x.payment_amount
    filter = x.dates_to_count
    return type(payments)

inst[["payment_amount", "dates_to_count"]].apply(select_payment, axis=1)

KeyError: "['dates_to_count'] not in index"