In [15]:
import os
import sys
import math
import random

In [16]:
# Import NumPy and show the installed version as the cell output
import numpy as np 
np.__version__

'2.1.3'

In [17]:
# Import pandas and show the installed version as the cell output
import pandas as pd
pd.__version__

'2.2.3'

In [18]:
# Import NetworkX and show the installed version as the cell output
import networkx as nx 
nx.__version__

'3.4.2'

In [19]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
import seaborn as sns

## Directory paths

In [20]:
# Project directory: the absolute path of the directory the notebook is run from
projdir = os.path.abspath(os.curdir)

### Import own methods

In [21]:
# Put the project directory at the front of the module search path so that the
# `src` package imported in the next cell is resolved from there.
# Guarded so that re-running this cell does not keep prepending duplicates.
if sys.path[:1] != [projdir]:
    sys.path.insert(0, projdir)

In [22]:
import src.model as model
import src.dists as dists

### Demonstration run

In [23]:
# Demonstration: a small network and a handful of transactions
N, X = 100, 10   # N nodes, X transactions to simulate

# Build the model state; trailing notes record what each default means
nodes = model.create_nodes(N, spending=0.5)                # default: homogeneous activity
transitions = model.initialize_transition_matrix(nodes)    # default: random selection
activations = model.initialize_activations(nodes)          # default: Poissonian activation
balances = model.initialize_balances(nodes)                # default: 1.0000 initial balance

# Emit comma-separated output: the header row first, then one row per transaction
header = ["timestamp", "source", "target", "amount", "source_bal", "target_bal"]
print(*header, sep=",")
for step in range(X):
    transaction = model.transact(nodes, activations, transitions, balances)
    print(*(transaction[term] for term in header), sep=",")

timestamp,source,target,amount,source_bal,target_bal
0.007859085028843983,14,12,0.5075,0.4925,1.5075
0.013370340110931196,1,24,0.4951,0.5049,1.4951
0.024661474088877873,56,32,0.5036,0.4964,1.5036
0.031133765885722003,37,62,0.5024,0.4976,1.5024
0.051216982395007854,0,73,0.5095,0.4905,1.5095
0.0540322465846581,21,36,0.4969,0.5031,1.4969
0.061143359503553395,33,18,0.5029,0.4971,1.5029
0.062468519964546755,28,96,0.4980,0.5020,1.4980
0.06812630947949348,54,93,0.5001,0.4999,1.5001
0.07459259312133,76,56,0.5051,0.4949,1.0015


### Now with inputs

In [24]:
# Example run with heterogeneous activity and attractivity
N = 1000
T = 14  # two 'weeks'

# Paired uniform samples that drive activity and attractivity.
# Alternatives to the call below:
#   dists.paired_samples(N,same=True)
#       -> two of the same sample from the uniform distribution
#   dists.paired_samples(N,params={'copula':'clayton','reversed':True,'theta':5})
#       -> coupled samples
unif_act, unif_att = dists.paired_samples(N)         # uncorrelated samples from the uniform distribution
vect_act = dists.scale_pareto(unif_act, beta=2.0)    # Pareto, normalized to a mean of 1
vect_att = dists.scale_pareto(unif_att, beta=1.5)    # can be different

# Model state.
# Alternatives to the create_nodes call below (same activity/attractivity arguments):
#   burstiness=2        -> shape param for Weibull inter-event times (or, a list/array)
#   mean_iet=24*60*60   -> mean activity once per 24 hours (in seconds);
#                          note: also adjust T accordingly, i.e. T = 14*24*60*60
nodes = model.create_nodes(
    N,
    activity=vect_act,
    attractivity=vect_att,
    spending=0.2,    # spending probability (or, heterogeneous with a list/array)
)
transitions = model.initialize_transition_matrix(nodes)
# TODO: scale the time units without changing anything else, don't call it iet
activations = model.initialize_activations(nodes)
# 100.00 instead of 1.0000
balances = model.initialize_balances(nodes, balances=np.full(N, 100.0), decimals=2)

# Output file and its column order
filename = os.path.join(projdir, "example.csv")
header = ["timestamp", "source", "target", "amount", "source_bal", "target_bal"]

# Simulate until the clock reaches T, writing every transaction as one CSV row.
# The row is written before the clock is checked, so the file ends with a
# single transaction whose timestamp is at or beyond T.
with open(filename, "w") as out:
    print(",".join(header), file=out)
    t = 0
    while t < T:
        transaction = model.transact(nodes, activations, transitions, balances)
        row = [str(transaction[term]) for term in header]
        print(",".join(row), file=out)
        t = transaction["timestamp"]

### Transactions and balances

In [25]:
# Column types used when reading the transaction CSV
dtypes = {
    'timestamp': 'float',
    'source': 'int',
    'target': 'int',
    'amount': 'float',
    'source_bal': 'float',
    'target_bal': 'float',
}

In [26]:
# Read the simulated transactions back from disk with explicit column types
filename = os.path.join(projdir, "example.csv")
txns = pd.read_csv(filename, dtype=dtypes)
# Discard any row whose timestamp is at or beyond T (the last transaction written)
overshoot = txns.index[txns['timestamp'] >= T]
txns = txns.drop(index=overshoot)
# Report how many transactions remain
print(f"example.csv: {len(txns)} transactions")

example.csv: 14214 transactions


In [27]:
# Per-source spending summary: transaction count, total / mean / median / max
# amount sent, and the number of distinct targets paid
totals = (
    txns.groupby("source")
    .agg(
        txns=('amount', 'count'),
        total=('amount', 'sum'),
        mean=('amount', 'mean'),
        median=('amount', 'median'),
        max=('amount', 'max'),
        targets=('target', 'nunique'),
    )
    .reset_index()
)
# Preview the first rows
totals.head()

Unnamed: 0,source,txns,total,mean,median,max,targets
0,0,6,371.59,61.931667,60.99,87.83,6
1,1,6,110.98,18.496667,17.955,23.54,6
2,2,7,98.61,14.087143,13.35,19.68,6
3,3,24,126.78,5.2825,2.94,23.09,17
4,4,17,155.75,9.161765,7.72,20.52,14


In [28]:
# Transform to get the daily transaction counts and day-end balances per account
N_DAYS = 14               # number of 'days' the simulated horizon T is split into
INITIAL_BALANCE = 100.0   # opening balance of every account; keep in sync with the
                          # value passed to model.initialize_balances for this run
day_length = T / N_DAYS

# Day d covers the interval ((d-1)*day_length, d*day_length], so the last balance
# seen within a day is the end-of-day balance. A timestamp of exactly 0 is
# clipped into day 1 rather than a non-existent day 0.
txns['day'] = np.ceil(txns['timestamp'] / day_length).astype(int).clip(lower=1)

# Transform to an account-based dataset: one row per (transaction, account touched)
df_source = txns[['timestamp','day','source','source_bal']].rename(columns={'source': 'account', 'source_bal': 'balance'})
df_target = txns[['timestamp','day','target','target_bal']].rename(columns={'target': 'account', 'target_bal': 'balance'})
# Stable sort keeps a deterministic order for rows that share a timestamp
df_balance = pd.concat([df_source, df_target]).sort_values('timestamp', kind='stable')

# Aggregate per account-day; 'last' relies on the chronological sort above
balances = df_balance.groupby(['account','day']).agg(txns=('timestamp', 'count'),
                                                     balance=('balance', 'last'))

# Create a new multi-index with all account-days. Days run 1..N_DAYS to match the
# binning above (not 1..T, which only coincides when T == N_DAYS).
# Note: accounts that never transact are absent from the data and thus from this index.
all_observations = pd.MultiIndex.from_product([balances.index.levels[0], range(1, N_DAYS + 1)],
                                              names=['account', 'day'])
balances = balances.reindex(all_observations)

# Days without transactions have a transaction count of zero ...
balances['txns'] = balances['txns'].fillna(0).astype(int)
# ... and carry the previous day-end balance forward *within the same account*.
# Days before an account's first transaction keep the opening balance; they must
# not inherit the preceding account's last balance.
balances['balance'] = (
    balances.groupby(level='account')['balance'].ffill().fillna(INITIAL_BALANCE)
)

# Get the day and account as columns
balances = balances.reset_index()
# Print the number of observations
print(f"example.csv: {balances.shape[0]} day-account balance observations")
# Show the resulting dataframe
balances.head()

example.csv: 14000 day-account balance observations


Unnamed: 0,account,day,txns,balance
0,0,1,1.0,120.32
1,0,2,1.0,146.94
2,0,3,3.0,195.05
3,0,4,2.0,180.26
4,0,5,3.0,218.77
