In [1]:
import os
import sys
import math
import random

In [2]:
import numpy as np # todo: update numpy so we can use the new, proper random number Generator class
np.__version__

'2.1.3'

In [3]:
import pandas as pd
pd.__version__

'2.2.3'

In [4]:
import networkx as nx 
nx.__version__

'3.4.2'

In [5]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
import seaborn as sns

## Directory paths

In [6]:
# Define directories
projdir = os.path.abspath(os.getcwd())

### Import own methods

In [7]:
sys.path.insert(0, projdir)

In [8]:
import methods.model as model
import methods.dists as dists

### Demonstration run

In [9]:
# Short run with a few nodes
N = 100
X = 10
# initialize the model                        # Defaults:
nodes= model.create_nodes(N,spending=0.5)    # Homogeneous activity
transitions = model.initialize_transition_matrix(nodes) # Random selection
activations = model.initialize_activations(nodes)    # Poissonian activation
balances = model.initialize_balances(nodes)       # 1.0000 initial balance
# print the output header
header = ["timestamp","source","target","amount","source_bal","target_bal"]
print(",".join(header))
# run the model
for i in range(X):
    transaction = model.transact(nodes,activations,transitions,balances)
    print(",".join([str(transaction[term]) for term in header]))

timestamp,source,target,amount,source_bal,target_bal
0.018281139293636364,47,63,0.5048,0.4952,1.5048
0.029316954401082052,30,3,0.4926,0.5074,1.4926
0.030962993305199117,36,56,0.4966,0.5034,1.4966
0.03293754528124442,28,61,0.4942,0.5058,1.4942
0.04218164013297151,3,15,0.7426,0.7500,1.7426
0.04498275723471715,47,31,0.2525,0.2427,1.2525
0.05935690132401415,22,59,0.5001,0.4999,1.5001
0.07283833376032232,73,85,0.4916,0.5084,1.4916
0.0772816788972889,15,43,0.8698,0.8728,1.8698
0.08843624499698309,20,39,0.4951,0.5049,1.4951


### Now with inputs

In [10]:
# Example run
N = 1000
T = 14 # two 'weeks'

# Create activity and attractivity distributions               # Options:
unif_act, unif_att = dists.paired_samples(N)                   # Uncorrelated samples from the uniform distribution
#unif_act, unif_att = dists.paired_samples(N,same=True)         # Two of the same sample from the uniform distribution
#unif_act, unif_att = dists.paired_samples(N,params={'copula':'clayton','reversed':True,'theta':5})  # Coupled samples
vect_act = dists.scale_pareto(unif_act, beta=2.0)              # Pareto, normalized to a mean of 1
vect_att = dists.scale_pareto(unif_att, beta=1.5)              # Can be different

# Initialize the model                                                                     # Options:                     
nodes = model.create_nodes(N, activity=vect_act, attractivity=vect_att, spending=0.2)      # Spending probability (or, heterogeneous with a list/array)
#nodes = model.create_nodes(N, activity=vect_act, attractivity=vect_att, burstiness=2)      # Shape param for Weibull inter-event times (or, a list/array)
#nodes = model.create_nodes(N, activity=vect_act, attractivity=vect_att, mean_iet=24*60*60) # Mean activity once per 24 hours (in seconds)
#T = 14*24*60*60                                                                            # Note: also adjust T accordingly
transitions = model.initialize_transition_matrix(nodes)
activations = model.initialize_activations(nodes)
balances = model.initialize_balances(nodes,balances=100*np.ones(N),decimals=2) # 100.00 instead of 1.0000

# filename 
filename = os.path.join(projdir,"example.csv")
header = ["timestamp","source","target","amount","source_bal","target_bal"]

with open(filename, "w") as file:
    file.write(",".join(header) + "\n")
    t = 0
    while t < T:
        transaction = model.transact(nodes,activations,transitions,balances)
        file.write(",".join([str(transaction[term]) for term in header]) + "\n")
        t = transaction["timestamp"]

### Transactions and balances

In [11]:
# Transaction data
dtypes = {}
dtypes['timestamp'] = 'float'
dtypes['source'] = 'int'
dtypes['target'] = 'int'
dtypes['amount'] = 'float'
dtypes['source_bal'] = 'float'
dtypes['target_bal'] = 'float'

In [12]:
# Load the data
filename = os.path.join(projdir,"example.csv")
txns = pd.read_csv(filename,dtype=dtypes)
# Drop the last transaction
txns.drop(txns[txns['timestamp']>=T].index,inplace=True) # drop the last transaction
# Print the number of transactions
print(f"example.csv: {len(txns)} transactions")

example.csv: 14370 transactions


In [13]:
# Get the total spent by each node
totals = txns.groupby("source").agg({'amount': ['count','sum','mean','median','max'],'target':'nunique'})
totals.columns = ["txns","total","mean","median","max","targets"]
totals.reset_index(inplace=True)
# Show the resulting dataframe
totals.head()

Unnamed: 0,source,txns,total,mean,median,max,targets
0,0,5,119.09,23.818,24.83,28.74,5
1,1,3,50.05,16.683333,16.64,20.28,3
2,2,7,188.03,26.861429,27.16,30.78,7
3,3,8,103.03,12.87875,12.155,20.42,8
4,4,40,181.46,4.5365,3.13,19.93,34


In [14]:
# Transform to get the daily transactions and day-end balances
txns['day'] = txns['timestamp'].apply(lambda x: math.ceil(x/(T/14))) # we're grabbing end-of-day balances

# Transform to account-based dataset
df_source = txns[['timestamp','day','source','source_bal']].rename(columns={'source': 'account', 'source_bal': 'balance'})
df_target = txns[['timestamp','day','target','target_bal']].rename(columns={'target': 'account', 'target_bal': 'balance'})
df_balance = pd.concat([df_source, df_target]).sort_values('timestamp')
# Aggregate daily (it is already sorted)
balances = df_balance.groupby(['account','day']).agg({'timestamp':'count',
                                                      'balance': 'last'}).rename(columns={'timestamp':'txns'}) # note the 'last' function from pandas
# Create a new multi-index with all account-days
all_observations = pd.MultiIndex.from_product([balances.index.levels[0], range(1,T+1)], names=['account', 'day'])
# Reindex your DataFrame to include all account-days and forward fill missing values
balances = balances.reindex(all_observations).fillna(method='ffill') # forward fill missing values to cover days with no transactions
# Get the timestamp, day, and account as columns
balances.reset_index(inplace=True)
# Print the number of observations
print(f"example.csv: {balances.shape[0]} day-account balance observations")
# Show the resulting dataframe
balances.head()

example.csv: 14000 day-account balance observations


Unnamed: 0,account,day,txns,balance
0,0,1,,
1,0,2,2.0,123.82
2,0,3,1.0,98.99
3,0,4,1.0,105.49
4,0,5,1.0,105.49
