In [1]:
import os
import sys
import math
import random

In [2]:
import numpy as np # todo: update numpy so we can use the new, proper random number Generator class
np.__version__

'2.1.3'

In [3]:
import pandas as pd
pd.__version__

'2.2.3'

In [4]:
import networkx as nx 
nx.__version__

'3.4.2'

In [5]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
import seaborn as sns

## Directory paths

In [6]:
# Define directories
projdir = os.path.abspath(os.getcwd())

### Import own methods

In [7]:
sys.path.insert(0, projdir)

In [8]:
import methods.model as model
import methods.dists as dists

### Demonstration run

In [9]:
# Short run with a few nodes
N = 100
X = 10
# initialize the model                        # Defaults:
nodes = model.create_nodes(N,spending=0.5)    # Homogeneous activity
acts = model.initialize_activations(nodes)    # Poissonian activation
atts = model.initialize_attractivities(nodes) # Random selection
bals = model.initialize_balances(nodes)       # 1.0000 initial balance
# print the output header
header = ["timestamp","source","target","amount","source_bal","target_bal"]
print(",".join(header))
# run the model
for i in range(X):
    transaction = model.transact(nodes,acts,atts,bals)
    print(",".join([str(transaction[term]) for term in header]))

TypeError: tuple indices must be integers or slices, not dict

### Now with inputs

In [10]:
# Example run
N = 1000
T = 14 # two 'weeks'

# Create activity and attractivity distributions               # Options:
unif_act, unif_att = dists.paired_samples(N)                   # Uncorrelated samples from the uniform distribution
#unif_act, unif_att = dists.paired_samples(N,same=True)         # Two of the same sample from the uniform distribution
#unif_act, unif_att = dists.paired_samples(N,params={'copula':'clayton','reversed':True,'theta':5})  # Coupled samples
vect_act = dists.scale_pareto(unif_act, beta=2.0)              # Pareto, normalized to a mean of 1
vect_att = dists.scale_pareto(unif_att, beta=1.5)              # Can be different

# Initialize the model                                                                     # Options:                     
nodes = model.create_nodes(N, activity=vect_act, attractivity=vect_att, spending=0.2)      # Spending probability (or, heterogeneous with a list/array)
#nodes = model.create_nodes(N, activity=vect_act, attractivity=vect_att, burstiness=2)      # Shape param for Weibull inter-event times (or, a list/array)
#nodes = model.create_nodes(N, activity=vect_act, attractivity=vect_att, mean_iet=24*60*60) # Mean activity once per 24 hours (in seconds)
#T = 14*24*60*60                                                                            # Note: also adjust T accordingly
acts = model.initialize_activations(nodes)
atts = model.initialize_attractivities(nodes)
bals = model.initialize_balances(nodes,balances=100*np.ones(N),decimals=2) # 100.00 instead of 1.0000

# filename 
filename = os.path.join(projdir,"example.csv")
header = ["timestamp","source","target","amount","source_bal","target_bal"]

with open(filename, "w") as file:
    file.write(",".join(header) + "\n")
    t = 0
    while t < T:
        transaction = model.transact(nodes,acts,atts,bals)
        file.write(",".join([str(transaction[term]) for term in header]) + "\n")
        t = transaction["timestamp"]

### Transactions and balances

In [11]:
# Transaction data
dtypes = {}
dtypes['timestamp'] = 'float'
dtypes['source'] = 'int'
dtypes['target'] = 'int'
dtypes['amount'] = 'float'
dtypes['source_bal'] = 'float'
dtypes['target_bal'] = 'float'

In [12]:
# Load the data
filename = os.path.join(projdir,"example.csv")
txns = pd.read_csv(filename,dtype=dtypes)
# Drop the last transaction
txns.drop(txns[txns['timestamp']>=T].index,inplace=True) # drop the last transaction
# Print the number of transactions
print(f"example.csv: {len(txns)} transactions")

example.csv: 13942 transactions


In [13]:
# Get the total spent by each node
totals = txns.groupby("source").agg({'amount': ['count','sum','mean','median','max'],'target':'nunique'})
totals.columns = ["txns","total","mean","median","max","targets"]
totals.reset_index(inplace=True)
# Show the resulting dataframe
totals.head()

Unnamed: 0,source,txns,total,mean,median,max,targets
0,0,20,218.23,10.9115,9.44,23.36,18
1,1,28,111.48,3.981429,1.735,19.77,21
2,2,12,98.55,8.2125,7.255,19.86,11
3,3,27,262.92,9.737778,8.6,19.68,24
4,4,29,239.09,8.244483,6.43,24.7,24


In [14]:
# Transform to get the daily transactions and day-end balances
txns['day'] = txns['timestamp'].apply(lambda x: math.ceil(x/(T/14))) # we're grabbing end-of-day balances

# Transform to account-based dataset
df_source = txns[['timestamp','day','source','source_bal']].rename(columns={'source': 'account', 'source_bal': 'balance'})
df_target = txns[['timestamp','day','target','target_bal']].rename(columns={'target': 'account', 'target_bal': 'balance'})
df_balance = pd.concat([df_source, df_target]).sort_values('timestamp')
# Aggregate daily (it is already sorted)
balances = df_balance.groupby(['account','day']).agg({'timestamp':'count',
                                                      'balance': 'last'}).rename(columns={'timestamp':'txns'}) # note the 'last' function from pandas
# Create a new multi-index with all account-days
all_observations = pd.MultiIndex.from_product([balances.index.levels[0], range(1,T//(24*60*60)+1)], names=['account', 'day'])
# Reindex your DataFrame to include all account-days and forward fill missing values
balances = balances.reindex(all_observations).fillna(method='ffill') # forward fill missing values to cover days with no transactions
# Get the timestamp, day, and account as columns
balances.reset_index(inplace=True)
balances['timestamp'] = balances['day']*24*60*60
# Print the number of observations
print(f"example.csv: {balances.shape[0]} day-account balance observations")
# Show the resulting dataframe
balances.head()

example.csv: 0 day-account balance observations


Unnamed: 0,account,day,txns,balance,timestamp
