In [1]:
#!pip install pyvis  

In [2]:
#!pip install beautifulsoup4

In [3]:
import os # For clearing variables and setting working directory 
import numpy as np  
import pandas as pd  # For data manipulation
import matplotlib.pyplot as plt  
import networkx as nx    # NetworkX is a widely used package for graph operations, similar to igraph in R.
from pyvis.network import Network  

In [4]:
from IPython.display import clear_output  # Clear console
clear_output(wait=True)  

In [5]:
pd.set_option('display.float_format', lambda x: '%.0f' % x) #Turning off scientific notation

In [6]:
os.chdir("C:\\Users\\blang\\OneDrive\\Desktop\\RProject with dummy data")  # sets the working directory 

In [7]:
# Load data into a DataFrame
transparency = pd.read_csv("transparency.csv", dtype=str)

In [8]:
# Replace NA (missing values) with empty strings
transparency = transparency.replace({np.nan: ''})

In [9]:
# Ensure the DataFrame is properly loaded
print(transparency.head())  # Optional: Preview the data

  Code speriod1 speriod2 speriod3 speriod4 speriod5 chperiod1 chperiod2  \
0   AE       68       69       70       70       66         0         1   
1   AF        8        8       12       11       15         0         0   
2   AL       33       31       33       36       39         0        -1   
3   AM       34       36       37       35       33         0         1   
4   AO       22       23       19       15       18         0         1   

  chperiod3 chperiod4 chperiod5  
0         1         0        -1  
1         1        -1         1  
2         1         1         1  
3         1        -1        -1  
4        -1        -1         1  


In [10]:
payments = pd.read_csv("data4-3.csv")  # Note: Ensure the data file is in the working directory set earlier. 

In [11]:
payments1 = payments.copy()  # Create a copy of the dataframe  

In [12]:
print(payments1.columns) # Ensure all columns are in the right format 

Index(['Year', 'Month', 'Initial Ordering BIC8', 'Initial Ordering BIC8 name',
       'Initial Ordering Country', 'BIC8', 'BIC8 name', 'BIC8 Country',
       'Counterparty BIC8', 'Counterparty BIC8 name',
       'Counterparty BIC8 Country', 'End Beneficiary BIC8',
       'End Beneficiary BIC8 name', 'End Beneficiary Country', 'Message Type',
       'Currency', 'Net.USD.Amount', 'Transactions'],
      dtype='object')


In [13]:
# Replace spaces with dots in all column names of the DataFrame
payments1.columns = payments1.columns.str.replace(' ', '.', regex=False)

# Check the updated column names
print(payments1.columns)


Index(['Year', 'Month', 'Initial.Ordering.BIC8', 'Initial.Ordering.BIC8.name',
       'Initial.Ordering.Country', 'BIC8', 'BIC8.name', 'BIC8.Country',
       'Counterparty.BIC8', 'Counterparty.BIC8.name',
       'Counterparty.BIC8.Country', 'End.Beneficiary.BIC8',
       'End.Beneficiary.BIC8.name', 'End.Beneficiary.Country', 'Message.Type',
       'Currency', 'Net.USD.Amount', 'Transactions'],
      dtype='object')


In [14]:
# If the column name is slightly different or has leading/trailing spaces, rename it
payments1.rename(columns=lambda x: x.strip(), inplace=True)  # Removes leading/trailing spaces

In [15]:
payments1.columns = payments1.columns.str.strip()

In [16]:
# List of columns to convert to string
string_columns = [
    "Year", "Month", "Initial.Ordering.BIC8", "Initial.Ordering.BIC8.name",
    "Initial.Ordering.Country", "BIC8", "BIC8.name", "BIC8.Country",
    "Counterparty.BIC8", "Counterparty.BIC8.name", "Counterparty.BIC8.Country",
    "End.Beneficiary.BIC8", "End.Beneficiary.BIC8.name", "End.Beneficiary.Country",
    "Message.Type", "Currency"
]

# Convert columns to string if they exist in the DataFrame
for col in string_columns:
    if col in payments1.columns:
        payments1[col] = payments1[col].astype(str)
    else:
        print(f"Warning: Column '{col}' does not exist in the DataFrame.") # Adds a warning message for any columns
                                                                           # that are missing, making it easier to debug.

# List of columns to convert to numeric
numeric_columns = ["Net.USD.Amount", "Transactions"]

# Convert columns to numeric if they exist in the DataFrame
for col in numeric_columns:
    if col in payments1.columns:
        payments1[col] = pd.to_numeric(payments1[col], errors='coerce')
    else:
        print(f"Warning: Column '{col}' does not exist in the DataFrame.")


In [17]:
print(payments1.columns.tolist())

['Year', 'Month', 'Initial.Ordering.BIC8', 'Initial.Ordering.BIC8.name', 'Initial.Ordering.Country', 'BIC8', 'BIC8.name', 'BIC8.Country', 'Counterparty.BIC8', 'Counterparty.BIC8.name', 'Counterparty.BIC8.Country', 'End.Beneficiary.BIC8', 'End.Beneficiary.BIC8.name', 'End.Beneficiary.Country', 'Message.Type', 'Currency', 'Net.USD.Amount', 'Transactions']


In [18]:
# Creates a variable "flow", which will be used to determine 
# whether a transaction was an inflow or outflow
payments1["flow"] = 0  # Initialize the "flow" column to 0

# Update "flow" column based on conditions
payments1.loc[payments1["Initial.Ordering.BIC8"] == payments1["BIC8"], "flow"] = 1
payments1.loc[payments1["End.Beneficiary.BIC8"] == payments1["BIC8"], "flow"] = -1

In [19]:
df = payments1.copy()  # Create the final dataframe

In [20]:
# Optional: Preview the DataFrame
print(df.head())

   Year Month Initial.Ordering.BIC8 Initial.Ordering.BIC8.name  \
0  2015   Jul              BNKAXXYY                        nan   
1  2015   Jul              BNKAXXYY                        nan   
2  2015   Jul              BNKCXXYY                        nan   
3  2015   Jul              BNKDXXYY                        nan   
4  2015   Jul              BNKDXXYY                        nan   

  Initial.Ordering.Country      BIC8 BIC8.name BIC8.Country Counterparty.BIC8  \
0                       XX  BNKAXXYY       nan           XX              GLOB   
1                       XX  BNKAXXYY       nan           XX              GLOC   
2                       XX  BNKCXXYY       nan           XX              GLOH   
3                       XX  BNKDXXYY       nan           XX              GLOK   
4                       XX  BNKDXXYY       nan           XX              GLOK   

  Counterparty.BIC8.name Counterparty.BIC8.Country End.Beneficiary.BIC8  \
0                    nan                 

In [21]:
import datetime  
from datetime import date  # Import the necessary library
today_date = date.today()  # Define today's date

In [22]:
# 1. Today's date  
date = datetime.date.today()  # Gets today's date  

# 2. Pathway length toggle  
pathway_length_toggle = "r"  # 'r' for correspondent relationships, 'e' for endpoint, 'f' for full  

# 3. System or individual view  
system_or_individual_view = "s"  # 's' for system or 'b' for bank  

# 4. Country or institution view  
country_or_institution_view = "c"  # 'c' for country or 'i' for institution  

# 5. Color using index  
color_using_index = "yes"  # 'yes' or 'no'; requires an additional file if 'yes'  

# 6. Selected bank  
selected_bank = "BNKAXXYY"  # Needs to be a BIC 8-letter code  

# 7. Selected country  
selected_country = "XX"  # Needs to be an ISO 2-letter code  

# 8. Selected period  
selected_period = 6  # Selects the period (1-6) for which the map will be generated  

# 9. Selected quarter  
selected_quarter = "y"  # Selects the quarter (1-4, or 'y' for entire year)  

# 10. Currency  
currency = "USD"  # Needs to be a 3-letter currency ticker; use 'All' if not specifying a single currency  

In [23]:
# Print the current settings for reference (optional)
print("Date:", today_date)
print("Pathway Length Toggle:", pathway_length_toggle)
print("System or Individual View:", system_or_individual_view)
print("Country or Institution View:", country_or_institution_view)
print("Color Using Index:", color_using_index)
print("Selected Bank:", selected_bank)
print("Selected Country:", selected_country)
print("Selected Period:", selected_period)
print("Selected Quarter:", selected_quarter)
print("Currency:", currency)

Date: 2025-03-03
Pathway Length Toggle: r
System or Individual View: s
Country or Institution View: c
Color Using Index: yes
Selected Bank: BNKAXXYY
Selected Country: XX
Selected Period: 6
Selected Quarter: y
Currency: USD


In [24]:
# Filter the DataFrame based on currency  
if currency != "All":  
    dataframe_currency = df[df['Currency'] == currency]  
else:  
    dataframe_currency = df  

In [25]:
# Filter the DataFrame based on the selected bank if individual view is selected  
# If system_or_individual_view is "b", the DataFrame is filtered to include only rows where 'BIC8' matches the selected_bank.
if system_or_individual_view == "b":  
    dataframe_currency = dataframe_currency[dataframe_currency['BIC8'] == selected_bank]  

In [26]:
# Usually, the data is in the Month-YY format (e.g. "Jan-12")  
# Here we set years for the selected periods  
year1 = "2015"  
year2 = "2016"  
year3 = "2015"  
year4 = "2016"  
year5 = "2015"  
year6 = "2016" 

In [27]:
# FILTER PERIOD:
# Function to aggregate data by year and quarter
def aggregate_by_period(df, year, quarter):
    # Filter data by year
    df_year = df[df['Year'] == year]
    
    # Filter by quarter or entire year
    if quarter == "y":
        return df_year  # Entire year
    elif quarter in [1, "1"]:
        return df_year[df_year['Month'].isin(["Jan", "Feb", "Mar"])]
    elif quarter in [2, "2"]:
        return df_year[df_year['Month'].isin(["Apr", "May", "Jun"])]
    elif quarter in [3, "3"]:
        return df_year[df_year['Month'].isin(["Jul", "Aug", "Sep"])]
    elif quarter in [4, "4"]:
        return df_year[df_year['Month'].isin(["Oct", "Nov", "Dec"])]
    else:
        return pd.DataFrame()   # Return an empty DataFrame if quarter is invalid

In [28]:
# Aggregate data for each period
# Separate DataFrames (df1 through df6) are created for each year and quarter combination.
df1 = aggregate_by_period(dataframe_currency, year1, selected_quarter)
df2 = aggregate_by_period(dataframe_currency, year2, selected_quarter)
df3 = aggregate_by_period(dataframe_currency, year3, selected_quarter)
df4 = aggregate_by_period(dataframe_currency, year4, selected_quarter)
df5 = aggregate_by_period(dataframe_currency, year5, selected_quarter)
df6 = aggregate_by_period(dataframe_currency, year6, selected_quarter)

In [29]:
# Function for correspondent banks only (pathway_length_toggle == "r")
# Splits the transactions into inflows and outflows, rearranges the columns, and concatenates the two subsets.
def bind_graphs_correspondent(df):
    graph1 = df[df['flow'] == 1][["BIC8", "BIC8.Country", "Counterparty.BIC8", "Counterparty.BIC8.Country", "Currency", "Net.USD.Amount", "Transactions"]]
    graph1.columns = ["from", "from.country", "to", "to.country", "currency", "weight", "volume"]

    graph2 = df[df['flow'] == -1][["Counterparty.BIC8", "Counterparty.BIC8.Country", "BIC8", "BIC8.Country", "Currency", "Net.USD.Amount", "Transactions"]]
    graph2.columns = ["from", "from.country", "to", "to.country", "currency", "weight", "volume"]

    graph = pd.concat([graph1, graph2], ignore_index=True)
    graph = graph[["from", "to", "weight", "volume", "currency", "from.country", "to.country"]]
    return graph

In [30]:
# Function for endpoints only (pathway_length_toggle == "e")
# Directly maps the Initial.Ordering.BIC8 to End.Beneficiary.BIC8, creating a graph based only on these endpoints.
def bind_graphs_endpoints(df):
    graph = df[["Initial.Ordering.BIC8", "Initial.Ordering.Country", "End.Beneficiary.BIC8", "End.Beneficiary.Country", "Currency", "Net.USD.Amount", "Transactions"]]
    graph.columns = ["from", "from.country", "to", "to.country", "currency", "weight", "volume"]
    return graph

In [31]:
# Function for full payment chain (pathway_length_toggle == "f")
# Combines the correspondent (Initial.Ordering.BIC8 -> Counterparty.BIC8) and endpoint transactions (Counterparty.BIC8 -> End.Beneficiary.BIC8) into a single graph.
def bind_graphs_full_chain(df):
    graph1 = df[["Initial.Ordering.BIC8", "Initial.Ordering.Country", "Counterparty.BIC8", "Counterparty.BIC8.Country", "Currency", "Net.USD.Amount", "Transactions"]]
    graph1.columns = ["from", "from.country", "to", "to.country", "currency", "weight", "volume"]

    graph2 = df[["Counterparty.BIC8", "Counterparty.BIC8.Country", "End.Beneficiary.BIC8", "End.Beneficiary.Country", "Currency", "Net.USD.Amount", "Transactions"]]
    graph2.columns = ["from", "from.country", "to", "to.country", "currency", "weight", "volume"]

    graph = pd.concat([graph1, graph2], ignore_index=True)
    graph = graph[["from", "to", "weight", "volume", "currency", "from.country", "to.country"]]
    return graph

In [32]:
# Apply functions based on pathway_length_toggle
# Column Mapping: The columns are renamed and rearranged to match the desired format for each graph type.
if pathway_length_toggle == "r":
    graphm1 = bind_graphs_correspondent(df1)
    graphm2 = bind_graphs_correspondent(df2)
    graphm3 = bind_graphs_correspondent(df3)
    graphm4 = bind_graphs_correspondent(df4)
    graphm5 = bind_graphs_correspondent(df5)
    graphm6 = bind_graphs_correspondent(df6)

elif pathway_length_toggle == "e":
    graphm1 = bind_graphs_endpoints(df1)
    graphm2 = bind_graphs_endpoints(df2)
    graphm3 = bind_graphs_endpoints(df3)
    graphm4 = bind_graphs_endpoints(df4)
    graphm5 = bind_graphs_endpoints(df5)
    graphm6 = bind_graphs_endpoints(df6)

elif pathway_length_toggle == "f":
    graphm1 = bind_graphs_full_chain(df1)
    graphm2 = bind_graphs_full_chain(df2)
    graphm3 = bind_graphs_full_chain(df3)
    graphm4 = bind_graphs_full_chain(df4)
    graphm5 = bind_graphs_full_chain(df5)
    graphm6 = bind_graphs_full_chain(df6)

In [33]:
def finalize_graph(graph):
    # Remove rows where the from or to columns contain missing values (NaN).
    graph = graph.dropna(subset=["from", "to"])
    
    # Ensure correct data types for columns
    # Columns from and to are converted to strings using .astype(str)
    graph["from"] = graph["from"].astype(str)
    graph["to"] = graph["to"].astype(str)
    # Columns weight and volume are converted to numeric values using pd.to_numeric(). 
    # If there are invalid numeric values, they will be set to NaN due to errors="coerce".
    graph["weight"] = pd.to_numeric(graph["weight"], errors="coerce")
    graph["volume"] = pd.to_numeric(graph["volume"], errors="coerce")
    
    # Remove circular flows (e.g., Bank X --> Bank X) where the from and to columns are equal
    graph = graph[graph["from"] != graph["to"]]
    
    return graph

In [34]:
# Apply the function to each graph
graphm1 = finalize_graph(graphm1)
graphm2 = finalize_graph(graphm2)
graphm3 = finalize_graph(graphm3)
graphm4 = finalize_graph(graphm4)
graphm5 = finalize_graph(graphm5)
graphm6 = finalize_graph(graphm6)

In [35]:
#graphs = [graphm1, graphm2, graphm3, graphm4, graphm5, graphm6]

#for i in range(len(graphs)):
#    graphs[i] = finalize_graph(graphs[i])


In [36]:
# Institution-based aggregation of flows
def graggregate(graph):
    # Group by 'from' and 'to' and sum the 'weight'
    graph_agg = graph.groupby(['from', 'to'], as_index=False).agg({'weight': 'sum'})
    # Ensure data types are consistent
    graph_agg['from'] = graph_agg['from'].astype(str)
    graph_agg['to'] = graph_agg['to'].astype(str)
    graph_agg['weight'] = graph_agg['weight'].astype(float)
    return graph_agg

In [37]:
# Country-based aggregation of flows
def graggregate_country(graph):
    # Group by 'from.country' and 'to.country' and sum the 'weight'
    graph_agg = graph.groupby(['from.country', 'to.country'], as_index=False).agg({'weight': 'sum'})
    # Rename columns
    graph_agg.columns = ['from', 'to', 'weight']
    # Ensure data types are consistent
    graph_agg['from'] = graph_agg['from'].astype(str)
    graph_agg['to'] = graph_agg['to'].astype(str)
    graph_agg['weight'] = graph_agg['weight'].astype(float)
    # Remove circular flows
    graph_agg = graph_agg[graph_agg['from'] != graph_agg['to']]
    return graph_agg

In [38]:
# Apply aggregation based on the toggle
if country_or_institution_view == "i":  # Institution-based
    grapha1 = graggregate(graphm1)
    grapha2 = graggregate(graphm2)
    grapha3 = graggregate(graphm3)
    grapha4 = graggregate(graphm4)
    grapha5 = graggregate(graphm5)
    grapha6 = graggregate(graphm6)
elif country_or_institution_view == "c":  # Country-based
    grapha1 = graggregate_country(graphm1)
    grapha2 = graggregate_country(graphm2)
    grapha3 = graggregate_country(graphm3)
    grapha4 = graggregate_country(graphm4)
    grapha5 = graggregate_country(graphm5)
    grapha6 = graggregate_country(graphm6)

In [39]:
def adjust_edges(old_graph, new_graph):
    # Add a new column 'edge_multiplier' initialized with value 1
    new_graph = new_graph.copy()
    new_graph['edge_multiplier'] = 1

    # Iterate over each row in new_graph
    for i, row in new_graph.iterrows():
        from_node = row['from']
        to_node = row['to']

        # Calculate the total weights in old and new graphs for the same edge
        old_sum = old_graph.loc[
            (old_graph['from'] == from_node) & (old_graph['to'] == to_node), 'weight'
        ].sum()
        new_sum = new_graph.loc[
            (new_graph['from'] == from_node) & (new_graph['to'] == to_node), 'weight'
        ].sum()

        # Adjust 'edge_multiplier' based on the weight difference
        if old_sum != 0:
            if new_sum - old_sum > 0:
                new_graph.at[i, 'edge_multiplier'] = 2
            elif new_sum - old_sum < 0:
                new_graph.at[i, 'edge_multiplier'] = 0.5

    return new_graph

# Apply the function to adjust edge multipliers sequentially
grapha1 = adjust_edges(grapha1, grapha1)
grapha2 = adjust_edges(grapha1, grapha2)
grapha3 = adjust_edges(grapha2, grapha3)
grapha4 = adjust_edges(grapha3, grapha4)
grapha5 = adjust_edges(grapha4, grapha5)
grapha6 = adjust_edges(grapha5, grapha6)


In [40]:
# Create Node List. We use the unique() method to extract unique values from the from and to columns. 
# For country-level aggregation, the relevant columns are from.country and to.country.
# The pd.concat() function combines the two DataFrames, and drop_duplicates() ensures all rows remain unique.
# NA Handling: The combined DataFrame is filtered to remove any rows with NA values, ensuring that only valid entries are kept.
# Data Type: The resulting temp1 column is explicitly converted to string type for consistency.
# Sorting the Lists: The resultant DataFrames are sorted by the temp1 column in alphabetical order.

In [41]:
def create_temp1(graph):
    # Get unique 'from' nodes
    temp1 = pd.DataFrame(graph['from'].unique(), columns=['temp1'])
    return temp1

def create_temp2(graph):
    # Use placeholder values if 'from.country' or 'to.country' attributes are missing
    from_country = graph['from.country'] if 'from.country' in graph.columns else ['Unknown'] * len(graph)
    to_country = graph['to.country'] if 'to.country' in graph.columns else ['Unknown'] * len(graph)
    
    # Get unique 'from' and 'to' nodes
    temp1 = pd.DataFrame(pd.Series(from_country).unique(), columns=['temp1'])
    temp2 = pd.DataFrame(pd.Series(to_country).unique(), columns=['temp1'])
    
    return temp1, temp2

# Create temporary lists based on the view type
if country_or_institution_view == "i":
    tempm1 = create_temp1(grapha1)
    tempm2 = create_temp1(grapha2)
    tempm3 = create_temp1(grapha3)
    tempm4 = create_temp1(grapha4)
    tempm5 = create_temp1(grapha5)
    tempm6 = create_temp1(grapha6)

elif country_or_institution_view == "c":
    tempm1, tempm2 = create_temp2(grapha1)
    tempm3, tempm4 = create_temp2(grapha2)
    tempm5, tempm6 = create_temp2(grapha3)


In [42]:
# Create node list (Institution-level)
# The create_temp1 function creates a unique list of nodes (from and to) at the institution level, 
  # while create_temp2 does the same for the country level.
def create_temp1(graph):
    temp1 = pd.DataFrame(graph['from'].unique(), columns=['temp1'])
    temp2 = pd.DataFrame(graph['to'].unique(), columns=['temp1'])
    temp1 = pd.concat([temp1, temp2]).drop_duplicates().reset_index(drop=True)
    temp1 = temp1[temp1['temp1'].notna()]
    return temp1

# Create node list (Country-level)
def create_temp2(graph):
    temp1 = pd.DataFrame(graph['from'].unique(), columns=['temp1'])
    temp2 = pd.DataFrame(graph['to'].unique(), columns=['temp1'])
    temp1 = pd.concat([temp1, temp2]).drop_duplicates().reset_index(drop=True)
    temp1 = temp1[temp1['temp1'].notna()]
    return temp1

In [43]:
# Merge node list with transparency index
# Transparency data is merged with the node lists if color_using_index == "yes" and the view is country-based.
def merge_with_transp(transparency, node_list):
    temp = transparency[transparency['Code'].isin(node_list['temp1'])]
    temp2 = pd.merge(node_list, temp, how='left', left_on='temp1', right_on='Code').fillna(0)
    return temp2

In [44]:
# Create node lists based on toggle
if country_or_institution_view == "i":
    tempm1 = create_temp1(grapha1)
    tempm2 = create_temp1(grapha2)
    tempm3 = create_temp1(grapha3)
    tempm4 = create_temp1(grapha4)
    tempm5 = create_temp1(grapha5)
    tempm6 = create_temp1(grapha6)
elif country_or_institution_view == "c":
    tempm1 = create_temp2(grapha1)
    tempm2 = create_temp2(grapha2)
    tempm3 = create_temp2(grapha3)
    tempm4 = create_temp2(grapha4)
    tempm5 = create_temp2(grapha5)
    tempm6 = create_temp2(grapha6)

# Sort node lists in alphabetical order
tempm1 = tempm1.sort_values('temp1').reset_index(drop=True)
tempm2 = tempm2.sort_values('temp1').reset_index(drop=True)
tempm3 = tempm3.sort_values('temp1').reset_index(drop=True)
tempm4 = tempm4.sort_values('temp1').reset_index(drop=True)
tempm5 = tempm5.sort_values('temp1').reset_index(drop=True)
tempm6 = tempm6.sort_values('temp1').reset_index(drop=True)

In [45]:
# Merge node lists with transparency index (if applicable)
# Transparency data is merged with the node lists if color_using_index == "yes" and the view is country-based.
if country_or_institution_view == "c" and color_using_index == "yes":
    tempm1 = merge_with_transp(transparency, tempm1)
    tempm2 = merge_with_transp(transparency, tempm2)
    tempm3 = merge_with_transp(transparency, tempm3)
    tempm4 = merge_with_transp(transparency, tempm4)
    tempm5 = merge_with_transp(transparency, tempm5)
    tempm6 = merge_with_transp(transparency, tempm6)

In [46]:
# Generate directed networks using NetworkX
# Directed graphs (DiGraph) are created for each graph using NetworkX, with edges weighted by weight.
def create_network(graph, vertices):
    G = nx.DiGraph()
    for _, row in graph.iterrows():
        G.add_edge(row['from'], row['to'], weight=row['weight'])
    nx.set_node_attributes(G, {node: data['temp1'] for node, data in vertices.iterrows()})
    return G

if country_or_institution_view == "c":
    networkm1 = create_network(grapha1, tempm1)
    networkm2 = create_network(grapha2, tempm2)
    networkm3 = create_network(grapha3, tempm3)
    networkm4 = create_network(grapha4, tempm4)
    networkm5 = create_network(grapha5, tempm5)
    networkm6 = create_network(grapha6, tempm6)
else:
    networkm1 = create_network(grapha1, tempm1)
    networkm2 = create_network(grapha2, tempm2)
    networkm3 = create_network(grapha3, tempm3)
    networkm4 = create_network(grapha4, tempm4)
    networkm5 = create_network(grapha5, tempm5)
    networkm6 = create_network(grapha6, tempm6)

In [47]:
# Generate PageRank scores for each network
prm1 = nx.pagerank(networkm1)
prm2 = nx.pagerank(networkm2)
prm3 = nx.pagerank(networkm3)
prm4 = nx.pagerank(networkm4)
prm5 = nx.pagerank(networkm5)
prm6 = nx.pagerank(networkm6)

In [48]:
# Select the network based on the selected period
if selected_period == 1:
    selected_network = networkm1
elif selected_period == 2:
    selected_network = networkm2
elif selected_period == 3:
    selected_network = networkm3
elif selected_period == 4:
    selected_network = networkm4
elif selected_period == 5:
    selected_network = networkm5
elif selected_period == 6:
    selected_network = networkm6

In [49]:
# Thresholds for quintiles
bottom_quintile = 26
second_lowest_quintile = 42
# Node colors are determined based on the attributes speriod5 and chperiod5, with inside and frame colors applied for the country-level view.
if country_or_institution_view == "c" and color_using_index == "yes":
    for node in selected_network.nodes():
        speriod5 = selected_network.nodes[node].get('speriod5', 0)
        chperiod5 = selected_network.nodes[node].get('chperiod5', 0)
        
        # Inside (fill) color
        if node == selected_country:
            selected_network.nodes[node]['color'] = "white"
        elif 1 < speriod5 < bottom_quintile:
            selected_network.nodes[node]['color'] = "red"
        elif 1 < speriod5 < second_lowest_quintile:
            selected_network.nodes[node]['color'] = "orange"
        else:
            selected_network.nodes[node]['color'] = "gray"
        
        # Frame (outline) color
        if chperiod5 == "-1":
            selected_network.nodes[node]['frame_color'] = "red"
        elif chperiod5 == "1":
            selected_network.nodes[node]['frame_color'] = "green"
        else:
            selected_network.nodes[node]['frame_color'] = "black"
elif country_or_institution_view == "c" and color_using_index == "no":
    for node in selected_network.nodes():
        selected_network.nodes[node]['color'] = "white" if node == "US" else "gray"

In [50]:
#pip install --upgrade pyvis

In [51]:
#pip install Jinja2

In [52]:
from bs4 import BeautifulSoup

In [54]:
# Create a PyVis network for visualization
net = Network(height="1000px", width="1000px", directed=True, bgcolor="white", notebook=False)

# Enable Physics for better layout spacing
net.set_options("""
var options = {
  "physics": {
    "enabled": true,
    "barnesHut": {
      "gravitationalConstant": -20000,
      "centralGravity": 0.3,
      "springLength": 95,
      "springConstant": 0.04
    },
    "solver": "barnesHut"
  },
  "edges": {
    "smooth": {
      "type": "dynamic"
    }
  }
}
""")

# Add nodes with attributes
pagerank = nx.pagerank(selected_network)  # Calculate PageRank for sizing nodes
for node in selected_network.nodes(data=True):
    node_id = node[0]
    attrs = node[1]
    size = np.sqrt(pagerank.get(node_id, 1)) * 70  # Scale size by PageRank
    color = attrs.get("color", "gray")  # Default color is gray
    net.add_node(node_id, title=node_id, color=color, size=size)

# Add edges with attributes
weights = [d.get("weight", 1) for (_, _, d) in selected_network.edges(data=True)]
median_weight = np.median(weights) if weights else 1

for edge in selected_network.edges(data=True):
    from_node, to_node, attrs = edge
    weight = attrs.get("weight", median_weight)
    edge_multiplier = attrs.get("edge_multiplier", 1)
    color = "green" if edge_multiplier == 1 else "gray"
    width = np.sqrt(weight / median_weight) * 2  # Normalize width
    net.add_edge(from_node, to_node, color=color, width=width)

# Generate the HTML content
html_content = net.generate_html()

# Use BeautifulSoup to refine the generated HTML further
soup = BeautifulSoup(html_content, 'html.parser')

# Modify the network visualization's background and interactivity options
canvas = soup.find('div', {'id': 'mynetwork'})
if canvas:
    canvas['style'] = 'background-color: lightblue;'  # Light blue background for aesthetics

# Save the refined HTML
with open("improved_network.html", "w", encoding="utf-8") as f:
    f.write(str(soup))

print("Improved network visualization saved as 'improved_network.html'")


Improved network visualization saved as 'improved_network.html'


In [66]:
from networkx.algorithms.community import greedy_modularity_communities

# Generate positions using spring_layout
pos = nx.spring_layout(selected_network, k=1.5, iterations=500)

# Create PyVis network
net = Network(height="1500px", width="1500px", directed=True, bgcolor="white", notebook=False)

# Set physics options for node repulsion
net.set_options("""
{
  "physics": {
    "enabled": true,
    "barnesHut": {
      "gravitationalConstant": -200000,
      "centralGravity": 0.01,
      "springLength": 400,
      "springConstant": 0.005
    },
    "minVelocity": 0.2
  },
  "edges": {
    "smooth": {
      "type": "curvedCW",
      "roundness": 0.2
    }
  },
  "layout": {
    "hierarchical": {
      "enabled": false
    }
  }
}
""")

# Detect communities and assign groups to nodes
communities = list(greedy_modularity_communities(selected_network))
for i, community in enumerate(communities):
    for node in community:
        selected_network.nodes[node]['group'] = i

# Add nodes with positions, sizes, and clustering
pagerank = nx.pagerank(selected_network)
for node, coords in pos.items():
    size = np.sqrt(pagerank.get(node, 1)) * 120  # Scale node size
    group = selected_network.nodes[node].get('group', 0)  # Assign group if available
    net.add_node(node, x=coords[0] * 1000, y=coords[1] * 1000, size=size, group=group, title=node)

# Add edges with filtering and normalization
weights = [attr.get("weight", 1) for _, _, attr in selected_network.edges(data=True)]
median_weight = np.median(weights) if weights else 1

threshold = 0.5  # Filter threshold for edge weights
for edge in selected_network.edges(data=True):
    from_node, to_node, attr = edge
    weight = attr.get("weight", median_weight)
    if weight > threshold:  # Include only significant edges
        width = max(weight / median_weight, 1) * 2  # Normalize edge width
        net.add_edge(from_node, to_node, width=width, color=attr.get("color", "gray"))

# Save the visualization to an HTML file
net.write_html("final_network_visualization.html")
print("Final network visualization saved as 'final_network_visualization.html'")


Final network visualization saved as 'final_network_visualization.html'


In [73]:
print(f"Number of nodes: {len(filtered_graph.nodes())}")
print(f"Number of edges: {len(filtered_graph.edges())}")


Number of nodes: 21
Number of edges: 26


In [74]:
from networkx.algorithms.community import greedy_modularity_communities

# Filter edges with a relaxed threshold
threshold = 0.5
filtered_edges = [
    (u, v, attr) for u, v, attr in selected_network.edges(data=True) if attr.get("weight", 0) > threshold
]
filtered_graph = nx.DiGraph()
filtered_graph.add_edges_from(filtered_edges)

# Ensure there are nodes
if len(filtered_graph.nodes()) == 0 or len(filtered_graph.edges()) == 0:
    print("No nodes or edges to visualize. Lowering threshold.")
    threshold = 0.1  # Adjust threshold to include more edges

# Calculate positions
pos = nx.spring_layout(filtered_graph, k=1.5, iterations=300)

# Create PyVis network
net = Network(height="1500px", width="1500px", directed=True, bgcolor="white", notebook=False)

# Reset physics settings
net.set_options("""
{
  "physics": {
    "enabled": true,
    "barnesHut": {
      "gravitationalConstant": -30000,
      "centralGravity": 0.3,
      "springLength": 150,
      "springConstant": 0.05
    },
    "minVelocity": 0.75
  }
}
""")

# Add nodes
for node, coords in pos.items():
    size = 50  # Default size for all nodes
    group = 0  # Default group assignment
    net.add_node(node, x=coords[0] * 1000, y=coords[1] * 1000, size=size, group=group, title=node)

# Add edges
for u, v, attr in filtered_graph.edges(data=True):
    weight = attr.get("weight", 1)
    net.add_edge(u, v, width=max(weight, 1), color="gray")

# Save the network visualization
net.write_html("debugged_network.html")
print("Network visualization saved as 'debugged_network.html'")


Network visualization saved as 'debugged_network.html'


In [75]:
for node, coords in pos.items():
    print(f"Node: {node}, Position: {coords}")

Node: DE, Position: [-0.1633955  -0.02955287]
Node: XX, Position: [-0.16340719 -0.02949632]
Node: BE, Position: [-0.74566968 -0.55402593]
Node: CA, Position: [0.77529597 0.61539905]
Node: CH, Position: [0.4191425  0.86185304]
Node: CL, Position: [-0.28161696 -0.90101781]
Node: CN, Position: [-0.53324403  0.78237014]
Node: EC, Position: [ 0.26540387 -0.92738046]
Node: ES, Position: [-0.16885109 -0.0302077 ]
Node: HK, Position: [-0.88720224  0.10022155]
Node: IN, Position: [ 0.74427281 -0.67120634]
Node: IT, Position: [-0.03912195  0.93580186]
Node: JM, Position: [-0.16353379 -0.02835806]
Node: JP, Position: [-0.16264399 -0.03101203]
Node: KR, Position: [ 1.         -0.17184169]
Node: MX, Position: [0.96222023 0.23042822]
Node: PA, Position: [-0.16337713 -0.02951624]
Node: US, Position: [-0.16337733 -0.02951312]
Node: GB, Position: [-0.21581185 -0.03730018]
Node: SC, Position: [-0.15865108 -0.01893418]
Node: TW, Position: [-0.15643157 -0.03671097]


In [76]:
for u, v, attr in filtered_graph.edges(data=True):
    print(f"Edge from {u} to {v}, Attributes: {attr}")

Edge from DE to XX, Attributes: {'weight': 15856874.0, 'color': 'gray'}
Edge from XX to BE, Attributes: {'weight': 3070387.0, 'color': 'gray'}
Edge from XX to CA, Attributes: {'weight': 140488.0, 'color': 'gray'}
Edge from XX to CH, Attributes: {'weight': 646000.0, 'color': 'gray'}
Edge from XX to CL, Attributes: {'weight': 2000000.0, 'color': 'gray'}
Edge from XX to CN, Attributes: {'weight': 2769490.0, 'color': 'gray'}
Edge from XX to DE, Attributes: {'weight': 31111896.0, 'color': 'gray'}
Edge from XX to EC, Attributes: {'weight': 65900.0, 'color': 'gray'}
Edge from XX to ES, Attributes: {'weight': 9951066.0, 'color': 'gray'}
Edge from XX to HK, Attributes: {'weight': 227179.0, 'color': 'gray'}
Edge from XX to IN, Attributes: {'weight': 77690.0, 'color': 'gray'}
Edge from XX to IT, Attributes: {'weight': 80000.0, 'color': 'gray'}
Edge from XX to JM, Attributes: {'weight': 97579219.0, 'color': 'gray'}
Edge from XX to JP, Attributes: {'weight': 3145.0, 'color': 'gray'}
Edge from XX to

In [77]:
for node, coords in pos.items():
    net.add_node(node, x=coords[0] * 1000, y=coords[1] * 1000, size=50, label=node, color="blue")  # Default blue nodes


In [83]:
from pyvis.network import Network
import networkx as nx
import numpy as np
from networkx.algorithms.community import greedy_modularity_communities

# Step 1: Filter edges aggressively
threshold = 1.0  # Adjust threshold for edge weights
filtered_edges = [
    (u, v, attr) for u, v, attr in selected_network.edges(data=True) if attr.get("weight", 0) > threshold
]
filtered_graph = nx.DiGraph()
filtered_graph.add_edges_from(filtered_edges)

# Step 2: Generate positions with more relaxed spacing
pos = nx.spring_layout(filtered_graph, k=2.0, iterations=800)

# Step 3: Create PyVis network
net = Network(height="1500px", width="1500px", directed=True, bgcolor="white", notebook=False)

# Step 4: Apply physics for balanced node distribution
net.set_options("""
{
  "physics": {
    "enabled": true,
    "barnesHut": {
      "gravitationalConstant": -1000000,
      "centralGravity": 0.0005,
      "springLength": 500,
      "springConstant": 0.001
    },
    "minVelocity": 0.1
  },
  "edges": {
    "smooth": {
      "type": "dynamic",
      "roundness": 0.3
    }
  }
}
""")

# Step 5: Add nodes with normalized sizes
pagerank = nx.pagerank(filtered_graph)
for node, coords in pos.items():
    size = max(30, np.sqrt(pagerank.get(node, 1)) * 100)  # Normalize node sizes
    net.add_node(node, x=coords[0] * 1000, y=coords[1] * 1000, size=size, label=node, color="blue")

# Step 6: Add edges with normalized widths
for u, v, attr in filtered_graph.edges(data=True):
    weight = attr.get("weight", 1)
    net.add_edge(u, v, width=max(weight / 2, 1), color="gray")  # Normalize edge widths

# Step 7: Save the visualization
net.write_html("balanced_network_visualization.html")
print("Balanced network visualization saved as 'balanced_network_visualization.html'")


Balanced network visualization saved as 'balanced_network_visualization.html'


Conducting a SWIFT network data analysis involves exploring various aspects of
connectivity, transaction flow, centrality measures, and visualizations. Below, I will outline
several key questions that can be addressed through Python scripts, along with example
scripts for each analytical aspect based on the architecture of your data.
Key Questions for SWIFT Network Data Analysis
1. Who are the central nodes in the network?
o Analyze which nodes (countries or institutions) are the most influential
based on centrality measures (e.g., degree centrality, betweenness
centrality).
2. What are the transaction flows between nodes?
o Measure the volume and weight of transactions between nodes to identify
significant flows.
3. How do clusters exist within the network?
o Identify communities or clusters within the network to understand how
nodes group together.
4. What is the distribution of transactions over time?
o Analyze how the transaction volume varies over time to identify trends or
anomalies.
5. Visualize the network with di􀆯erent emphasis based on specific metrics.
o Create various visualizations to highlight di􀆯erent attributes, such as
transaction volume or centrality metrics.

1. Centrality Analysis
This script calculates degree centrality and betweenness centrality for nodes in the
network.

In [84]:
# Assuming selected_network is your graph
degree_centrality = nx.degree_centrality(selected_network)
betweenness_centrality = nx.betweenness_centrality(selected_network)
# Create a DataFrame to store centrality measures
centrality_df = pd.DataFrame({
'Node': degree_centrality.keys(),
'Degree Centrality': degree_centrality.values(),
'Betweenness Centrality': betweenness_centrality.values()
})
# Sort by Degree Centrality
centrality_df = centrality_df.sort_values(by='Degree Centrality', ascending=False)
print(centrality_df)

   Node  Degree Centrality  Betweenness Centrality
1    XX                  1                       0
0    DE                  0                       0
12   JM                  0                       0
17   US                  0                       0
16   PA                  0                       0
8    ES                  0                       0
13   JP                  0                       0
19   SC                  0                       0
18   GB                  0                       0
15   MX                  0                       0
14   KR                  0                       0
10   IN                  0                       0
11   IT                  0                       0
9    HK                  0                       0
7    EC                  0                       0
6    CN                  0                       0
5    CL                  0                       0
4    CH                  0                       0
3    CA                  0     

2. Transaction Flow Analysis
   This script analyzes transaction volumes and weights between nodes.

In [85]:
# Assuming edges have attributes 'weight' corresponding to transaction volume
transaction_flow = pd.DataFrame(selected_network.edges(data=True), columns=['Source',
'Target', 'Attributes'])
transaction_flow['Weight'] = transaction_flow['Attributes'].apply(lambda x: x['weight'])
# Group by source and target for total transactions
flow_summary = transaction_flow.groupby(['Source', 'Target']).agg({'Weight':
'sum'}).reset_index()
print(flow_summary)

   Source Target     Weight
0      DE     XX   15856874
1      ES     XX      32283
2      GB     XX         51
3      JM     XX    2513061
4      JP     XX     806726
5      PA     XX  101771169
6      SC     XX       4056
7      TW     XX       6210
8      US     XX 8735664098
9      XX     BE    3070387
10     XX     CA     140488
11     XX     CH     646000
12     XX     CL    2000000
13     XX     CN    2769490
14     XX     DE   31111896
15     XX     EC      65900
16     XX     ES    9951066
17     XX     HK     227179
18     XX     IN      77690
19     XX     IT      80000
20     XX     JM   97579219
21     XX     JP       3145
22     XX     KR     761104
23     XX     MX     722139
24     XX     PA   13238145
25     XX     US 6522511700


3. Community Detection
Using the Girvan-Newman method, this script detects communities in the network.

In [87]:
from networkx.algorithms import community
# Using Girvan-Newman method for community detection
comp = community.girvan_newman(selected_network)
# Get the first level of communities
first_level_communities = next(comp)
# Display communities
for idx, comm in enumerate(first_level_communities):
    print(f"Community {idx + 1}: {comm}")

Community 1: {'DE'}
Community 2: {'IT', 'US', 'ES', 'BE', 'CN', 'KR', 'MX', 'CA', 'JM', 'JP', 'CL', 'EC', 'PA', 'XX', 'HK', 'TW', 'SC', 'IN', 'GB', 'CH'}
