# Import libraries and Create Reference Data

In [1]:
import pandas as pd 
import datetime
import random

# Set variables
credit_ratings = ['AAA', 'AA', 'A', 'BBB']
collateral_types = ['Government Bond', 'Corporate Bond', 'Equity', 'Treasury Bill', 'Agency MBS']
repo_types = ['Repo', 'Reverse Repo', 'Open Repo', 'Term Repo', 'Overnight Reverse Repo', 'Tri-Party Repo', 'Standing Repo Facility']

haircut_ranges = {
    'Government Bond': (0.01, 0.03), # 1-3%
    'Corporate Bond': (0.03, 0.10), # 3-10%
    'Equity': (0.05, 0.20), # 5-20%  
    'Treasury Bill': (0.005, 0.015), # .5-1.5%
    'Agency MBS': (0.02, 0.05) # 2-5%
}


In [2]:
print(credit_ratings)
print(collateral_types)
print(repo_types)

['AAA', 'AA', 'A', 'BBB']
['Government Bond', 'Corporate Bond', 'Equity', 'Treasury Bill', 'Agency MBS']
['Repo', 'Reverse Repo', 'Open Repo', 'Term Repo', 'Overnight Reverse Repo', 'Tri-Party Repo', 'Standing Repo Facility']


# Create Counterparties Data Frame

In [3]:
num_counterparties = 50

counterparties = pd.DataFrame({
    'CounterpartyID': range(1, num_counterparties+1),
    'CounterpartyName': [f'Party {i}' for i in range(1, num_counterparties + 1)],
    'CreditRating': [random.choice(credit_ratings) for _ in range(num_counterparties)]
})

print(counterparties)


    CounterpartyID CounterpartyName CreditRating
0                1          Party 1          AAA
1                2          Party 2          BBB
2                3          Party 3            A
3                4          Party 4          AAA
4                5          Party 5            A
5                6          Party 6          AAA
6                7          Party 7            A
7                8          Party 8          BBB
8                9          Party 9           AA
9               10         Party 10          BBB
10              11         Party 11           AA
11              12         Party 12            A
12              13         Party 13            A
13              14         Party 14          BBB
14              15         Party 15          BBB
15              16         Party 16          BBB
16              17         Party 17            A
17              18         Party 18          BBB
18              19         Party 19           AA
19              20  

# Create Collateral

In [165]:
num_collateral = 200
collateral = pd.DataFrame({
    'CollateralID': range(1, num_collateral+1),
    'CollateralType': [random.choice(collateral_types) for _ in range(num_collateral)],
    'MarketValue': [random.uniform(1000000, 50000000) for _ in range(num_collateral)]
})

collateral['Haircut'] = collateral.apply(lambda row: random.uniform(*haircut_ranges[row['CollateralType']]), axis=1)

# GENERATE TRANSACTIONS

In [29]:
import pandas as pd
import random
from datetime import datetime, timedelta
import string

# Define the date range for start dates
start_date_range = datetime(2024, 1, 1)
end_date_range = datetime(2025, 3, 31)
days_between = (end_date_range - start_date_range).days

def generate_transaction():
    random_days = random.randint(0, days_between)
    start_date = start_date_range + timedelta(days=random_days)
    maturity_date = start_date + timedelta(days=1)  # Overnight repo

    transaction_id = ''.join(random.choices(string.ascii_uppercase + string.digits, k=8))

    return {
        "ID": transaction_id,
        "startDate": start_date.strftime("%Y-%m-%d"),
        "maturityDate": maturity_date.strftime("%Y-%m-%d"),
        "cashAmount": round(random.uniform(1_000_000, 10_000_000), 2),
        "repoType": random.choice(["Tri-Party", "Bilateral"]),
        "repoRate": round(random.uniform(0.01, 0.10), 4),
        "InitialMargin": round(random.uniform(0.01, 0.03), 4)
    }

# Generate 500 transactions
transactions = [generate_transaction() for _ in range(500)]

# Convert to DataFrame for easier viewing or exporting
df_transactions = pd.DataFrame(transactions)

# Display the first few rows
df_transactions.head()


Unnamed: 0,ID,startDate,maturityDate,cashAmount,repoType,repoRate,InitialMargin
0,1MFSX1Z5,2025-02-20,2025-02-21,1753396.98,Bilateral,0.0345,0.0103
1,PKYR0VQ7,2024-05-28,2024-05-29,7549623.8,Bilateral,0.0924,0.0266
2,FKE4ALON,2024-12-11,2024-12-12,2098801.09,Tri-Party,0.0854,0.023
3,AVR4AOLO,2024-07-19,2024-07-20,4500735.53,Bilateral,0.0752,0.0151
4,IDKLL5NB,2024-05-23,2024-05-24,2834079.14,Bilateral,0.0576,0.0299


# Print Counterparties and Transactions Test Data

In [30]:
print("Counterparties:\n", counterparties.head())
# print("\nCollateral:\n", collateral.head())
print("\nRepo Transactions:\n", df_transactions.head())

counterparties.to_csv('counterparties.csv', index=False)
# collateral.to_csv('collateral.csv', index=False)
df_transactions.to_csv('transactions.csv', index=False)

Counterparties:
    CounterpartyID CounterpartyName CreditRating
0               1          Party 1          AAA
1               2          Party 2          BBB
2               3          Party 3            A
3               4          Party 4          AAA
4               5          Party 5            A

Repo Transactions:
          ID   startDate maturityDate  cashAmount   repoType  repoRate  \
0  1MFSX1Z5  2025-02-20   2025-02-21  1753396.98  Bilateral    0.0345   
1  PKYR0VQ7  2024-05-28   2024-05-29  7549623.80  Bilateral    0.0924   
2  FKE4ALON  2024-12-11   2024-12-12  2098801.09  Tri-Party    0.0854   
3  AVR4AOLO  2024-07-19   2024-07-20  4500735.53  Bilateral    0.0752   
4  IDKLL5NB  2024-05-23   2024-05-24  2834079.14  Bilateral    0.0576   

   InitialMargin  
0         0.0103  
1         0.0266  
2         0.0230  
3         0.0151  
4         0.0299  


## Install Neo4j library

In [31]:
pip install neo4j


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


## Make a connection to the Neo4j Database. Create two dataframes to store counterparties and transactions from the csvs

In [None]:
from neo4j import GraphDatabase
import pandas as pd

# Load your CSVs (make sure they're in the same directory or update path)
df_counterparties = pd.read_csv("counterparties.csv")
df_transactions = pd.read_csv("transactions.csv")

# Connection credentials
NEO4J_URI = "neo4j://ec2-44-215-43-76.compute-1.amazonaws.com:7687"  # Update this
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "admin"  # Replace with actual password

# Set up the driver
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))


## Load Counteries and Transactions Data into the Neo4j Cluster

In [17]:
def load_counterparties(tx, row):
    query = """
    MERGE (c:Counterparty {id: $CounterpartyID})
    SET c.name = $CounterpartyName,
        c.rating = $CreditRating
    """
    tx.run(query, **row)

def load_transactions(tx, row):
    query = """
    MERGE (t:Transaction {id: $ID})
    SET t.startDate = $startDate,
        t.maturityDate = $maturityDate,
        t.cashAmount = $cashAmount,
        t.repoType = $repoType,
        t.repoRate = $repoRate,
        t.InitialMargin = $InitialMargin
    """
    tx.run(query, **row)


## Write the data into Neo4j

In [18]:
import pandas as pd

# Load the CSVs
df_counterparties = pd.read_csv("counterparties.csv")
df_transactions = pd.read_csv("transactions.csv")

# Write data into Neo4j
with driver.session() as session:
    for _, row in df_counterparties.iterrows():
        session.write_transaction(load_counterparties, row.to_dict())

    for _, row in df_transactions.iterrows():
        session.write_transaction(load_transactions, row.to_dict())


  session.write_transaction(load_counterparties, row.to_dict())
  session.write_transaction(load_transactions, row.to_dict())


## Define the relationships between Counterparty to Transactions
### For this initial set of relationships for a single transaction depending on the repo type (Tri-Party, Bilateral) the following relationship is defined.

### For a given single <b>Tri-Party</b> repo type transaction, there will be 3 unique counter parties associated with the transaction.  Each counterparty will either be a <b>Lender, Borrower, or Intermediary</b>

### For a given single <b>Bilateral</b> repo type transaction, there will be 2 unique counter parties associated with the transaction.  Each counterparty will either be a <b>Lender or Borrower </b>

 

In [26]:
import random

# All available counterparty IDs
counterparty_ids = df_counterparties["CounterpartyID"].tolist()

# Role sets for each transaction type
tri_party_roles = ["lends", "borrows", "intermediates"]
bilateral_roles = ["lends", "borrows"]

# Clear existing relationships first if needed
def clear_existing_relationships(tx):
    tx.run("""
    MATCH (c:Counterparty)-[r]->(t:Transaction)
    DELETE r
    """)

# Relationship creation logic based on repo type
def link_transaction_with_roles(tx, transaction_id, repo_type, counterparties_roles):
    for counterparty_id, role in counterparties_roles:
        query = f"""
        MATCH (c:Counterparty {{id: $counterparty_id}})
        MATCH (t:Transaction {{id: $transaction_id}})
        MERGE (c)-[r:{role.upper()}]->(t)
        """
        tx.run(query, counterparty_id=counterparty_id, transaction_id=transaction_id)

# Apply the logic
with driver.session() as session:
    # Optional: Clear previous relationships
    session.write_transaction(clear_existing_relationships)

    used_assignments = set()  # to track used (transaction_id, counterparty_id) pairs

    for _, row in df_transactions.iterrows():
        tx_id = row["ID"]
        repo_type = row["repoType"]
        
        # Ensure unique counterparties per transaction
        if repo_type == "Tri-Party":
            selected_roles = tri_party_roles
        elif repo_type == "Bilateral":
            selected_roles = bilateral_roles
        else:
            continue  # Skip unknown repo types
        
        # Get N unique counterparties for the transaction
        selected_counterparties = random.sample(counterparty_ids, len(selected_roles))
        
        # Assign roles to those counterparties
        counterparties_roles = list(zip(selected_counterparties, selected_roles))
        
        # Ensure uniqueness across transaction/role/counterparty
        if all((tx_id, cp_id) not in used_assignments for cp_id, _ in counterparties_roles):
            # Register assignments to prevent duplicates
            for cp_id, _ in counterparties_roles:
                used_assignments.add((tx_id, cp_id))
            
            # Write to Neo4j
            session.write_transaction(link_transaction_with_roles, tx_id, repo_type, counterparties_roles)


  session.write_transaction(clear_existing_relationships)
[#D4DD]  _: <CONNECTION> error: Failed to read from defunct connection ResolvedIPv4Address(('44.215.43.76', 7687)) (ResolvedIPv4Address(('44.215.43.76', 7687))): ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)
Unable to retrieve routing information
Transaction failed and will be retried in 0.852247244825661s (Unable to retrieve routing information)
  session.write_transaction(link_transaction_with_roles, tx_id, repo_type, counterparties_roles)
