### Imports
---

In [1]:
import pandas as pd
from faker import Faker
import random
import pandas as pd
from tqdm import tqdm

### Load data
---

In [None]:
# Load the raw transactions CSV into a DataFrame.
dataframe = pd.read_csv("src/data/HI-Small_Trans.csv")

In [None]:
# Preview the first two rows of the raw data.
dataframe.head(2)

In [None]:
# Show the (rows, columns) dimensions of the raw data.
dataframe.shape

### Transformations
---

In [None]:
# Keep only the transactions whose paid and received amounts are identical.
# .copy() makes the result an independent frame: later cells assign new
# columns to `filtered_dataframe`, and doing that on a boolean-mask slice of
# `dataframe` triggers pandas' SettingWithCopyWarning.
same_amount_mask = dataframe["Amount Paid"] == dataframe["Amount Received"]
filtered_dataframe = dataframe[same_amount_mask].copy()

In [None]:
# Normalise the column labels: lower-case them, then turn spaces into underscores.
normalised_columns = filtered_dataframe.columns.str.lower().str.replace(' ', "_")
filtered_dataframe.columns = normalised_columns

In [None]:
# Cast both bank identifier columns to strings so they can be concatenated
# with the account identifiers further down.
bank_columns = ["from_bank", "to_bank"]
filtered_dataframe[bank_columns] = filtered_dataframe[bank_columns].astype(str)

In [None]:
# Preview the first two rows after the filtering and column clean-up.
filtered_dataframe.head(2)

In [None]:
# Build "<bank>_<account>" identifiers for both sides of each transaction.
# "account" is the paying account column and "account.1" the receiving one.
sender_bank_prefix = filtered_dataframe["from_bank"] + "_"
receiver_bank_prefix = filtered_dataframe["to_bank"] + "_"
filtered_dataframe["sender"] = sender_bank_prefix + filtered_dataframe["account"]
filtered_dataframe["receiver"] = receiver_bank_prefix + filtered_dataframe["account.1"]

In [None]:
# Keep only the columns used downstream, in this order.
selected_columns = [
    "timestamp",
    "sender",
    "receiver",
    "amount_received",
    "receiving_currency",
    "amount_paid",
    "payment_currency",
    "payment_format",
    "is_laundering",
]
filtered_dataframe = filtered_dataframe.loc[:, selected_columns]

In [None]:
# Drop transactions where the sender and the receiver are the same account.
is_self_transfer = filtered_dataframe["sender"] == filtered_dataframe["receiver"]
filtered_dataframe = filtered_dataframe[~is_self_transfer]

In [None]:
# Persist the cleaned transaction-level data (without the index column).
filtered_dataframe.to_csv("src/data/full_transactions_data.csv", index=False)

In [None]:
# Collapse the transactions into one row per (sender, receiver) pair, with the
# total paid amount stored in a "value" column. Named aggregation yields the
# final column name directly, so no follow-up renaming is needed.
filtered_dataframe = filtered_dataframe.groupby(
    ["sender", "receiver"], as_index=False
).agg(value=("amount_paid", "sum"))

# Human-readable label for each pair. Built with a plain comprehension rather
# than a row-wise DataFrame.apply, which is much slower on large frames and
# misbehaves when the frame is empty.
# NOTE(review): the label says "BRL", but amounts are summed without regard to
# "payment_currency" -- confirm that this is intended.
filtered_dataframe["title"] = [
    f"from: {sender}\nto: {receiver}\namount (BRL): {value}"
    for sender, receiver, value in zip(
        filtered_dataframe["sender"],
        filtered_dataframe["receiver"],
        filtered_dataframe["value"],
    )
]

In [None]:
# Persist the per-pair aggregated data (without the index column).
filtered_dataframe.to_csv("src/data/network_analysis_data.csv", index=False)

### Generate fake customer data
---

In [None]:
# Reload the transaction-level file written earlier in this notebook.
all_clients = pd.read_csv("src/data/full_transactions_data.csv")

In [None]:
# Count the transactions of each sender and order the senders from the most
# to the least active. The count ends up in the "timestamp" column.
transactions_per_sender = all_clients.groupby("sender", as_index=False)["timestamp"].count()
all_clients = transactions_per_sender.sort_values(by="timestamp", ascending=False)

In [None]:
# Collect the distinct sender identifiers.
# NOTE(review): only accounts that appear as a sender are included here;
# accounts that only ever receive are not -- confirm that this is intended.
# NOTE(review): converting to a set discards the ordering produced by the
# preceding sort.
all_clients_id = set(all_clients["sender"])

In [None]:
# Faker instance used below to fabricate the customer details.
fake = Faker()
# Seed Faker with a fixed value -- presumably so the generated values are
# repeatable between runs.
Faker.seed(42)

In [None]:
# Build one synthetic customer profile per account id.
#
# The ids are visited in sorted order on purpose: `all_clients_id` is a set of
# strings, and the iteration order of such a set can differ between
# interpreter sessions. Without a stable order, the fixed Faker seed would
# still pair each account with different fake values on every run.
data = []
# Using tqdm as a context manager closes the progress bar when the loop ends.
with tqdm(sorted(all_clients_id)) as bar:
    for client_id in bar:
        # location_on_land() returns
        # (latitude, longitude, place name, country code, timezone).
        location = fake.location_on_land()
        data.append({
            "account_id": client_id,
            "name": fake.name(),
            "email": fake.email(),
            "location": f"{location[2]} - {location[3]} - {location[4]}",
            "registration_date": fake.date_this_year(),
        })
df = pd.DataFrame(data)

In [None]:
# Preview the first rows of the generated account details.
df.head()

In [None]:
# Persist the generated account details (without the index column).
df.to_csv('src/data/accounts_details.csv', index=False)

In [2]:
# Reload the account details from the CSV written earlier in this notebook.
df = pd.read_csv('src/data/accounts_details.csv')

In [3]:
from src.utils.accounts import get_account_details

In [4]:
# Look up the details of a single account by its "<bank>_<account>" id.
# The helper lives in src/utils/accounts; the recorded output below shows it
# returning a dict with the account's fields.
get_account_details(df, "38134_8036B5120")

{'account_id': '38134_8036B5120',
 'name': 'Patrick Sanchez',
 'email': 'jillrhodes@example.net',
 'location': 'Alipur - PK - Asia/Karachi',
 'registration_date': '2025-01-05'}