### Imports
---

In [1]:
import plotly.express as px
import plotly.graph_objects as go
import altair as alt
import pandas as pd
from src.utils.dataset import get_full_transactions_dataset



### Dataframe preparation
---

In [2]:
# Load the transactions DataFrame through the project's dataset helper.
transactions_df = get_full_transactions_dataset()

2025-02-20 21:48:38.756 
  command:

    streamlit run C:\Users\ferna\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [3]:
# Preview the first rows to check which columns are available.
transactions_df.head()

Unnamed: 0,timestamp,sender,receiver,amount_received,receiving_currency,amount_paid,payment_currency,payment_format
0,2022/09/01 00:20,3208_8000F4580,1_8000F5340,0.01,US Dollar,0.01,US Dollar,Cheque
1,2022/09/01 00:26,12_8000EC280,2439_8017BF800,7.66,US Dollar,7.66,US Dollar,Credit Card
2,2022/09/01 00:21,1_8000EDEC0,211050_80AEF5310,383.71,US Dollar,383.71,US Dollar,Credit Card
3,2022/09/01 00:04,1_8000F4510,11813_8011305D0,9.82,US Dollar,9.82,US Dollar,Credit Card
4,2022/09/01 00:08,1_8000F4FE0,245335_812ED62E0,4.01,US Dollar,4.01,US Dollar,Credit Card


In [5]:
# Identifier of the account analyzed below; it is matched against the
# `sender` and `receiver` columns.
account_id = '70_100428660'

In [6]:
# Keep only the rows in which the selected account takes part, either as
# the sender or as the receiver.
transactions_df = transactions_df.loc[
    lambda frame: (frame["sender"] == account_id) | (frame["receiver"] == account_id)
]

In [7]:
# Display the frame after filtering to rows where the account is sender or receiver.
transactions_df

Unnamed: 0,timestamp,sender,receiver,amount_received,receiving_currency,amount_paid,payment_currency,payment_format
50,2022/09/01 00:25,70_100428660,10_800059F50,5105.92,US Dollar,5105.92,US Dollar,Cheque
99,2022/09/01 00:16,70_100428660,220_800132390,15509630.09,US Dollar,15509630.09,US Dollar,Cheque
100,2022/09/01 00:01,70_100428660,220_800132390,18722.79,US Dollar,18722.79,US Dollar,Credit Card
101,2022/09/01 00:03,70_100428660,220_800132390,19780972.52,US Dollar,19780972.52,US Dollar,Cash
140,2022/09/01 00:14,70_100428660,220_800190EB0,22605.52,US Dollar,22605.52,US Dollar,Cheque
...,...,...,...,...,...,...,...,...
4482482,2022/09/10 23:31,70_100428660,51182_8139CDF40,305.17,US Dollar,305.17,US Dollar,Credit Card
4482483,2022/09/10 23:44,70_100428660,51182_8139CDF40,28.42,US Dollar,28.42,US Dollar,Cash
4482487,2022/09/10 23:53,70_100428660,122080_8139E7180,289.78,US Dollar,289.78,US Dollar,Cheque
4482488,2022/09/10 23:50,70_100428660,122080_8139E7180,82.90,US Dollar,82.90,US Dollar,Credit Card


### Visualization
---

In [8]:
# Work on a separate frame that carries an extra `date` column: the calendar
# day extracted from the parsed `timestamp`.
transactions_timeline_df = transactions_df.assign(
    date=lambda frame: pd.to_datetime(frame["timestamp"]).dt.date
)

In [9]:
# Per-day totals of `amount_paid`, split by the role the account played.
received_transactions = (
    transactions_timeline_df
    .loc[lambda frame: frame["receiver"] == account_id]
    .groupby("date", as_index=False)["amount_paid"]
    .sum()
)
sent_transactions = (
    transactions_timeline_df
    .loc[lambda frame: frame["sender"] == account_id]
    .groupby("date", as_index=False)["amount_paid"]
    .sum()
)

In [10]:
# Inspect the per-day `amount_paid` totals for transactions received by the account.
received_transactions

Unnamed: 0,date,amount_paid
0,2022-09-01,0.19
1,2022-09-02,239797.3
2,2022-09-09,239797.3


In [11]:
# Timeline of the account's daily totals: one line for the amounts it
# received and one for the amounts it sent.
fig = go.Figure()
fig.add_trace(go.Scatter(x=received_transactions.date, y=received_transactions.amount_paid, name='Received amount', line=dict(color="#007bff")))
fig.add_trace(go.Scatter(x=sent_transactions.date, y=sent_transactions.amount_paid, name='Sent amount', line=dict(color='#fd7e14')))
# Give the chart a title and axis labels so it can be read on its own,
# like the other figures in this notebook.
fig.update_layout(
    title="Daily Received vs. Sent Amounts",
    xaxis_title="Date",
    yaxis_title="Total Amount Paid",
)
fig.show()

In [12]:
# Total `amount_paid` per payment format for the selected account's transactions.
payment_type_df = (
    transactions_df
    .groupby("payment_format", as_index=False)["amount_paid"]
    .sum()
)

In [13]:
# Fixed hex color for each payment format label, passed to the pie chart
# as its discrete color map.
custom_colors = {
    "Cheque": "#4E79A7",
    "Credit Card": "#F28E2B",
    "ACH": "#E15759",
    "Cash": "#76B7B2",
    "Wire": "#59A14F",
    "Bitcoin": "#D4A157",
}

In [14]:
# Donut chart of the total amount paid per payment format, colored with
# the fixed palette in `custom_colors`.
fig = px.pie(
    data_frame=payment_type_df,
    names="payment_format",
    values="amount_paid",
    color="payment_format",
    color_discrete_map=custom_colors,
    hole=0.4,
    title="Payment Method Distribution",
)
fig.show()

In [None]:
# Histogram of individual transaction amounts for the selected account.
fig = px.histogram(
    transactions_df,
    x="amount_paid",
    title="Transaction Value Distribution",
    labels={"amount_paid": "Transaction Amount"},
    color_discrete_sequence=["#F28E2B"],
    nbins=20
)

# Improve the layout: explicit axis titles, a small gap between bars and a
# log-scaled y-axis.
fig.update_layout(
    xaxis_title="Transaction Amount (USD)",
    yaxis_title="Frequency",
    bargap=0.1,
    yaxis_type = 'log'
)
# Render explicitly, as the other chart cells do, instead of relying on the
# figure returned by update_layout() being the cell's last expression.
fig.show()

In [None]:
# Heatmap of the total amount paid by hour of day and day of week.
df = transactions_df.copy()
df["timestamp"] = pd.to_datetime(df["timestamp"])
df["hour"] = df["timestamp"].dt.hour
df["weekday"] = df["timestamp"].dt.day_name()

# One row per (weekday, hour) pair with the summed amount paid.
heatmap_data = df.groupby(["weekday", "hour"], as_index=False)["amount_paid"].sum()

weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
heatmap_data["weekday"] = pd.Categorical(heatmap_data["weekday"], categories=weekday_order, ordered=True)

fig = px.density_heatmap(
    heatmap_data,
    x="hour",
    y="weekday",
    z="amount_paid",
    title="Transaction Heatmap (Hour vs. Weekday)",
    color_continuous_scale="Reds",
    nbinsx=24,
    # Plotly Express orders categorical axes by first appearance in the data
    # (alphabetical here, because of the groupby) and does not use the pandas
    # Categorical ordering, so the weekday order is passed explicitly.
    category_orders={"weekday": weekday_order},
)
fig.show()