In [74]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
from prophet import Prophet

from sklearn.metrics import mean_squared_error, mean_absolute_error

import warnings

# Blanket-suppress every warning for the rest of the kernel session.
warnings.filterwarnings("ignore")

import pandas as pd

# Pandas display settings: show full cell contents, show every column,
# and allow very wide rows before wrapping.
pd.options.display.max_colwidth = None
pd.options.display.max_columns = None
pd.options.display.width = 5000

# Both styles are applied in sequence; settings from the second call win
# wherever the two styles define the same rc parameter.
plt.style.use('ggplot')
plt.style.use('fivethirtyeight')


In [75]:
# Load the engineered transaction features from the working directory and
# list the available columns (path is relative; adjust it to your setup).
# NOTE(review): the listing includes both 'Balance' and 'Balance.1', which is
# how pandas names a repeated header - presumably the CSV carries the Balance
# column twice; confirm and drop the duplicate at the source.
df = pd.read_csv("Features.csv")  # Update with your file path
df.columns

Index(['Transaction Date', 'Description', 'Reference No./Cheque No.', 'Debit', 'Credit', 'Balance', 'Transaction Type', 'Transaction Mode', 'DR/CR Indicator', 'Transaction ID', 'Recipient Name', 'Bank', 'UPI ID', 'Note', 'dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'dayofmonth', 'weekofyear', 'weekday', 'is_weekend', 'transaction_amount', 'is_large_transaction', 'Balance.1'], dtype='object')

In [73]:
# NOTE(review): this import lives outside the notebook's main import cell;
# consider moving it there so dependencies are visible in one place.
import dtale

# Open `df` in D-Tale for interactive exploration. The log lines recorded
# under this cell come from the D-Tale server shutting itself down.
dtale.show(df)



2025-02-23 08:52:18,958 - INFO     - Executing shutdown due to inactivity...
2025-02-23 08:52:27,250 - INFO     - Executing shutdown...
2025-02-23 08:52:27,253 - INFO     - Not running with the Werkzeug Server, exiting by searching gc for BaseWSGIServer


In [97]:
import pandas as pd

# Reload the raw features so this cell always starts from the file on disk.
df = pd.read_csv("Features.csv")  # Replace with actual filename

# Parse the date column; values that cannot be parsed become NaT.
df["Transaction Date"] = pd.to_datetime(df["Transaction Date"], errors="coerce")

# Inclusive reporting window (modify as needed).
start_date = "2024-01-01"
end_date = "2024-12-31"

# Columns kept for the dashboard cells that follow.
selected_columns = [
    'Transaction Date', 'Debit', 'Credit', 'Balance', 'Transaction Mode',
    'Recipient Name', 'Transaction ID', 'Bank', 'UPI ID', 'Note',
    'transaction_amount',
]

# Keep only rows inside the window (NaT rows never match) and only the
# selected columns; copy so later column assignments do not touch a view.
in_window = df["Transaction Date"].between(start_date, end_date)
df = df.loc[in_window, selected_columns].copy()


In [99]:
# Display the filtered frame; the notebook elides the middle rows ("...").
df

Unnamed: 0,Transaction Date,Debit,Credit,Balance,Transaction Mode,Recipient Name,Transaction ID,Bank,UPI ID,Note,transaction_amount
0,2024-04-02,30.00,0.0,954.40,UPI,JULFIKAR,409378221768,YESB,paytmqr1jc,baker,30.00
1,2024-04-04,0.00,500.0,1454.40,INB,XX8237,9890160567,,,Son-,0.00
2,2024-04-05,64.00,0.0,1390.40,UPI,VINAYAK,409652615602,HDFC,vinayakpbh,UPI,64.00
3,2024-04-06,0.00,1500.0,2890.40,INB,XX8237,9890160567,,,Son-,0.00
4,2024-04-09,719.00,0.0,2171.40,UPI,BHARTI A,410014886005,AIRP,airtelprep,Payme,719.00
...,...,...,...,...,...,...,...,...,...,...,...
294,2024-11-04,0.00,300.0,1244.81,UPI,SAMEER B,430929085524,ICIC,sawant.123,UPI,0.00
295,2024-11-04,295.78,0.0,949.03,UPI,ZOMATO L,430902812486,UTIB,zomatoorde,UPI,295.78
296,2024-11-05,79.00,0.0,870.03,UPI,DILIPKU,467686807162,YESB,paytmqr5wr,UPI,79.00
297,2024-11-05,15.00,0.0,855.03,UPI,SHINDE R,431057312820,COSB,shinderayc,UPI,15.00


In [102]:
import pandas as pd
import panel as pn
import plotly.express as px

# Enable Panel extension
pn.extension('plotly')

# Calendar order for the month axis (alphabetical order would scramble it).
month_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
               'August', 'September', 'October', 'November', 'December']

# month_name() returns English names by default, so the labels always match
# `month_order`; strftime('%B') would follow the process locale instead.
df['Month'] = df['Transaction Date'].dt.month_name()

# Total debit and credit per month.
# NOTE(review): grouping on the month name pools the same month from
# different years; that is fine while `df` is limited to a single year.
monthly_summary = df.groupby('Month')[['Debit', 'Credit']].sum().reset_index()
monthly_summary['Month'] = pd.Categorical(monthly_summary['Month'], categories=month_order, ordered=True)
monthly_summary = monthly_summary.sort_values('Month')

# Months that actually occur in the data, already in calendar order.
months_present = monthly_summary['Month'].astype(str).tolist()

# Long format: one row per (Month, Type) with its Amount.
monthly_long = monthly_summary.melt(id_vars=['Month'], var_name='Type', value_name='Amount')

# A pie keyed on 'Type' adds all months together and shows only two slices,
# which contradicts the "Monthly" title. Grouped bars keep one Debit/Credit
# pair per month so the month-by-month comparison is visible.
fig = px.bar(
    monthly_long,
    x='Month',
    y='Amount',
    color='Type',
    barmode='group',
    category_orders={'Month': months_present},
    title="Monthly Credit (Income) vs. Debit (Expenses)",
    color_discrete_map={'Credit': 'green', 'Debit': 'red'}
)

dashboard = pn.Column(
    "# Monthly Financial Overview",
    pn.pane.Plotly(fig),
)

dashboard.servable()


In [45]:

# Per-counterparty ("Unique ID") summary of the transactions in `df_filtered`.
# Requires `df_filtered` to exist with "Transaction Date", "UPI ID",
# "Recipient Name", "Debit", "Credit" and "Balance" columns.
# NOTE(review): in this notebook `df_filtered` is created by a cell that
# appears further down; move this cell below it so Restart & Run All works.

# Use UPI ID if available; otherwise, use Recipient Name
# (recreated here so the cell does not depend on the column having been
# added by some earlier, no longer visible, execution).
if "Unique ID" not in df_filtered.columns:
    df_filtered["Unique ID"] = df_filtered["UPI ID"].fillna(df_filtered["Recipient Name"])

# Extract Date Components
# Go through pd.to_datetime so the cell can be re-run: after the first run the
# column holds plain `date` objects, on which the `.dt` accessor would raise.
txn_timestamps = pd.to_datetime(df_filtered["Transaction Date"], errors="coerce")
iso_calendar = txn_timestamps.dt.isocalendar()
df_filtered["year"] = txn_timestamps.dt.year
df_filtered["month"] = txn_timestamps.dt.month
df_filtered["weekofyear"] = iso_calendar.week
# ISO weeks belong to the ISO year, which differs from the calendar year
# around 1 January (e.g. 2023-01-01 is ISO 2022-W52).
df_filtered["iso_year"] = iso_calendar.year
df_filtered["Transaction Date"] = txn_timestamps.dt.date

# Aggregate Unique UPI IDs (or Recipient Name)
agg_data = df_filtered.groupby(["Unique ID"]).agg(
    First_Transaction=("Transaction Date", "min"),  # First transaction date
    Last_Transaction=("Transaction Date", "max"),  # Last transaction date
    Total_Transactions=("Transaction Date", "count"),  # Total transactions
    Total_Debit=("Debit", "sum"),  # Sum of all debits
    Total_Credit=("Credit", "sum"),  # Sum of all credits
    Max_Balance=("Balance", "max"),  # Maximum balance recorded
    Min_Balance=("Balance", "min"),  # Minimum balance recorded
    Avg_Balance=("Balance", "mean"),  # Average balance over transactions
).reset_index()

# Count Transactions at Different Intervals
daily_counts = df_filtered.groupby(["Transaction Date", "Unique ID"]).size().reset_index(name="Daily Count")
# Weekly buckets are keyed on the ISO year so a week spanning New Year is not
# merged with the same week number twelve months later.
weekly_counts = df_filtered.groupby(["iso_year", "weekofyear", "Unique ID"]).size().reset_index(name="Weekly Count")
monthly_counts = df_filtered.groupby(["year", "month", "Unique ID"]).size().reset_index(name="Monthly Count")
yearly_counts = df_filtered.groupby(["year", "Unique ID"]).size().reset_index(name="Yearly Count")

# Merge counts into the main dataset
# NOTE(review): summing per-period counts per ID yields the ID's total number
# of transactions, so all four columns duplicate Total_Transactions. If the
# intent was "number of active days/weeks/months/years" or an average per
# period, the aggregation needs to change - confirm the intended metric.
for period_counts, count_column in (
    (daily_counts, "Daily Count"),
    (weekly_counts, "Weekly Count"),
    (monthly_counts, "Monthly Count"),
    (yearly_counts, "Yearly Count"),
):
    per_id_total = period_counts.groupby("Unique ID")[count_column].sum().reset_index()
    agg_data = agg_data.merge(per_id_total, on="Unique ID", how="left")

# Fill NaN values with 0 (for cases where counts might be missing)
agg_data = agg_data.fillna(0)

# Save the final unique UPI-based transaction summary to CSV
agg_data.to_csv("unique_upi_transaction_summary.csv", index=False)

print("✅ Processed unique UPI transaction summary saved successfully!")


✅ Processed unique UPI transaction summary saved successfully!


In [66]:
# Display the per-"Unique ID" summary (one row per ID; middle rows elided).
agg_data

Unnamed: 0,Unique ID,First_Transaction,Last_Transaction,Total_Transactions,Total_Debit,Total_Credit,Max_Balance,Min_Balance,Avg_Balance,Daily Count,Weekly Count,Monthly Count,Yearly Count
0,0791363A00,2023-08-30,2023-08-30,1,30.00,0.00,806.92,806.92,806.920000,1,1,1,1
1,7045314562,2023-02-24,2023-02-24,1,0.00,157.00,1398.88,1398.88,1398.880000,1,1,1,1
2,7264817938,2024-06-05,2024-06-05,1,0.00,200.00,1042.31,1042.31,1042.310000,1,1,1,1
3,7eleven.42,2023-05-10,2023-05-10,1,70.00,0.00,1210.82,1210.82,1210.820000,1,1,1,1
4,8237599879,2024-06-04,2024-06-04,1,125.00,0.00,1008.31,1008.31,1008.310000,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,vinayakpbh,2023-05-10,2024-04-23,11,300.00,2276.06,5415.46,696.92,2148.612727,11,11,11,11
240,vinayracha,2023-02-22,2023-09-24,2,116.00,15.00,1752.88,403.92,1078.400000,2,2,2,2
241,vyapar.170,2024-06-15,2024-06-15,1,210.00,0.00,1493.31,1493.31,1493.310000,1,1,1,1
242,write2adit,2024-04-23,2024-04-23,1,71.00,0.00,695.02,695.02,695.020000,1,1,1,1


In [61]:
import pandas as pd

# Start from the file on disk so the cell is independent of earlier state.
df = pd.read_csv("Features.csv")  # Replace with actual filename

# Parse dates; unparseable values become NaT and drop out of the range filter.
df["Transaction Date"] = pd.to_datetime(df["Transaction Date"], errors="coerce")

# Inclusive analysis window (modify as needed).
start_date = "2022-01-01"
end_date = "2025-12-31"

# Rows inside the window, copied so the assignments below are safe.
in_range = df["Transaction Date"].between(start_date, end_date)
df_filtered = df.loc[in_range].copy()

# Counterparty key: the UPI ID when present, otherwise the recipient name.
upi_id = df_filtered["UPI ID"]
df_filtered["Unique ID"] = upi_id.fillna(df_filtered["Recipient Name"])

# Calendar components are taken from the timestamp column first; only then
# is that column reduced to plain `date` objects.
txn_timestamps = df_filtered["Transaction Date"]
df_filtered = df_filtered.assign(
    year=txn_timestamps.dt.year,
    month=txn_timestamps.dt.month,
    weekofyear=txn_timestamps.dt.isocalendar().week,
)
df_filtered["Transaction Date"] = txn_timestamps.dt.date

# Daily transaction counts per counterparty
def calculate_daily_transactions(df):
    """Count transactions per ("Transaction Date", "Unique ID") pair.

    Returns a frame with one row per pair and the columns
    "Transaction Date", "Unique ID" and "Daily Count".
    """
    pair_keys = ["Transaction Date", "Unique ID"]
    rows_per_pair = df.groupby(pair_keys).size()
    return rows_per_pair.reset_index(name="Daily Count")

# Function to calculate weekly transaction count
def calculate_weekly_transactions(df):
    """Running transaction number of each unique ID within its ISO week.

    The week bucket is derived from "Transaction Date" itself, using the ISO
    year together with the ISO week. Pairing the calendar year with the ISO
    week number (as precomputed "year"/"weekofyear" columns do) puts dates
    such as 2023-01-01 (ISO 2022-W52) into the same bucket as late December
    2023 (ISO 2023-W52).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "Transaction Date" (dates or timestamps) and "Unique ID".
        Extra columns are ignored; the input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns "Transaction Date", "Unique ID", "Weekly Count", one row per
        input row, ordered by ID and then chronologically. "Weekly Count"
        restarts at 1 for every (ID, ISO year, ISO week).
    """
    # Accept both datetime64 values and plain `date` objects.
    iso_calendar = pd.to_datetime(df["Transaction Date"]).dt.isocalendar()
    week_keys = ["Unique ID", "_iso_year", "_iso_week"]
    ordered = df.assign(
        _iso_year=iso_calendar["year"],
        _iso_week=iso_calendar["week"],
    ).sort_values(week_keys + ["Transaction Date"])
    ordered["Weekly Count"] = ordered.groupby(week_keys).cumcount() + 1
    return ordered[["Transaction Date", "Unique ID", "Weekly Count"]]

# Monthly running transaction number per counterparty
def calculate_monthly_transactions(df):
    """Number each unique ID's transactions within a (year, month), starting at 1.

    Rows are ordered by ID, year, month and date; the result keeps the columns
    "Transaction Date", "Unique ID" and "Monthly Count".
    """
    month_keys = ["Unique ID", "year", "month"]
    ordered = df.sort_values(month_keys + ["Transaction Date"])
    running_number = ordered.groupby(month_keys).cumcount() + 1
    ordered = ordered.assign(**{"Monthly Count": running_number})
    return ordered[["Transaction Date", "Unique ID", "Monthly Count"]]

# Lifetime running transaction number per counterparty
def calculate_total_transactions(df):
    """Number each unique ID's transactions in date order, starting at 1.

    The result keeps the columns "Transaction Date", "Unique ID" and
    "Total Transactions", ordered by ID and then by date.
    """
    ordered = df.sort_values(["Unique ID", "Transaction Date"])
    running_number = ordered.groupby("Unique ID").cumcount() + 1
    ordered = ordered.assign(**{"Total Transactions": running_number})
    return ordered[["Transaction Date", "Unique ID", "Total Transactions"]]

# Calculate all metrics
daily_counts = calculate_daily_transactions(df_filtered)
weekly_counts = calculate_weekly_transactions(df_filtered)
monthly_counts = calculate_monthly_transactions(df_filtered)
total_transactions = calculate_total_transactions(df_filtered)

# NOTE(review): the merge below is intentionally left disabled. The weekly,
# monthly and total frames hold one row per transaction, so joining them on
# ("Transaction Date", "Unique ID") multiplies rows whenever an ID has several
# transactions on the same day. Join on a per-transaction key instead before
# re-enabling it.
# # Merge all calculated counts into one dataset
# df_final = df_filtered.merge(daily_counts, on=["Transaction Date", "Unique ID"], how="left")
# df_final = df_final.merge(weekly_counts, on=["Transaction Date", "Unique ID"], how="left")
# df_final = df_final.merge(monthly_counts, on=["Transaction Date", "Unique ID"], how="left")
# df_final = df_final.merge(total_transactions, on=["Transaction Date", "Unique ID"], how="left")

# # Fill NaN values with 0 (for cases where counts might be missing)
# df_final.fillna(0, inplace=True)

# This cell writes no file, so the status line reports only what was done
# (the previous message claimed a summary had been saved).
print("✅ Transaction count metrics calculated (no file written).")


✅ Processed unique UPI transaction summary saved successfully!


In [95]:
# df["UPI ID"] = df["UPI ID"].fillna(df["Recipient Name"])
# nan_rows = df[df['UPI ID'].isna() | (df['UPI ID'] == "nan")]
# nan_rows

In [62]:
# Quick sanity check: (rows, columns) of each metric frame, printed in the
# order daily, monthly, total, weekly.
for metric_frame in (daily_counts, monthly_counts, total_transactions, weekly_counts):
    print(metric_frame.shape)

# Missing values per column in the daily counts (shown as the cell output).
daily_counts.isna().sum()
# df.shape

(650, 3)
(741, 3)
(741, 3)
(741, 3)


Transaction Date    0
Unique ID           0
Daily Count         0
dtype: int64

In [64]:
def print_transaction_details(df):
    """
    Print each row of ``df`` as a labelled block.

    Every block is a "Row <index>:" header, one indented "column: value" line
    per column, and a 50-dash separator line.
    """
    separator = "-" * 50  # Separator for better readability
    for row_label, row_values in df.iterrows():
        block = [f"Row {row_label}:"]
        block.extend(f"  {col}: {row_values[col]}" for col in df.columns)
        block.append(separator)
        print("\n".join(block))
# Print every row of daily_counts as a labelled block.
# NOTE(review): this emits one block per row (650 rows per the recorded
# shape output); consider passing daily_counts.head() to keep the output short.
print_transaction_details(daily_counts)
# print_transaction_details(df)
        


Row 0:
  Transaction Date: 2022-05-31
  Unique ID: XX4470
  Daily Count: 1
--------------------------------------------------
Row 1:
  Transaction Date: 2022-06-18
  Unique ID: XX8237
  Daily Count: 1
--------------------------------------------------
Row 2:
  Transaction Date: 2022-06-24
  Unique ID: XX8237
  Daily Count: 1
--------------------------------------------------
Row 3:
  Transaction Date: 2022-08-27
  Unique ID: prachiswt2
  Daily Count: 2
--------------------------------------------------
Row 4:
  Transaction Date: 2022-09-10
  Unique ID: prachiswt2
  Daily Count: 1
--------------------------------------------------
Row 5:
  Transaction Date: 2022-09-10
  Unique ID: rutuswt052
  Daily Count: 1
--------------------------------------------------
Row 6:
  Transaction Date: 2022-09-19
  Unique ID: rutuswt052
  Daily Count: 1
--------------------------------------------------
Row 7:
  Transaction Date: 2022-09-20
  Unique ID: paytm-6467
  Daily Count: 1
-----------------------