In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Dataset registry: short key -> CSV location, relative to this notebook.
files = dict(
    credit_card="../data/Credit card transactions - India - Simple.csv",
    finance_small="../data/Personal_Finance_Data.csv",
    finance_expenses="../data/personal_finance_expenses.csv",
    finance_tracker="../data/personal_finance_tracker_dataset.csv",
)

# Read every CSV eagerly into a dict of DataFrames keyed like `files`
# (same insertion order, one pd.read_csv call per path).
dfs = dict(zip(files, map(pd.read_csv, files.values())))

# Quick structural overview of each dataset: size, column names,
# per-column null counts, and a three-row preview.
for name, df in dfs.items():
    print(f"\n===== {name.upper()} =====")
    print("Shape:", df.shape)
    print("Columns:", df.columns.tolist())
    # sep="\n" puts the Series on its own lines. Passing "...:\n" as the first
    # argument made print() insert a space before the first row, which
    # pushed that row out of alignment with the rest.
    print("Missing values:", df.isnull().sum(), sep="\n")
    # Rich display renders a wide frame as a single table instead of
    # wrapping its columns across several plain-text chunks.
    display(df.head(3))

# Summary statistics for every column (numeric and non-numeric alike),
# transposed so each dataset column becomes one row of the table.
for name, df in dfs.items():
    banner = f"\n===== {name.upper()} ====="
    print(banner)
    stats_by_column = df.describe(include='all').T
    display(stats_by_column)

# Report the number of fully duplicated rows in each dataset.
for name, df in dfs.items():
    duplicate_rows = df.duplicated().sum()
    print(f"{name}: {duplicate_rows} duplicates")

# Print the column dtypes of each dataset under a "<name> dtypes:" heading.
for name, df in dfs.items():
    # sep="\n" starts the dtype listing on a fresh line. Embedding "\n" in the
    # heading instead made print() put a space before the first row, which
    # misaligned it with the rows below.
    print(f"\n{name} dtypes:", df.dtypes, sep="\n")

# Example: expense distribution.
#
# The summary and dtype listings captured for "finance_expenses" show one column
# per spending category (Rent, Groceries, Transport, ...) and no "Amount"
# column, so dfs["finance_expenses"]["Amount"] would raise KeyError.
# NOTE(review): those listings are truncated in the saved output -- confirm the
# full column list. "Amount" is still used when it is present; otherwise we
# fall back to the transaction-level "finance_small" table (which has "Amount"
# and "Type") and keep only the rows whose Type is "Expense".
expense_source = dfs["finance_expenses"]
if "Amount" in expense_source.columns:
    expense_amounts = expense_source["Amount"]
else:
    transactions = dfs["finance_small"]
    expense_amounts = transactions.loc[transactions["Type"] == "Expense", "Amount"]

# Explicit figure/axes so the title and labels attach to this plot only.
fig, ax = plt.subplots()
sns.histplot(expense_amounts, bins=30, kde=True, ax=ax)
ax.set_title("Distribution of Expenses")
ax.set_xlabel("Amount")
ax.set_ylabel("Count")
plt.show()



===== CREDIT_CARD =====
Shape: (26052, 7)
Columns: ['index', 'City', 'Date', 'Card Type', 'Exp Type', 'Gender', 'Amount']
Missing values:
 index        0
City         0
Date         0
Card Type    0
Exp Type     0
Gender       0
Amount       0
dtype: int64
   index                   City       Date Card Type Exp Type Gender  Amount
0      0           Delhi, India  29-Oct-14      Gold    Bills      F   82475
1      1  Greater Mumbai, India  22-Aug-14  Platinum    Bills      F   32555
2      2       Bengaluru, India  27-Aug-14    Silver    Bills      F  101738

===== FINANCE_SMALL =====
Shape: (1500, 5)
Columns: ['Date', 'Transaction Description', 'Category', 'Amount', 'Type']
Missing values:
 Date                       0
Transaction Description    0
Category                   0
Amount                     0
Type                       0
dtype: int64
         Date             Transaction Description      Category   Amount  \
0  2020-01-02                         Score each.  Food & Drink 

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
index,26052.0,,,,13025.5,7520.708943,0.0,6512.75,13025.5,19538.25,26051.0
City,26052.0,986.0,"Bengaluru, India",3552.0,,,,,,,
Date,26052.0,600.0,20-Sep-14,65.0,,,,,,,
Card Type,26052.0,4.0,Silver,6840.0,,,,,,,
Exp Type,26052.0,6.0,Food,5463.0,,,,,,,
Gender,26052.0,2.0,F,13680.0,,,,,,,
Amount,26052.0,,,,156411.537425,103063.254287,1005.0,77120.25,153106.5,228050.0,998077.0



===== FINANCE_SMALL =====


Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
Date,1500.0,1025.0,2023-01-24,6.0,,,,,,,
Transaction Description,1500.0,1500.0,Score each.,1.0,,,,,,,
Category,1500.0,10.0,Rent,165.0,,,,,,,
Amount,1500.0,,,,1307.520913,982.283361,14.37,629.34,1156.285,1712.9325,4996.0
Type,1500.0,2.0,Expense,1222.0,,,,,,,



===== FINANCE_EXPENSES =====


Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
Income,20000.0,,,,41585.496104,40014.540538,1301.187333,17604.875957,30185.38348,51765.44646,1079728.373517
Age,20000.0,,,,41.03145,13.578725,18.0,29.0,41.0,53.0,64.0
Dependents,20000.0,,,,1.99595,1.417616,0.0,1.0,2.0,3.0,4.0
Occupation,20000.0,4.0,Retired,5019.0,,,,,,,
City_Tier,20000.0,3.0,Tier_2,10068.0,,,,,,,
Rent,20000.0,,,,9115.494629,9254.228188,235.365692,3649.422246,6402.751824,11263.940492,215945.674703
Loan_Repayment,20000.0,,,,2049.800292,4281.789941,0.0,0.0,0.0,2627.14232,123080.682009
Insurance,20000.0,,,,1455.028761,1492.938435,30.002012,580.204749,1017.124681,1787.160895,38734.932935
Groceries,20000.0,,,,5205.667493,5035.953689,154.07824,2165.426419,3741.091535,6470.892718,119816.898124
Transport,20000.0,,,,2704.466685,2666.345648,81.228584,1124.578012,1933.845509,3360.597508,81861.503457



===== FINANCE_TRACKER =====


Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
date,3000.0,60.0,2019-01-01,50.0,,,,,,,
user_id,3000.0,,,,1498.699,287.352782,1000.0,1248.75,1496.5,1749.0,1999.0
monthly_income,3000.0,,,,4004.267347,1000.107096,685.28,3362.25,4008.075,4659.04,7407.94
monthly_expense_total,3000.0,,,,3011.68223,801.151864,159.21,2473.205,3023.81,3555.5025,5853.2
savings_rate,3000.0,,,,0.225897,0.101417,0.05,0.14,0.23,0.31,0.4
budget_goal,3000.0,,,,2811.070463,490.855344,1175.57,2481.9575,2822.285,3131.0,4386.5
financial_scenario,3000.0,3.0,normal,1739.0,,,,,,,
credit_score,3000.0,,,,679.923667,49.970847,515.0,646.0,679.0,713.0,847.0
debt_to_income_ratio,3000.0,,,,0.350817,0.145191,0.1,0.22,0.35,0.48,0.6
loan_payment,3000.0,,,,508.58113,199.44458,0.0,375.34,508.57,638.47,1176.88


credit_card: 0 duplicates
finance_small: 0 duplicates
finance_expenses: 0 duplicates
finance_tracker: 0 duplicates

credit_card dtypes:
 index         int64
City         object
Date         object
Card Type    object
Exp Type     object
Gender       object
Amount        int64
dtype: object

finance_small dtypes:
 Date                        object
Transaction Description     object
Category                    object
Amount                     float64
Type                        object
dtype: object

finance_expenses dtypes:
 Income                             float64
Age                                  int64
Dependents                           int64
Occupation                          object
City_Tier                           object
Rent                               float64
Loan_Repayment                     float64
Insurance                          float64
Groceries                          float64
Transport                          float64
Eating_Out                         floa

ModuleNotFoundError: No module named 'seaborn'