In [1]:
import pandas as pd
import numpy as np

In [2]:
import pandas as pd
import numpy as np

# Build a reproducible synthetic ledger of daily transactions and persist it
# to "classified_transactions.csv" for the later cells to read back.
np.random.seed(42)  # fixed seed: every re-run draws the same values

n = 200  # Number of transactions

# Value pools for the three label columns.
merchant_pool = [
    "Whole Foods", "Uber", "Netflix", "Rent Payment", "Starbucks",
    "Amazon", "Gas Station", "Bookstore", "Gym", "Water Bill",
]
category_pool = ["Groceries", "Transport", "Utilities", "Entertainment", "Rent"]
subcategory_pool = [
    "Supermarket", "Taxi", "Streaming", "Apartment Rent", "Coffee Shop",
    "Online Shopping", "Gas", "Books", "Gym Membership", "Water Bill",
]

# One transaction per calendar day, starting 2024-01-01.
dates = pd.date_range("2024-01-01", periods=n, freq="D")

# Amounts are normal around -50 (std 30), rounded to cents. The distribution
# is not truncated, so an occasional positive amount can occur.
amounts = np.random.normal(loc=-50, scale=30, size=n).round(2)

# NOTE: the three label columns are sampled independently of one another, so
# a row's description, category and subcategory are not guaranteed to agree.
# The draws happen in this exact order (description, category, subcategory)
# right after the amounts, which keeps the seeded output stable.
descriptions, main_categories, subcategories = [
    np.random.choice(pool, size=n)
    for pool in (merchant_pool, category_pool, subcategory_pool)
]

df = pd.DataFrame(
    dict(
        date=dates,
        amount=amounts,
        description=descriptions,
        category=main_categories,
        subcategory=subcategories,
    )
)

df.to_csv("classified_transactions.csv", index=False)
df.head()

Unnamed: 0,date,amount,description,category,subcategory
0,2024-01-01,-35.1,Uber,Groceries,Gas
1,2024-01-02,-54.15,Water Bill,Rent,Water Bill
2,2024-01-03,-30.57,Whole Foods,Rent,Streaming
3,2024-01-04,-4.31,Bookstore,Transport,Supermarket
4,2024-01-05,-57.02,Whole Foods,Transport,Coffee Shop


In [3]:
import pandas as pd
import numpy as np

# Build a reproducible table of per-card cashback offers and persist it to
# "credit_card_cashback.csv" for the later cells to read back.
np.random.seed(42)  # fixed seed: every re-run draws the same offers

credit_cards = [
    "CashBack Plus", "Everyday Rewards", "Super Saver", "Travel Perks", "Grocer's Choice"
]
categories = [
    "Groceries", "Transport", "Utilities", "Entertainment", "Rent", "Dining", "Online Shopping", "Gas", "Travel"
]


def _draw_card_offers(card_name):
    """Return the randomly drawn cashback offers for one card.

    The card gets between 3 and 5 distinct categories (the upper bound of
    ``randint`` is exclusive), and each chosen category gets its own rate
    drawn uniformly from 1 to 5 percent, rounded to one decimal place.
    Returns a list of dicts with keys ``credit_card``, ``category`` and
    ``cashback_percent``.
    """
    how_many = np.random.randint(3, 6)
    picked = np.random.choice(categories, size=how_many, replace=False)
    # Rates are drawn one at a time, in category order, so the random stream
    # is consumed in a fixed sequence.
    return [
        {
            "credit_card": card_name,
            "category": picked_category,
            "cashback_percent": np.round(np.random.uniform(1, 5), 1),
        }
        for picked_category in picked
    ]


rows = []
for card in credit_cards:
    rows.extend(_draw_card_offers(card))

cc_df = pd.DataFrame(rows)
cc_df.to_csv("credit_card_cashback.csv", index=False)
cc_df

Unnamed: 0,credit_card,category,cashback_percent
0,CashBack Plus,Travel,1.6
1,CashBack Plus,Transport,1.2
2,CashBack Plus,Dining,4.5
3,CashBack Plus,Groceries,3.4
4,CashBack Plus,Gas,3.8
5,Everyday Rewards,Travel,2.2
6,Everyday Rewards,Utilities,3.1
7,Everyday Rewards,Groceries,2.7
8,Everyday Rewards,Online Shopping,2.2
9,Super Saver,Transport,3.7


In [5]:
import pandas as pd

# Rank the credit cards by the total cashback they would have earned on the
# recorded spending.

# Load transactions and credit card cashback data
transactions = pd.read_csv("classified_transactions.csv")
cc_df = pd.read_csv("credit_card_cashback.csv")

# Only consider negative amounts as spending
spending = transactions[transactions['amount'] < 0].copy()

# Absolute spend per transaction category, computed once up front instead of
# re-filtering the whole spending frame for every (card, category) offer.
spend_by_category = spending.groupby('category')['amount'].sum().abs()

# Cashback earned by each offer row. Categories are matched by exact string
# equality; an offer whose category has no spending contributes 0.
offer_spend = cc_df['category'].map(spend_by_category).fillna(0.0)
offer_cashback = offer_spend * (cc_df['cashback_percent'] / 100)

# Sum the offers per card. sort=False keeps cards in first-appearance order,
# so the row labels (0..n-1) line up with the order of cards in the CSV.
cashback_df = (
    offer_cashback
    .groupby(cc_df['credit_card'], sort=False)
    .sum()
    .round(2)
    .rename('total_cashback')
    .reset_index()
)
cashback_df = cashback_df.sort_values('total_cashback', ascending=False)
cashback_df

Unnamed: 0,credit_card,total_cashback
4,Grocer's Choice,278.29
2,Super Saver,179.0
1,Everyday Rewards,105.59
0,CashBack Plus,99.68
3,Travel Perks,49.14
