In [8]:
# Importing default Libraries
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np
import seaborn as sns
import warnings
import datetime 
import os 

from money_mate.params import DATA_PATH

# Set the maximum number of rows and columns to be displayed
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000

# Hi-resolution Plots and Matplotlib inline
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

# Suppress every warning from here on
# NOTE(review): this also hides pandas deprecation/dtype warnings — consider narrowing the filter
warnings.filterwarnings('ignore')

# "magic commands" to enable autoreload of your imported packages
# NOTE(review): re-running this cell prints an "already loaded" notice;
# `%reload_ext autoreload` is the form that notice itself recommends.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Read the Monzo transaction export from DATA_PATH and preview the first two rows
monzo_csv_path = f'{DATA_PATH}/monzo.csv'
data = pd.read_csv(monzo_csv_path)
data.head(2)

Unnamed: 0,Transaction ID,Date,Time,Type,Name,Emoji,Category,Amount,Currency,Local amount,Local currency,Notes and #tags,Address,Receipt,Description,Category split
0,tx_00009jGereHTyV50ElCRLl,28/05/2019,11:30:19,Faster payment,DU PLESSIS J V B,,Income,150.0,GBP,150.0,GBP,BARCLAYS,,,BARCLAYS,
1,tx_00009jGsehBRGqJ8N1IJlp,28/05/2019,14:04:51,Card payment,Boots,💊,Medical,-2.79,GBP,-2.79,GBP,💊,198-200 Fulham Palace Road,,BOOTS FULHAM GBR,


In [10]:
# Column dtypes and non-null counts of the raw export
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6442 entries, 0 to 6441
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Transaction ID   6442 non-null   object 
 1   Date             6442 non-null   object 
 2   Time             6442 non-null   object 
 3   Type             6442 non-null   object 
 4   Name             6437 non-null   object 
 5   Emoji            5394 non-null   object 
 6   Category         6442 non-null   object 
 7   Amount           6442 non-null   float64
 8   Currency         6442 non-null   object 
 9   Local amount     6442 non-null   float64
 10  Local currency   6442 non-null   object 
 11  Notes and #tags  1600 non-null   object 
 12  Address          4394 non-null   object 
 13  Receipt          7 non-null      object 
 14  Description      6238 non-null   object 
 15  Category split   0 non-null      float64
dtypes: float64(3), object(13)
memory usage: 805.4+ KB


In [11]:
# Define the classification rules
#
# Type rules: transaction type -> category. classify_by_type() looks up
# row['Type'] in this dict with an exact key match; any type not listed here
# is labelled 'Uncategorized' by that function.
transaction_type_rules = {
    "wise_cashback": "Income",
    "Pot transfer": "Transfer",
    "overdraft": "Bank Charges",
    "Monzo Paid": "Bank Charges",
    "Flex": "Loan",
    "Account interest": "Income",
    "Bacs (Direct Credit)": "Income",
}

# Exact-match name rules: category -> list of transaction names.
# refine_by_name() tests row['Name'] against each list with `in`, so an entry
# only matches when it equals the name character-for-character (case, spacing
# and punctuation included). Categories are scanned in the order written here
# and the first list containing the name wins.
transaction_name_rules = {
    "Shopping": [
        "Amazon",
        "Amazon Music",
        "Audible",
        "Clonezone",
        "Clonezone - Soho",
        "G-star Raw",
        "GSingh",
        "IKEA",
        "Leyland Lsdm Earl's Court",
        "Leyland SDM",
        "Robert Dyas",
        "United Shop Company",
        "eBay",
        "www.thickwall.co.uk",
        "World Duty Free",
        "Apple Store",
        "Futon Ltd",
    ],
    "Eating Out": [
        "Masala",
        "Pix Pintxo Bateman",
        "Kula",
        "Nando’s",
        "SOHO Coffee Co.",
        "Soft Ice",
        "Too Good To Go",
        "Cafe Manhattans",
        "Paul Uk Gloucester Rd",
        "Old Ship, Hammersmith",
        "Black Rabbit Cafe",
        "PAUL Hammersmith",
        "Da Bagel Spot",
        "Bertotti Pure Italian",
        "La Pappardella",
        "Kucci Cafe",
        "Comptoir Libanais",
        "Wondertree",
        "Alamo Spur        3415",
        "Pret",
        "Napier Farmstall  3836",
        "Pizza Home West Brompt",
        "Maoz",
        "Bayswater Arms",
        "Chalet Cafe",
        "Comptons London",
        "Maroush Bakehouse",
        "Lions Prep",
        "The Bolton",
        "Blanche Eatery Kensing",
        "Star Pizza",
        "Audrey Green",
        "Peregrine Farm Stall",
        "Taco Bell",
        "Wahaca",
        "Yoco Pickled Green",
        "Chalet",
        "Tesco Petrol",
        "Hooked Fish Bar",
        "Marias Fish Bar",
        "Ik Infusion Social Cl",
        "V&A Food Market",
        "North Lodge Cafe",
        "Bagel Bakery Bar",
        "GAIL's Bakery",
        "Masala Zone Earls Cour",
        "MEATliquor",
        "Paul",
        "Juicebaby Ltd",
        "Gordon Ramsay Street Burger",
        "Hasty Tasty Pizza",
        "Black Sheep Coffee",
        "Creams Kings Cross Lon",
        "Mcd Seapoint (0465)",
        "Star Wraps",
        "Tapas Revoluti",
        "Mad Paella Ltd.",
        "Big Bite",
        "Crosstown",
        "Shawa Westfield",
        "Costa Coffee",
        "Delhi By Nature Ltd",
        "Burger King",
        "Busaba",
        "Raitakrai",
        "Casa Manolo",
        "Aeroporto de Lisboa",
        "Starbucks",
        "Indi-go Rasoi",
        "The Bull Westfield",
        "Balans Westfield Londo\\unit 1034\\lo",
        "Patri Takeaway",
        "Coffee&cates",
        "Cafe Du Coin",
        "Wok to Walk",
        "Cafe Boheme",
        "Subway",
        "Kings Arms",
        "Pure",
        "Peregrine Farm St26542",
        "The Gallery",
        "Battersea Park Cafe Gr",
        "Emirates Leis Concd Dr",
        "Snackers",
        "KFC",
        "Stella Coffee",
        "Lillie Langtry Fulham",
        "Kings Kebab Hous",
        "The Prince of Teck",
        "McDonald’s",
        "Rwrd Ltd",
        "Fresh Bake",
        "Mona Lisa Cafe",
        "Lamb Rolla",
        "Ibericos",
        "Il Molino",
        "Over Under : West Brom",
        "Mad Paella",
        "Hawker Bar",
        "Pho",
        "Ichiba",
        "Five Guys",
        "Panopolis",
        "PAUL Earls Court",
        "John Forrest Master Ba",
        "Prince Of Teck, Earls",
        "Cleopatra Restaurant",
        "Padaria Lisboa",
        "Wagamama",
        "You Me Sushi Ec",
        "Deliveroo",
        "The Swan",
        "BrewDog",
        "The Monument",
        "Paul Uk Hammersmith",
        "Central Station",
        "Bloomsbury Theatre Bar",
        "Kipps",
        "Joe & The Juice Uk Ltd",
        "Alma Cafe Ltd",
        "Prince Regent",
        "Balans West",
        "Go Go Gourmet Pizza",
        "Organicos Coffee",
        "Fresh Healty Foods Ltd",
        "Riverside Studio",
        "Cofx",
        "Swallow Coffee Shop",
        "Greggs",
        "Domino’s",
        "The Grove",
        "Coffee",
        "Amoret Coffee",
        "Johnnie's Fish Bar",
        "Samosa Haus",
        "Buff Meat",
        "Charco's",
        "Belushis Hammersmith",
        "The William Morris",
        "Leon",
        "Elgin Purepere",
        "Eat17",
        "Emirates Leis Dxbt1 Ar",
        "Bagel Bite",
        "Wasabi",
        "Sky Bar",
        "Londontaxiltd",
        "The Hoarder",
        "Caffè Nero",
        "Organicos Coffee &",
        "Yole Covent Garden",
        "The Grove Tavern",
        "Lebanese Taverna",
        "Ollie’s House Limited",
        "The Plough & Harrow",
        "Segar & Snuff Parl",
    ],
    "Transport": [
        "Sendwave",
        "Zipcar",
        "Tier Mobility",
        "Lime",
        "Transport for London",
        "CMT UK Taxi Fare",
        "Uber",
        "Easi Rent",
        "easirent",
    ],
    "Subscriptions": [
        "Wordtune",
        "Automate.io",
        "Coursera",
        "Rytr - Ai Writer",
        "Netflix",
        "Zapier",
        "Notion2shee",
        "Nutt Labs + Notion Vip",
        "Google",
        "Yourdataltd",
        "Airtable.com/bill",
        "Pipedream, Inc.",
        "Superhuman",
        "Fs Revoicer",
        "Heroku",
        "Deep Learning Courses",
        "Render.com",
        "Setapp",
        "Spark",
        "Motion ",  # NOTE(review): the trailing space is part of the exact match — confirm the raw name ends with one
        "Browserless",
        "www.make.com",
        "Claude",
        "WOW Presents PLUS",
        "Microsoft",
        "Loom Subscription",
        "Amazon Prime",
        "Super Publishing Co.",
        "Apple",
        "Perplexity",
        "Notion",
        "OpenAI",
        "Anthropic",
        "Motion -1 Temp Hold",
        "Brompton Super",
        "ExpressVPN",
        "Grammarly",
        "Granity-ent.com",
        "Heart Internet",
        "Jarvis - Conversion.ai",
        "Realpython",
        "Surfshark",
        "PureGym",
        "Macpaw",
        "Codegate",
    ],
    "Groceries": [
        "Jms Food Store",
        "J M S Foods",
        "Sunfield Foods",
        "Amazon Fresh",
        "Lidl",
        "Waitrose & Partners",
        "M&S",
        "Tesco",
        "Sainsbury’s",
        "Co-op",
        "Deepak Self Service",
        "Chelsea Food Fayre",
        "Chelsea Food Worldsend",
        "Cumberland Food & Wine",
        "Earls Court Food And Wine",
        "J M S Food & News",
        "My Shop",
        "On The Go",
        "Chelsea Food And Wine",
    ],
    "Telephone": [
        "Airalo",
        "TELSERVE LIMITED",
        "EE",
        "plan.com",
        "giffgaff",
    ],
    "Loan": [
        "Credit Resource Solutions",
    ],
    "Medical": [
        "Medicine Chest",
        "Boots",
        "NHS Prescription Prepayment",
        "Green Light Pharmacy",
        "Earls Court Chemist",
        "Superdrug",
        "Zafash Pharmacy",
        "Londonskin&hairclinic",
        "24/7 Zafash Pharmacy",
        "Jhoots Pharmacy",
        "Hammersmith Pharmacy",
    ],
    "Other": [
        "James Vokins",
        "Empriel",
        "PayPal",
        "Rm Media",
        "Leatherpr",
        "Michele Manzolillo",
        "Dipanno Dario",
        "Jan Du plessis revolut",
        "Alessandro Dei Agnoli",
        "Skrill9959",
        "James Vockins",
        "kucoin.com",
        "Patreon",
        "Cash App",
        "Wishtender.com",
        "Skrill3395",
        "ATM",
        "carlos",
        "Kostadin Milchev",
        "Revolut",
        "Koronapay Europe",
        "A Garcia",
        "Cornelioallanj",
        "Carlos",
        "Alesya Zhilenkova",
    ],
    "Credit Cards": [
        "Capital One",
        "Vanquis Bank",
    ],
    "Gift": [
        "Jules Young",
        "Myra Cosio",
        "Jean-Pierre LE TELLIER",
        "Jean-Pierre Le Tillier",
    ],
    "Holiday": [
        "Airbnb",
        "Booking.com",
        "Bootlegger Cape Quarte",
        "The Grey Hotel",
        "Wise Holiday",
        "Woodford Car Hire",
    ],
    "Barber": [
        "Sw5 Barbers Lt",
        "Cut And Go",
        "Old Brompton Barbers",
    ],
    "SA Investment": [
        "Thom",
    ],
    "Tax": [
        "HMRC",
    ],
    "Transfer": [
        "TWL Cattle Farming",
        "Plum",
        "Capital One Mobile App",
        "Jan Du Plessis Virgin Online Current",
    ],
    "Rent": [
        "Hampton Management",
        "Hampton Rent",
    ],
    "Smoking": [
        "On The Goo",  # NOTE(review): possibly a typo of "On The Go" (listed under "Groceries") — confirm against the data
        "The Smoking Jacket",
        "Cheyne News",
        "Deepak Self Service",  # NOTE(review): also listed under "Groceries", which is scanned first, so this entry never matches
        "Lucky Me Enterprise",
        "Smoking",
        "Evapo",
        "Vapourcore Earls Court",
        "UPS",
        "Days &nights L",
        "Day And Night Convenience",
    ],
    "Income": [
        "GOOD RESEARCH LTD",
        "DR BURHAN ALI ADIB AND DR ORIETTA E",
        "DU PLESSIS J V B",
        "- GOOD RESEARCH LT",
        "EARLS CT SUR",
        "Atlantic Medical",
        "LONDON MEDICAL ASSOCIATES LTD",
        "Tide Business Account",
    ],
}

# Prefix rules: category -> list of name prefixes. classify_by_name() returns
# the first category that has a prefix row['Name'] starts with, and
# refine_by_name() lets that result take precedence over every other rule.
# Category labels must be spelled exactly as in transaction_name_rules
# (e.g. "Subscriptions"), otherwise one category is split across two labels.
startswith_rules = {
    "Transport": [
        "London Taxi",
        "Tier",
    ],
    "Eating Out": [
        "Toogoodt",
    ],
    "Other": ["Leather", "Coinbase"],
    "Subscriptions": [
        "Grammarly",
        # Original (misspelled) prefix kept in case a raw name really uses it.
        "Grammerly",
    ],
    "Smoking": ["Krystals"],
}

# Define functions for classification
def classify_by_type(row):
    """Map a transaction's type to a category.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a 'Type' entry.

    Returns:
        The category registered for ``row['Type']`` in
        ``transaction_type_rules``, or ``'Uncategorized'`` when the type has
        no rule.
    """
    txn_type = row['Type']
    if txn_type in transaction_type_rules:
        return transaction_type_rules[txn_type]
    return 'Uncategorized'

def classify_by_name(row, startswith_rules):
    """Return the first category whose prefixes match the start of the name.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a 'Name' entry.
        startswith_rules: Mapping of category -> iterable of prefixes.
            Categories are tried in the mapping's iteration order.

    Returns:
        The matching category, or ``None`` when no prefix matches or when
        ``row['Name']`` is not a string (e.g. NaN/None for a missing name).
    """
    name = row['Name']
    # Missing names arrive from pandas as float NaN; treat them as "no match"
    # instead of failing on the missing .startswith attribute.
    if not isinstance(name, str):
        return None

    for category, prefixes in startswith_rules.items():
        # str.startswith accepts a tuple and checks all prefixes in one call.
        if name.startswith(tuple(str(prefix) for prefix in prefixes)):
            return category
    return None

def refine_by_name(row):
    """Refine a row's category using its transaction name.

    Prefix rules (``startswith_rules``) are checked first and win outright.
    Otherwise the exact-name rules (``transaction_name_rules``) are consulted,
    but only when the current ``row['custom_category']`` is 'Uncategorized'
    or is itself one of the name-rule categories; any other current category
    is returned unchanged.

    Args:
        row: Mapping-like record with 'Name' and 'custom_category' entries.

    Returns:
        The refined category, or the existing ``row['custom_category']`` when
        no rule applies.
    """
    prefix_match = classify_by_name(row, startswith_rules)
    if prefix_match:
        return prefix_match

    current = row['custom_category']
    overridable = current == 'Uncategorized' or current in transaction_name_rules
    if not overridable:
        return current

    txn_name = row['Name']
    # First category (in dict order) whose list contains the exact name.
    return next(
        (label for label, known_names in transaction_name_rules.items()
         if txn_name in known_names),
        current,
    )

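# Usage: load your data, then apply the two passes in this order
# (refine_by_name reads the 'custom_category' column that classify_by_type creates):
# df['custom_category'] = df.apply(classify_by_type, axis=1)
# df['custom_category'] = df.apply(refine_by_name, axis=1)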

In [12]:
# Missing names become empty strings so the name-based matching always sees a str.
data['Name'] = data['Name'].fillna('')

# Pass 1 assigns a category from the transaction type; pass 2 refines it by
# name. Order matters: refine_by_name reads the column written by pass 1.
for classifier in (classify_by_type, refine_by_name):
    data['custom_category'] = data.apply(classifier, axis=1)

In [13]:
# Inspect the classified frame (the rendered output is truncated to the first and last rows)
data

Unnamed: 0,Transaction ID,Date,Time,Type,Name,Emoji,Category,Amount,Currency,Local amount,Local currency,Notes and #tags,Address,Receipt,Description,Category split,custom_category
0,tx_00009jGereHTyV50ElCRLl,28/05/2019,11:30:19,Faster payment,DU PLESSIS J V B,,Income,150.00,GBP,150.00,GBP,BARCLAYS,,,BARCLAYS,,Income
1,tx_00009jGsehBRGqJ8N1IJlp,28/05/2019,14:04:51,Card payment,Boots,💊,Medical,-2.79,GBP,-2.79,GBP,💊,198-200 Fulham Palace Road,,BOOTS FULHAM GBR,,Medical
2,tx_00009jGuOQFceohdUyDs1J,28/05/2019,14:24:19,Card payment,Cofx,☕️,Eating out,-3.50,GBP,-3.50,GBP,☕️,220 Fulham Palace Road,,IZ *COFX London GBR,,Eating Out
3,tx_00009jH0DsSa2JC59s2FYP,28/05/2019,15:29:39,Card payment,Eat17,🍽,Eating out,-2.59,GBP,-2.59,GBP,☕️,77 Fulham Palace Road,,EAT17 LONDON GBR,,Eating Out
4,tx_00009jH3AbmTtLDGBj8iNG,28/05/2019,16:02:40,Card payment,Superdrug,💊,Personal care,-9.01,GBP,-9.01,GBP,💅,Unit 26 The Broadway Shopping Centre,,SUPERDRUG STORES PLC HAMMERSMITH GBR,,Medical
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6437,tx_0000AjSgTfMbhNjUqNO13J,30/06/2024,18:35:12,Card payment,Apple,🍎,Subscriptions,-10.99,GBP,-10.99,GBP,,,,APPLE.COM/BILL APPLE.COM/BIL IRL,,Subscriptions
6438,tx_0000AjTM7MxvcLlk4mSRnN,01/07/2024,02:21:47,overdraft,,,Bank Charges,-5.23,GBP,-5.23,GBP,This is the charge for your June 2024 overdraft,,,June overdraft fees,,Bank Charges
6439,tx_0000AjTts7tclBLFbwjjtZ,01/07/2024,08:40:00,Faster payment,Jan du Plessis,,Rent,-40.00,GBP,-40.00,GBP,Jan du Plessis,,,Jan du Plessis,,Uncategorized
6440,tx_0000AjUGo5GEuiHY4Zngg6,01/07/2024,12:56:59,Card payment,Tesco,🍏,Groceries,-13.64,GBP,-13.64,GBP,,294 Old Brompton Road,,TESCO STORES 4926 KENSINGTON GBR,,Groceries


In [14]:
# Write the classified transactions to DATA_PATH, omitting the DataFrame index
data.to_csv(f"{DATA_PATH}/categories3.csv", index=False)