In [None]:
# -----------------------------------------------------------------------------------
# Finory AI Module: Smart Transaction Categorization
# -----------------------------------------------------------------------------------
# Objective:
# Generate a large synthetic dataset of financial transactions and train an
# AI model to automatically categorize each transaction based on vendor, amount,
# payment method, and other metadata.
#
# Dataset:
# Synthetic transaction data with 5,000 samples
# Each row includes:
#   - transaction_id
#   - vendor
#   - amount
#   - category (target)
#   - date
#   - payment_method
#   - note (free-text description)
#
# Target Variable:
# 'category'
#   - Example values: 'Groceries', 'Electronics', 'DiningOut', 'Transportation', etc.
#
# Problem Type:
# Multi-Class Classification
#
# Goal:
# Use this synthetic dataset to build and evaluate machine learning models
# that can predict the category of a financial transaction based on its attributes.
# -----------------------------------------------------------------------------------

In [3]:
# 🧠 Finory AI Module: Synthetic Transaction Data Generator (5,000 Rows)
import os
import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from faker import Faker

# Initialize Faker for generating realistic fake data
fake = Faker()

# Set random seeds for reproducibility.
# Faker draws from its own shared random generator, which is independent of
# both `np.random` and the stdlib `random` module, so it has to be seeded
# explicitly; otherwise `transaction_id` and `note` change on every run.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
Faker.seed(SEED)

# 1. Finory-style vendor -> category mapping (single source of truth).
#    Insertion order matters: `vendor_list` below is derived from this dict,
#    and seeded `random.choice(vendor_list)` draws depend on that order, so
#    append new vendors rather than reordering existing ones if previously
#    generated datasets must stay reproducible.
category_map = {
    '3M': 'Hardware',
    'AbbVie': 'Pharmacy',
    'Abbott Laboratories': 'Pharmacy',
    'Accenture': 'Consulting',
    'Activision Blizzard': 'Games',
    'Adobe Systems': 'Software',
    'Advanced Micro Devices': 'Electronics',
    'Aflac': 'Insurance',
    'Agilent Technologies': 'Electronics',
    'Air Products & Chemicals': 'Supplies',
    'Alaska Air Group': 'Airfare',
    'Albemarle Corporation': 'Supplies',
    'Alexion Pharmaceuticals': 'Pharmacy',
    'Align Technology': 'Health',
    'Allegion': 'Hardware',
    'Alliant Energy': 'Utilities',
    'Allstate': 'Insurance',
    'Alphabet': 'Software',
    'Altria Group': 'Miscellaneous',
    'Amazon': 'Shopping',
    'Ameren': 'Utilities',
    'American Airlines Group': 'Airfare',
    'American Electric Power': 'Utilities',
    'American Express': 'CreditCard',
    'American International Group': 'Insurance',
    'American Tower': 'Utilities',
    'American Water Works': 'UtilitiesWater',
    'Ameriprise Financial': 'FinanceCharges',
    'AmerisourceBergen': 'Pharmacy',
    'Ametek': 'Electronics',
    'Amgen': 'Pharmacy',
    'Amphenol': 'Electronics',
    'Analog Devices': 'Electronics',
    'Ansys': 'Software',
    'Aon': 'Insurance',
    'APA Corporation': 'Gas',
    'Apple': 'Electronics',
    'Applied Materials': 'Electronics',
    'Archer-Daniels-Midland': 'Groceries',
    'Arista Networks': 'Electronics',
    'Arthur J. Gallagher & Co.': 'Insurance',
    'Assurant': 'Insurance',
    'AT&T': 'PhoneBill',
    'Atmos Energy': 'GasBill',
    'Autodesk': 'Software',
    'Automatic Data Processing': 'Software',
    'AutoZone': 'CarMaintenance',
    'AvalonBay Communities': 'Rent',
    'Avery Dennison': 'Supplies',
    'Baker Hughes': 'Gas',
    'Ball Corporation': 'Supplies',
    'Bank of America': 'BankFees',
    'Bath & Body Works': 'Beauty',
    'Baxter International': 'MedicalSupplies',
    'Becton Dickinson': 'MedicalSupplies',
    'Berkshire Hathaway': 'FinanceCharges',
    'Best Buy': 'Electronics',
    'Bio-Rad Laboratories': 'Health',
    'Bio-Techne': 'Health',
    'Biogen': 'Pharmacy',
    'BlackRock': 'InvestmentFees',
    'Boeing': 'Travel',
    'Booking Holdings': 'Travel',
    'BorgWarner': 'CarMaintenance',
    'Boston Properties': 'Rent',
    'Boston Scientific': 'Health',
    'Bristol-Myers Squibb': 'Pharmacy',
    'Broadcom': 'Electronics',
    'Broadridge Financial Solutions': 'FinanceCharges',
    'Brown & Brown': 'Insurance',
    'Brown-Forman': 'Alcohol',
    'C.H. Robinson Worldwide': 'Delivery',
    'Cadence Design Systems': 'Software',
    'Caesars Entertainment': 'Casino',
    'Camden Property Trust': 'Rent',
    'Campbell Soup Company': 'Groceries',
    'Capital One Financial': 'CreditCard',
    'Cardinal Health': 'Pharmacy',
    'CarMax': 'CarPayment',
    'Carnival Corporation': 'Vacation',
    'Carrier Global': 'Appliances',
    'Catalent': 'Pharmacy',
    'Caterpillar': 'Hardware',
    'Cboe Global Markets': 'InvestmentFees',
    'CBRE Group': 'Rent',
    'CDW': 'Electronics',
    'Celanese': 'Supplies',
    'Centene Corporation': 'HealthInsurance',
    'CenterPoint Energy': 'Utilities',
    'Cerner': 'Health',
    'CF Industries': 'Fertilizer',
    'Charles River Laboratories': 'Pharmacy',
    'Charles Schwab Corporation': 'InvestmentFees',
    'Charter Communications': 'Internet',
    'Chevron Corporation': 'Gas',
    'Chipotle Mexican Grill': 'DiningOut',
    'Chubb Limited': 'Insurance',
    'Church & Dwight': 'Cleaning',
    'Cigna': 'HealthInsurance',
    'Cincinnati Financial': 'Insurance',
    'Cintas': 'Clothing',
    'Cisco Systems': 'Electronics',
    'Citigroup': 'BankFees',
    'Citizens Financial Group': 'BankFees',
    'Clorox': 'Cleaning',
    'CME Group': 'InvestmentFees',
    'CMS Energy': 'Utilities',
    'Coca-Cola Company': 'Drinks',
    'Cognizant Technology Solutions': 'Software',
    'Colgate-Palmolive': 'PersonalCare',
    'Comcast': 'Internet',
    'Comerica': 'BankFees',
    'Conagra Brands': 'Groceries',
    'ConocoPhillips': 'Gas',
    'Consolidated Edison': 'Utilities',
    'Constellation Brands': 'Alcohol',
    'CooperCompanies': 'Pharmacy',
    'Copart': 'CarMaintenance',
    'Corning Inc.': 'Electronics',
    'Corteva': 'Fertilizer',
    'Costco': 'Groceries',
    'Coterra Energy': 'Gas',
    'Crown Castle': 'Utilities',
    'CSX': 'Transportation',
    'Cummins': 'Hardware',
    'CVS Health': 'Pharmacy',
    'D.R. Horton': 'Mortgage',
    'Danaher Corporation': 'Health',
    'Darden Restaurants': 'DiningOut',
    'DaVita Inc.': 'Health',
    'Deere & Company': 'Hardware',
    'Delta Air Lines': 'Airfare',
    'Dentsply Sirona': 'MedicalSupplies',
    'Devon Energy': 'Gas',
    'DexCom': 'MedicalSupplies',
    'Diamondback Energy': 'Gas',
    'Digital Realty': 'Internet',
    'Discover Financial Services': 'CreditCard',
    'Dish Network': 'Streaming',
    'Dollar General': 'Groceries',
    'Dollar Tree': 'Groceries',
    'Dominion Energy': 'Utilities',
    "Domino's Pizza": 'DiningOut',
    'Dover Corporation': 'Maintenance',
    'Dow Inc.': 'Chemicals',
    'DTE Energy': 'Electric',
    'Duke Energy': 'Electric',
    'DuPont': 'Chemicals',
    'DXC Technology': 'Software',
    'Eastman Chemical Company': 'Chemicals',
    'Eaton Corporation': 'Hardware',
    'eBay': 'Shopping',
    'Ecolab': 'Cleaning',
    'Edison International': 'Electric',
    'Edwards Lifesciences': 'MedicalSupplies',
    'Electronic Arts': 'Games',
    'Elevance Health': 'HealthInsurance',
    'Eli Lilly and Company': 'Pharmacy',
    'Emerson Electric': 'Hardware',
    'Enphase Energy': 'Electric',
    'Entergy': 'Utilities',
    'EOG Resources': 'Gas',
    'EPAM Systems': 'Software',
    'Equifax': 'CreditCard',
    'Equinix': 'Internet',
    'Equity Residential': 'Rent',
    'Essex Property Trust': 'Rent',
    'Estee Lauder Companies': 'Cosmetics',
    'Eversource Energy': 'Electric',
    'Evergy': 'Electric',
    'Everest Re': 'Insurance',
    'Exelon': 'Electric',
    'Expedia Group': 'Travel',
    'Expeditors International': 'Delivery',
    'Extra Space Storage': 'Storage',
    'ExxonMobil': 'Gas',
    'F5 Networks': 'Software',
    'FactSet': 'FinanceCharges',
    'Fastenal': 'Hardware',
    'Federal Realty': 'Rent',
    'FedEx': 'Delivery',
    'Fidelity National Information Services': 'FinanceCharges',
    'Fifth Third Bancorp': 'BankFees',
    'First Republic Bank': 'BankFees',
    'First Solar': 'Electric',
    'FirstEnergy': 'Electric',
    'Fiserv': 'FinanceCharges',
    'FleetCor': 'Transportation',
    'FMC Corporation': 'Chemicals',
    'Ford Motor Company': 'CarPayment',
    'Fortinet': 'Software',
    'Fortive': 'Electronics',
    'Fox Corporation': 'Entertainment',
    'Franklin Resources': 'Investments',
    'Freeport-McMoRan': 'Mining',
    'Garmin': 'Electronics',
    'Gartner': 'Consulting',
    'Generac': 'Electronics',
    'General Dynamics': 'Defense',
    'General Electric': 'Electronics',
    'General Mills': 'Groceries',
    'General Motors': 'CarPayment',
    'Genuine Parts Company': 'CarMaintenance',
    'Gilead Sciences': 'Pharmacy',
    'Globe Life': 'LifeInsurance',
    'Global Payments': 'FinanceCharges',
    'Goldman Sachs': 'Investments',
    'Halliburton': 'Gas',
    'Hartford Financial Services': 'Insurance',
    'Hasbro': 'Toys',
    'HCA Healthcare': 'Doctor',
    'Healthpeak Properties': 'Rent',
    'Henry Schein': 'MedicalSupplies',
    'Hershey Company': 'Snacks',
    'Hess Corporation': 'Gas',
    'Hewlett Packard Enterprise': 'Computer',
    'Hilton Worldwide': 'Hotel',
    'Hologic': 'MedicalSupplies',
    'Home Depot': 'HomeImprovement',
    'Honeywell': 'Electronics',
    'Hormel Foods': 'Groceries',
    'Host Hotels & Resorts': 'Hotel',
    'Howmet Aerospace': 'Aerospace',
    'HP Inc.': 'Computer',
    'Humana': 'HealthInsurance',
    'Huntington Bancshares': 'BankFees',
    'Huntington Ingalls Industries': 'Defense',
    'IDEX Corporation': 'Manufacturing',
    'IDEXX Laboratories': 'Veterinary',
    'Illinois Tool Works': 'Hardware',
    'Illumina': 'MedicalSupplies',
    'Incyte': 'Pharmacy',
    'Ingersoll Rand': 'Hardware',
    'Intel': 'Computer',
    'Intercontinental Exchange': 'Investments',
    'International Business Machines': 'Computer',
    'International Paper': 'OfficeSupplies',
    'Interpublic Group': 'Marketing',
    'Intuit': 'Software',
    'Intuitive Surgical': 'MedicalSupplies',
    'Invesco': 'Investments',
    'Invitation Homes': 'Rent',
    'IQVIA': 'MedicalSupplies',
    'Iron Mountain': 'Storage',
    'J.B. Hunt Transport Services': 'Delivery',
    'Jack Henry & Associates': 'Software',
    'Jacobs Engineering': 'Consulting',
    'Johnson & Johnson': 'Pharmacy',
    'Johnson Controls': 'HomeImprovement',
    'JPMorgan Chase': 'BankFees',
    'Juniper Networks': 'Software',
    'Kaiser Aluminum': 'Manufacturing',
    'Kansas City Southern': 'Train',
    'Kellogg Company': 'Groceries',
    'KeyCorp': 'BankFees',
    'Keysight Technologies': 'Electronics',
    'Kimberly-Clark': 'Bathroom',
    'Kimco Realty': 'Rent',
    'Kinder Morgan': 'Gas',
    'KLA Corporation': 'Electronics',
    'Kraft Heinz': 'Groceries',
    'Kroger': 'Groceries',
    'L3Harris Technologies': 'Defense',
    'Laboratory Corporation of America': 'MedicalSupplies',
    'Lam Research': 'Electronics',
    'Lamb Weston Holdings': 'Groceries',
    'Las Vegas Sands': 'Casino',
    'Leidos': 'Consulting',
    'Lennar': 'Mortgage',
    'Lincoln National Corporation': 'LifeInsurance',
    'Linde plc': 'Gas',
    'Live Nation Entertainment': 'Events',
    'LKQ Corporation': 'CarMaintenance',
    'Lockheed Martin': 'Defense',
    'Loews Corporation': 'Insurance',
    "Lowe's": 'HomeImprovement',
    'Lumen Technologies': 'Internet',
    'LyondellBasell': 'Chemicals',
    'M&T Bank': 'BankFees',
    'Marathon Oil': 'Gas',
    'Marathon Petroleum': 'Gas',
    'MarketAxess': 'Investments',
    'Marriott International': 'Hotel',
    'Marsh & McLennan': 'Insurance',
    'Martin Marietta Materials': 'Construction',
    'Masco': 'HomeImprovement',
    'Mastercard': 'CreditCard',
    'Match Group': 'Dating',
    'McCormick & Company': 'Groceries',
    "McDonald's": 'DiningOut',
    'McKesson': 'Pharmacy',
    'Medtronic': 'MedicalSupplies',
    'Merck & Co.': 'Pharmacy',
    'Meta Platforms': 'Advertising',
    'MetLife': 'Insurance',
    'Mettler-Toledo': 'MedicalSupplies',
    'MGM Resorts': 'Entertainment',
    'Microchip Technology': 'Electronics',
    'Micron Technology': 'Electronics',
    'Microsoft': 'Software',
    'Mid-America Apartment Communities': 'Rent',
    'Mohawk Industries': 'HomeImprovement',
    'Molson Coors Beverage Company': 'Alcohol',
    'Mondelez International': 'Snacks',
    'Monolithic Power Systems': 'Electronics',
    'Monster Beverage': 'Drinks',
    "Moody's Corporation": 'FinanceCharges',
    'Morgan Stanley': 'Investments',
    'Mosaic Company': 'Fertilizer',
    'Motorola Solutions': 'Electronics',
    'MSCI': 'Investments',
    'Nasdaq, Inc.': 'Investments',
    'National Oilwell Varco': 'Gas',
    'NetApp': 'Software',
    'Netflix': 'Streaming',
    'Newell Brands': 'HomeImprovement',
    'Newmont Corporation': 'Investments',
    'News Corp': 'Media',
    'NextEra Energy': 'Electric',
    'Nielsen Holdings': 'Marketing',
    'Nike, Inc.': 'Clothing',
    'NiSource': 'Gas',
    'Noble Energy': 'Gas',
    'Nordstrom': 'Clothing',
    'Norfolk Southern Railway': 'Transportation',
    'Northern Trust': 'Investments',
    'Northrop Grumman': 'Defense',
    'NortonLifeLock': 'SecuritySystem',
    'NRG Energy': 'Electric',
    'Nucor': 'Manufacturing',
    'NVIDIA': 'Electronics',
    'NVR, Inc.': 'Mortgage',
    'NXP Semiconductors': 'Electronics',
    'Occidental Petroleum': 'Gas',
    'Old Dominion Freight Line': 'Delivery',
    'Omnicom Group': 'Marketing',
    'ON Semiconductor': 'Electronics',
    'Oneok': 'Gas',
    'Oracle Corporation': 'Software',
    'Organon & Co.': 'Pharmacy',
    "O'Reilly Auto Parts": 'CarMaintenance',
    'Otis Worldwide': 'ElevatorMaintenance',
    'Paccar': 'Transportation',
    'Packaging Corporation of America': 'OfficeSupplies',
    'Paramount Global': 'Streaming',
    'Parker Hannifin': 'Hardware',
    'Paychex': 'Payroll',
    'Paycom': 'Payroll',
    'PayPal': 'FinanceCharges',
    'Pentair': 'HomeImprovement',
    'PepsiCo': 'Drinks',
    'PerkinElmer': 'MedicalSupplies',
    'Pfizer': 'Pharmacy',
    'PG&E Corporation': 'Electric',
    'Philip Morris International': 'Tobacco',
    'Phillips 66': 'Gas',
    'Pinnacle West Capital': 'Electric',
    'Pioneer Natural Resources': 'Gas',
    'PNC Financial Services': 'BankFees',
    'Pool Corporation': 'OutdoorGear',
    'PPG Industries': 'Paint',
    'PPL Corporation': 'Electric',
    'Principal Financial Group': 'Investments',
    'Procter & Gamble': 'Household',
    'Progressive Corporation': 'AutoInsurance',
    'Prologis': 'Rent',
    'Prudential Financial': 'LifeInsurance',
    'Public Service Enterprise Group': 'Electric',
    'PulteGroup': 'Mortgage',
    'Qorvo': 'Electronics',
    'Quanta Services': 'Utilities',
    'Quest Diagnostics': 'MedicalSupplies',
    'Ray': 'Defense',  # NOTE(review): looks like a truncated vendor name - confirm
    'Raymond James Financial': 'Investments',
    'Raytheon Technologies': 'Defense',
    'Realty Income': 'Rent',
    'Regency Centers': 'Rent',
    'Regeneron Pharmaceuticals': 'Pharmacy',
    'Regions Financial': 'BankFees',
    'Republic Services': 'Trash',
    'ResMed': 'MedicalSupplies',
    'Robert Half': 'Consulting',
    'Rockwell Automation': 'Hardware',
    'Rollins': 'PestControl',
    'Roper Technologies': 'Electronics',
    'Ross Stores': 'Clothing',
    'Royal Caribbean Group': 'Travel',
    'RTX Corporation': 'Defense',
    'S&P Global': 'Investments',
    'Salesforce': 'Software',
    'SBA Communications': 'Phone',
    'Schlumberger': 'Gas',
    'Seagate Technology': 'Computer',
    'Sealed Air': 'Supplies',
    'Seattle Genetics': 'Pharmacy',
    'Sempra Energy': 'Gas',
    'ServiceNow': 'Software',
    'Sherwin-Williams': 'Paint',
    'Signature Bank': 'BankFees',
    'Simon Property Group': 'Rent',
    'Skyworks Solutions': 'Electronics',
    'SL Green Realty': 'Rent',
    'Smucker (J.M.)': 'Groceries',
    'Snap-on': 'Hardware',
    'SolarEdge Technologies': 'Utilities',
    'Southern Company': 'Electric',
    'Southwest Airlines': 'Airfare',
    'Stanley Black & Decker': 'Tools',
    'Starbucks': 'Coffee',
    'State Street Corporation': 'Investments',
    'Steel Dynamics': 'Manufacturing',
    'Steris': 'MedicalSupplies',
    'Stryker Corporation': 'MedicalSupplies',
    'SVB Financial': 'BankFees',
    'Synchrony Financial': 'CreditCard',
    'Synopsys': 'Software',
    'Sysco': 'Groceries',
    'T-Mobile US': 'PhoneBill',
    'T. Rowe Price': 'Investments',
    'Take-Two Interactive': 'Games',
    'Tapestry, Inc.': 'Clothing',
    'Targa Resources': 'Gas',
    'Target': 'Groceries',
    'TE Connectivity': 'Electronics',
    'Teledyne Technologies': 'Electronics',
    'Teleflex': 'MedicalSupplies',
    'Teradyne': 'Electronics',
    'Tesla': 'CarPayment',
    'Textron': 'Aerospace',
    'The Hershey Company': 'Snacks',  # NOTE(review): possibly a duplicate of 'Hershey Company' - confirm
    'The Trade Desk': 'Advertising',
    'Thermo Fisher Scientific': 'MedicalSupplies',
    'TJX Companies': 'Clothing',
    'Tractor Supply Company': 'Hardware',
    'TransDigm Group': 'Aerospace',
    'Trane Technologies': 'HomeImprovement',
    'Travelers Companies': 'Insurance',
    'Trimble Inc.': 'Electronics',
    'Truist Financial': 'BankFees',
    'TTEC Holdings': 'Consulting',
    'Tyler Technologies': 'Software',
    'Tyson Foods': 'Groceries',
    'UDR, Inc.': 'Rent',
    'U.S. Bancorp': 'BankFees',
    'Ulta Beauty': 'Beauty',
    'Union Pacific': 'Transportation',
    'United Airlines Holdings': 'Airfare',
    'United Parcel Service': 'Delivery',
    'United Rentals': 'Equipment',
    'UnitedHealth Group': 'HealthInsurance',
    'Universal Health Services': 'Doctor',
    'Unum Group': 'Insurance',
    'Valero Energy': 'Gas',
    'Ventas': 'Rent',
    'Verisign': 'Software',
    'Verisk Analytics': 'Analytics',
    'Verizon Communications': 'PhoneBill',
    'Vertex Pharmaceuticals': 'Pharmacy',
    'VF Corporation': 'Clothing',
    'ViacomCBS': 'Media',
    'Visa Inc.': 'CreditCard',
    'Vornado Realty Trust': 'Rent',
    'Vulcan Materials': 'Construction',
    'W. R. Berkley': 'Insurance',
    'W.W. Grainger': 'Hardware',
    'Wabtec Corporation': 'Transportation',
    'Walgreens Boots Alliance': 'Pharmacy',
    'Walmart': 'Groceries',
    'Waste Management': 'Trash',
    'Waters Corporation': 'MedicalSupplies',
    'WEC Energy Group': 'Electric',
    'Wells Fargo': 'BankFees',
    'West Pharmaceutical Services': 'MedicalSupplies',
    'Western Digital': 'Computer',
    'WestRock': 'OfficeSupplies',
    'Weyerhaeuser': 'Construction',
    'Whirlpool Corporation': 'Appliances',
    'Williams Companies': 'Gas',
    'Willis Towers Watson': 'Insurance',
    'Wynn Resorts': 'Casino',
    'Xcel Energy': 'Electric',
    'Xerox Holdings': 'OfficeSupplies',
    'Xilinx': 'Electronics',
    'Xylem Inc.': 'UtilitiesWater',
    'Yum! Brands': 'DiningOut',
    'Zebra Technologies': 'Electronics',
    'Zimmer Biomet': 'MedicalSupplies',
    'Zions Bancorporation': 'BankFees',
    'Zoom Video Communications': 'Software',
    'Zscaler': 'SecuritySystem',
}

# 2. Vendor list, derived from the mapping keys (in insertion order) so the
#    two can never drift apart and no vendor is left without a category.
vendor_list = list(category_map)

# 3. Payment instruments a synthetic transaction can be settled with
payment_methods = [
    'Visa',
    'MasterCard',
    'AmEx',
    'PayPal',
    'Apple Pay',
]

# 4. Generate a single synthetic transaction
def generate_transaction(reference_date=None):
    """Build one synthetic transaction record.

    The category is looked up in ``category_map`` for the randomly chosen
    vendor (falling back to ``'Miscellaneous'`` when the vendor has no entry),
    so it is a deterministic function of the vendor.

    Args:
        reference_date: Optional ``datetime`` that the transaction date is
            counted back from (0-180 days). Defaults to ``datetime.now()``;
            pass a fixed value to make the generated dates reproducible
            across runs.

    Returns:
        dict with keys ``transaction_id``, ``vendor``, ``amount``,
        ``category``, ``date`` (``YYYY-MM-DD`` string), ``payment_method``
        and ``note``.
    """
    if reference_date is None:
        reference_date = datetime.now()

    vendor = random.choice(vendor_list)
    category = category_map.get(vendor, 'Miscellaneous')

    # Exponentially distributed spend with mean 40. Draws that round below 5
    # are shifted up by 5, so no amount is smaller than 5.
    amount = round(np.random.exponential(scale=40), 2)
    if amount < 5:
        # Re-round after the shift: float addition can reintroduce digits
        # beyond two decimals.
        amount = round(amount + 5, 2)

    days_ago = random.randint(0, 180)
    date = reference_date - timedelta(days=days_ago)

    # Field order below also fixes the order of the remaining random draws
    # (uuid, payment method, note); keep it stable for seeded runs.
    return {
        "transaction_id": fake.uuid4(),
        "vendor": vendor,
        "amount": amount,
        "category": category,
        "date": date.strftime("%Y-%m-%d"),
        "payment_method": random.choice(payment_methods),
        "note": fake.sentence(nb_words=5)
    }

# 5. Generate a dataset of 5,000 transactions
N_TRANSACTIONS = 5000
transactions = [generate_transaction() for _ in range(N_TRANSACTIONS)]
df = pd.DataFrame(transactions)

# Save to CSV
output_path = "../data/synthetic_finory_transactions.csv"
# DataFrame.to_csv does not create missing parent directories, so make sure
# the target folder exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)
print(f"✅ Data saved to {output_path}")

✅ Data saved to ../data/synthetic_finory_transactions.csv
