In [62]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [55]:
data = [
    {"description": "Bought groceries from the supermarket", "category": "Groceries"},
    {"description": "Purchased fruits and vegetables", "category": "Groceries"},
    {"description": "Weekly grocery shopping", "category": "Groceries"},
    {"description": "Dinner at a restaurant", "category": "Dining"},
    {"description": "Lunch at a cafe", "category": "Dining"},
    {"description": "Ordered pizza for delivery", "category": "Dining"},
    {"description": "Paid electricity bill", "category": "Utilities"},
    {"description": "Water bill payment", "category": "Utilities"},
    {"description": "Internet subscription fee", "category": "Utilities"},
    {"description": "Monthly rent payment", "category": "Rent/Mortgage"},
    {"description": "Mortgage installment", "category": "Rent/Mortgage"},
    {"description": "Apartment rent", "category": "Rent/Mortgage"},
    {"description": "Bus fare to work", "category": "Transportation"},
    {"description": "Fuel for the car", "category": "Transportation"},
    {"description": "Taxi ride to the airport", "category": "Transportation"},
    {"description": "Doctor's appointment", "category": "Health"},
    {"description": "Medicine from the pharmacy", "category": "Health"},
    {"description": "Gym membership fee", "category": "Health"},
    {"description": "Movie tickets", "category": "Entertainment"},
    {"description": "Concert tickets", "category": "Entertainment"},
    {"description": "Netflix subscription", "category": "Entertainment"},
    {"description": "New pair of shoes", "category": "Clothing"},
    {"description": "Winter jacket purchase", "category": "Clothing"},
    {"description": "Bought a formal shirt", "category": "Clothing"},
    {"description": "Online course subscription", "category": "Education"},
    {"description": "Textbooks for college", "category": "Education"},
    {"description": "Workshop registration fee", "category": "Education"},
    {"description": "Miscellaneous expenses", "category": "Other Expenses"},
    {"description": "Charity donation", "category": "Other Expenses"},
    {"description": "Gift for a friend", "category": "Other Expenses"},
    {"description": "Bought snacks from the store", "category": "Groceries"},
    {"description": "Breakfast at a diner", "category": "Dining"},
    {"description": "Gas bill payment", "category": "Utilities"},
    {"description": "House rent", "category": "Rent/Mortgage"},
    {"description": "Train ticket to the city", "category": "Transportation"},
    {"description": "Dental checkup", "category": "Health"},
    {"description": "Video game purchase", "category": "Entertainment"},
    {"description": "Bought a new dress", "category": "Clothing"},
    {"description": "Tuition fee payment", "category": "Education"},
    {"description": "Unexpected repair costs", "category": "Other Expenses"},
    {"description": "Bought detergent and cleaning supplies", "category": "Groceries"},
    {"description": "Coffee at a coffee shop", "category": "Dining"},
    {"description": "Phone bill payment", "category": "Utilities"},
    {"description": "Condominium maintenance fee", "category": "Rent/Mortgage"},
    {"description": "Bicycle repair costs", "category": "Transportation"},
    {"description": "Vitamins and supplements", "category": "Health"},
    {"description": "Bowling night with friends", "category": "Entertainment"},
    {"description": "Bought a new pair of jeans", "category": "Clothing"},
    {"description": "Language course fee", "category": "Education"},
    {"description": "Unexpected medical expenses", "category": "Other Expenses"},
    {"description": "Bought milk and bread from the store", "category": "Groceries"},
    {"description": "Purchased organic eggs and cheese", "category": "Groceries"},
    {"description": "Stocked up on canned goods", "category": "Groceries"},
    {"description": "Dinner at a sushi restaurant", "category": "Dining"},
    {"description": "Lunch at a burger joint", "category": "Dining"},
    {"description": "Ordered Chinese takeout", "category": "Dining"},
    {"description": "Paid the gas bill for the month", "category": "Utilities"},
    {"description": "Electricity bill for the apartment", "category": "Utilities"},
    {"description": "Monthly water bill payment", "category": "Utilities"},
    {"description": "Rent for the office space", "category": "Rent/Mortgage"},
    {"description": "Mortgage payment for the house", "category": "Rent/Mortgage"},
    {"description": "Monthly condo fee", "category": "Rent/Mortgage"},
    {"description": "Train ticket to visit family", "category": "Transportation"},
    {"description": "Car insurance premium", "category": "Transportation"},
    {"description": "Parking fee at the mall", "category": "Transportation"},
    {"description": "Annual health checkup", "category": "Health"},
    {"description": "Prescription medication", "category": "Health"},
    {"description": "Yoga class fee", "category": "Health"},
    {"description": "Tickets to a comedy show", "category": "Entertainment"},
    {"description": "Purchased a new video game", "category": "Entertainment"},
    {"description": "Subscription to Spotify", "category": "Entertainment"},
    {"description": "Bought a winter coat", "category": "Clothing"},
    {"description": "Purchased running shoes", "category": "Clothing"},
    {"description": "New sunglasses", "category": "Clothing"},
    {"description": "Online coding course", "category": "Education"},
    {"description": "Textbooks for the semester", "category": "Education"},
    {"description": "Workshop on digital marketing", "category": "Education"},
    {"description": "Donation to a local charity", "category": "Other Expenses"},
    {"description": "Gift for a family member", "category": "Other Expenses"},
    {"description": "Unexpected car repair", "category": "Other Expenses"},
    {"description": "Bought cereal and snacks", "category": "Groceries"},
    {"description": "Purchased fresh seafood", "category": "Groceries"},
    {"description": "Dinner at an Italian restaurant", "category": "Dining"},
    {"description": "Lunch at a sandwich shop", "category": "Dining"},
    {"description": "Ordered Indian food delivery", "category": "Dining"},
    {"description": "Paid the internet bill", "category": "Utilities"},
    {"description": "Monthly cable TV subscription", "category": "Utilities"},
    {"description": "Rent for the studio apartment", "category": "Rent/Mortgage"},
    {"description": "Mortgage payment for the condo", "category": "Rent/Mortgage"},
    {"description": "Bus pass for the month", "category": "Transportation"},
    {"description": "Taxi fare to the train station", "category": "Transportation"},
    {"description": "Eye exam and glasses", "category": "Health"},
    {"description": "Massage therapy session", "category": "Health"},
    {"description": "Tickets to a music festival", "category": "Entertainment"},
    {"description": "Purchased a new board game", "category": "Entertainment"},
    {"description": "Bought a summer dress", "category": "Clothing"},
    {"description": "New backpack for school", "category": "Clothing"},
    {"description": "Online photography course", "category": "Education"},
    {"description": "Seminar on personal finance", "category": "Education"},
    {"description": "Unexpected home repair", "category": "Other Expenses"},
]

In [56]:
df = pd.DataFrame(data)

# Preprocessing
X = df["description"]
y = df["category"]


In [57]:
# Feature extraction
vectorizer = TfidfVectorizer(stop_words="english")
X_tfidf = vectorizer.fit_transform(X)

In [58]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

In [93]:
# Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, max_depth=20, random_state=42)  # 100 trees
rf_model.fit(X_train, y_train)

In [94]:
# Evaluate
y_pred = rf_model.predict(X_test)
print(classification_report(y_test, y_pred))

                precision    recall  f1-score   support

      Clothing       0.25      0.50      0.33         2
        Dining       1.00      0.50      0.67         4
     Education       1.00      1.00      1.00         1
 Entertainment       0.67      1.00      0.80         2
     Groceries       0.00      0.00      0.00         3
        Health       0.33      1.00      0.50         1
Other Expenses       0.67      1.00      0.80         2
 Rent/Mortgage       1.00      1.00      1.00         2
Transportation       1.00      0.67      0.80         3

      accuracy                           0.65        20
     macro avg       0.66      0.74      0.66        20
  weighted avg       0.68      0.65      0.62        20



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [95]:
# Print accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 65.00%


In [97]:
# Predict new description
new_description = ""
new_description_tfidf = vectorizer.transform([new_description])
predicted_category = rf_model.predict(new_description_tfidf)
print(f"Predicted Category: {predicted_category[0]}")

Predicted Category: Health
