In [None]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.45.0-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.45.0-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [None]:
import pandas as pd
import numpy as np

# Expense categories the synthetic generator samples from
categories = ["Food", "Rent", "Transportation", "Utilities", "Entertainment", "Shopping", "Healthcare", "Education", "Travel", "Other"]

# Seed NumPy's global RNG so every run produces the same synthetic dataset
np.random.seed(42)
num_samples = 1000

# Short alias for the global-RNG sampler used by every template below
pick = np.random.choice

# One description template per category. Each template draws its random words
# left to right when called, so the sequence of RNG draws matches the wording order.
description_templates = {
    'Food': lambda: f"{pick(['Grocery', 'Restaurant', 'Snacks', 'Meal'])} at {pick(['Local Market', 'Supermarket', 'Cafe'])}",
    'Rent': lambda: f"Monthly {pick(['Apartment', 'House'])} Rent",
    'Transportation': lambda: f"{pick(['Bus', 'Train', 'Fuel', 'Taxi'])} fare for {pick(['Office', 'Commute', 'Trip'])}",
    'Utilities': lambda: f"{pick(['Electricity', 'Water', 'Gas', 'Internet'])} Bill Payment",
    'Entertainment': lambda: f"Bought {pick(['Movie Ticket', 'Concert Ticket', 'Game', 'Book'])}",
    'Shopping': lambda: f"Purchased {pick(['Clothes', 'Shoes', 'Electronics', 'Gift'])} from {pick(['Mall', 'Online Store'])}",
    'Healthcare': lambda: f"Paid for {pick(['Doctor Visit', 'Medicine', 'Checkup'])}",
    'Education': lambda: f"Tuition fee for {pick(['Course', 'Exam'])}",
    'Travel': lambda: f"Flight/Train ticket to {pick(['Goa', 'Kerala', 'Delhi'])}",
}

# Template for any category without a dedicated entry above (i.e. "Other")
other_template = lambda: f"Miscellaneous {pick(['Item', 'Service'])}"

# Generate synthetic expense data, one (description, category, amount) tuple per sample
rows = []
for _ in range(num_samples):
    # Draw order is fixed: category first, then amount, then the description words
    chosen = pick(categories)
    spent = np.random.uniform(10, 5000)
    build_description = description_templates.get(str(chosen), other_template)
    rows.append((build_description(), chosen, spent))

# Unzip the per-sample tuples back into three parallel column lists
descriptions, category_list, amounts = (list(column) for column in zip(*rows))

# Create DataFrame
df_train = pd.DataFrame(dict(Description=descriptions, Category=category_list, Amount=amounts))

# Show the first rows and the per-category sample counts
print(
    "Sample Training Data:",
    df_train.head(),
    "\nCategory Distribution in Training Data:",
    df_train['Category'].value_counts(),
    sep="\n",
)

# Train the Model
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
import pickle

# Inputs are the free-text descriptions; targets are the category labels.
# The full synthetic dataset is used for fitting (no hold-out split is made here).
X_train, y_train = df_train['Description'], df_train['Category']

# TF-IDF features (English stop words removed) feeding a multinomial Naive Bayes classifier
text_vectorizer = TfidfVectorizer(stop_words='english')
category_classifier = MultinomialNB()
model_pipeline = Pipeline(steps=[('tfidf', text_vectorizer), ('clf', category_classifier)])
model_pipeline.fit(X_train, y_train)

# Save the trained model next to the notebook so app.py can load it by file name
import os
model_filename = 'expense_category_model.pkl'
with open(model_filename, mode='wb') as model_file:
    pickle.dump(model_pipeline, model_file)

print(f"\nTrained model saved as '{model_filename}'")


Sample Training Data:
              Description    Category       Amount
0        Paid for Checkup  Healthcare  3984.749504
1  Tuition fee for Course   Education  2997.305836
2      Miscellaneous Item       Other   788.412656
3    Tuition fee for Exam   Education  1675.205970
4  Tuition fee for Course   Education  3257.933480

Category Distribution in Training Data:
Category
Food              116
Entertainment     111
Utilities         109
Transportation    102
Education         100
Other              98
Travel             94
Shopping           94
Healthcare         90
Rent               86
Name: count, dtype: int64

Trained model saved as 'expense_category_model.pkl'


In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import smtplib
from email.mime.text import MIMEText
from datetime import datetime, timedelta
from collections import defaultdict
import numpy as np  # Import numpy

# Known expense categories (the default set offered by the spending-limit inputs)
categories = ["Food", "Rent", "Transportation", "Utilities", "Entertainment", "Shopping", "Healthcare", "Education", "Travel", "Other"]

# Load the trained model; without it the app cannot categorize, so halt the script run.
# NOTE: unpickling can execute code stored in the file, so the .pkl must come from a trusted source.
try:
    with open('expense_category_model.pkl', mode='rb') as model_file:
        model_pipeline = pickle.load(model_file)
except FileNotFoundError:
    st.error("Error: Trained model file not found. Please run the training part first.")
    st.stop()

# Create the per-session containers the first time the script runs in a session:
# a list of expense records and a category -> limit mapping.
session_defaults = (
    ('expenses', list),
    ('limits', lambda: defaultdict(float)),
)
for state_key, make_default in session_defaults:
    if state_key not in st.session_state:
        st.session_state[state_key] = make_default()

st.title("Personal Budget Categorizer")

# Form for entering a new expense
with st.form("add_expense"):
    description = st.text_input("Expense Description")
    amount = st.number_input("Amount", min_value=0.01)
    submitted = st.form_submit_button("Add Expense")

    # Act only on a submission that carries both a description and an amount
    if all((submitted, description, amount)):
        # The pipeline expects a sequence of texts; take the single prediction back out
        predicted_category = model_pipeline.predict([description])[0]
        st.write(f"Predicted Category: {predicted_category}")

        # Record the expense with today's date as a 'YYYY-MM-DD' string
        new_expense = {
            'Description': description,
            'Amount': amount,
            'Category': predicted_category,
            'Date': datetime.now().strftime('%Y-%m-%d'),
        }
        st.session_state['expenses'].append(new_expense)

# Display current expenses and chart today's spending per category
if st.session_state['expenses']:
    st.subheader("Current Expenses")
    expenses_df = pd.DataFrame(st.session_state['expenses'])
    st.dataframe(expenses_df)

    # Daily Analysis
    st.subheader("Daily Spending Analysis")
    today = datetime.now()
    # Midnight of the current day. Stored dates are 'YYYY-MM-DD' strings that parse
    # to midnight, so ">=" keeps exactly the entries dated today (or later).
    start_of_day = today.replace(hour=0, minute=0, second=0, microsecond=0)
    daily_expenses = [exp for exp in st.session_state['expenses'] if datetime.strptime(exp['Date'], '%Y-%m-%d') >= start_of_day]

    if daily_expenses:
        daily_df = pd.DataFrame(daily_expenses)
        # Total amount spent today in each category
        daily_spending = daily_df.groupby('Category')['Amount'].sum().reset_index()

        # Graphical Representation (Pie Chart)
        fig_pie, ax_pie = plt.subplots()
        ax_pie.pie(daily_spending['Amount'], labels=daily_spending['Category'], autopct='%1.1f%%', startangle=90)
        ax_pie.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
        st.pyplot(fig_pie)
        # The script reruns on every interaction; close the figure once rendered so
        # pyplot does not keep accumulating open figures across reruns.
        plt.close(fig_pie)

        # Graphical Representation (Bar Chart)
        fig_bar, ax_bar = plt.subplots()
        ax_bar.bar(daily_spending['Category'], daily_spending['Amount'])
        ax_bar.set_xlabel("Category")
        ax_bar.set_ylabel("Spending")
        ax_bar.set_title("Daily Spending by Category")
        # Style this specific axes/figure explicitly instead of relying on pyplot's
        # implicit "current figure" state.
        plt.setp(ax_bar.get_xticklabels(), rotation=45, ha='right')
        fig_bar.tight_layout()
        st.pyplot(fig_bar)
        plt.close(fig_bar)
    else:
        st.info("No expenses recorded for today.")

# Set Spending Limits
st.subheader("Set Spending Limits")

# Offer a limit input for each category seen in the recorded expenses;
# when nothing has been recorded yet, fall back to the predefined category list.
seen_categories = {entry['Category'] for entry in st.session_state['expenses']}
categories_to_use = sorted(seen_categories) if st.session_state['expenses'] else sorted(categories)

# Each input starts from the stored limit (0.0 if none) and writes the current value back
saved_limits = st.session_state['limits']
for category in categories_to_use:
    saved_limits[category] = st.number_input(
        f"Limit for {category}",
        min_value=0.0,
        value=saved_limits.get(category, 0.0),
    )

# Check for exceeded limits and send email (on button click for demonstration)
st.subheader("Check Limits and Send Email")
email_address = st.text_input("Your Email Address")
sender_email = st.text_input("Sender Email Address (e.g., your Gmail)")
sender_password = st.text_input("Sender Email Password/App Password", type="password")
check_button = st.button("Check Limits and Send Email if Exceeded")

if check_button and not (email_address and sender_email and sender_password):
    # Give feedback instead of silently ignoring the click when a field is blank
    st.warning("Please fill in the recipient address, sender address and sender password first.")
elif check_button:
    # Compute the day boundary here rather than reusing a variable that is only
    # assigned when expenses exist; otherwise clicking the button with no
    # recorded expenses raises NameError.
    today_start = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    daily_expenses = [
        exp for exp in st.session_state['expenses']
        if datetime.strptime(exp['Date'], '%Y-%m-%d') >= today_start
    ]

    # category -> amount by which today's spending exceeds its limit
    exceeded_limits = {}
    if daily_expenses:
        limits = st.session_state['limits']
        daily_category_spending = pd.DataFrame(daily_expenses).groupby('Category')['Amount'].sum()
        for category, spending in daily_category_spending.items():
            # A missing or zero limit means "no limit set" and never triggers an alert
            category_limit = limits.get(category, 0.0)
            if spending > category_limit > 0:
                exceeded_limits[category] = spending - category_limit

    if exceeded_limits:
        subject = "Budget Alert! Daily Spending Limit Exceeded"
        body = "Dear User,\n\nYour daily spending has exceeded the set limit in the following categories:\n"
        # Use a dedicated loop name so the form's `amount` value is not overwritten
        for category, overage in exceeded_limits.items():
            body += f"- {category}: Exceeded by ₹{overage:.2f}\n"
        body += "\nPlease review your spending.\n\nSincerely,\nYour Personal Budget Tracker"

        # UTF-8 charset is required because the body contains the rupee sign
        msg = MIMEText(body, _charset="utf-8")
        msg['Subject'] = subject
        msg['From'] = sender_email
        msg['To'] = email_address

        try:
            # Implicit-TLS connection to Gmail's SMTP endpoint
            with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
                server.login(sender_email, sender_password)
                server.sendmail(sender_email, email_address, msg.as_string())
            st.success("Email sent successfully!")
        except Exception as e:
            # Best-effort notification: surface any login/network failure in the UI
            st.error(f"Error sending email: {e}")
    else:
        st.info("No daily spending limits exceeded so far.")


Writing app.py


In [None]:
run app.py

2025-05-05 15:42:20.496 
  command:

    streamlit run app.py [ARGUMENTS]


In [None]:
# Start the Streamlit app in the background (`&`) and, in the same shell command,
# open a localtunnel on port 8501 — the port the recorded Streamlit output reports
# as its local URL — which prints a public URL for the app.
!streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://104.196.131.129:8501[0m
[0m
[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0Kyour url is: https://ninety-goats-pay.loca.lt
