# 🧠 Retrain Expense Classifier
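This notebook retrains the expense-category classifier from an uploaded CSV file (with `merchant`, `description`, `amount`, and `category` columns) and exports the assets used by the Android app: `expense_model.tflite`, `tfidf_vocab.json`, and `label_classes.json`.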

In [None]:
# 📦 Install dependencies
!pip install pandas scikit-learn tensorflow tensorflowjs

In [None]:
# 📁 Upload your CSV file
# files.upload() opens Colab's file picker; the loading cell treats the result
# as a mapping whose first key is the name of the uploaded CSV.
from google.colab import files

uploaded = files.upload()
if not uploaded:
    # Fail here with a clear message instead of letting the loading cell die
    # with a bare StopIteration from next(iter(uploaded)).
    raise ValueError('No file was uploaded. Re-run this cell and select a CSV file.')

In [None]:
# 🧹 Load and preprocess data
import pandas as pd

# Columns the rest of the notebook reads; any other CSV columns are ignored.
REQUIRED_COLUMNS = ['merchant', 'description', 'amount', 'category']

csv_name = next(iter(uploaded))  # first uploaded file
df = pd.read_csv(csv_name)

# Fail early with a readable message rather than a KeyError further down.
missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]
if missing_columns:
    raise ValueError(f'CSV is missing required column(s): {missing_columns}')

# Drop only the rows that lack a value we actually use. A bare dropna() would
# also throw away usable rows that merely have a blank in an unrelated column.
df = df.dropna(subset=REQUIRED_COLUMNS)
if df.empty:
    raise ValueError('No usable rows remain after dropping rows with missing values.')

# Merchant and description are merged into one free-text field for TF-IDF.
df['text'] = df['merchant'].astype(str) + ' ' + df['description'].astype(str)
X_text = df['text'].values
# reshape(-1, 1) turns the amounts into a single-column 2-D array so they can
# be joined to the text features as one extra column.
X_amount = df['amount'].astype(float).values.reshape(-1, 1)
y = df['category']

In [None]:
# 🔤 TF-IDF and Label Encoding
import json

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Keep at most 70 terms; the resulting matrix has one column per kept term.
vectorizer = TfidfVectorizer(max_features=70)
X_text_tfidf = vectorizer.fit_transform(X_text).toarray()

# Map each category label to an integer class id for the classifier.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Save vocab and label mapping
# vocabulary_ maps term -> column index. When max_features is set the indices
# may be stored as numpy integers, which json.dump rejects
# ("Object of type int64 is not JSON serializable"), so cast them to int.
# NOTE(review): only the vocabulary is exported, not vectorizer.idf_ — confirm
# the consumer of this file does not need the IDF weights.
with open('tfidf_vocab.json', 'w') as f:
    json.dump({term: int(index) for term, index in vectorizer.vocabulary_.items()}, f)

# classes_ is a numpy array; tolist() yields native Python values for JSON.
# Keys are the encoded class ids (as strings), values are the original labels.
with open('label_classes.json', 'w') as f:
    json.dump({str(i): label for i, label in enumerate(label_encoder.classes_.tolist())}, f)

In [None]:
# 🔀 Train/Test split
import numpy as np

# Append the amount column to the TF-IDF matrix with a single vectorized call
# instead of concatenating row by row in a Python loop. The result has one row
# per expense: the TF-IDF columns followed by the amount as the last column.
X_combined = np.hstack([X_text_tfidf, X_amount])

# Hold out 20% of the rows; random_state fixes the shuffle so the split is
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X_combined, y_encoded, test_size=0.2, random_state=42)

In [None]:
# 🤖 Build and train a simple model
import tensorflow as tf
from tensorflow import keras

# Take the input width from the training matrix instead of hard-coding 71.
# TfidfVectorizer(max_features=70) yields *at most* 70 columns, so a corpus
# with fewer distinct terms would not match a fixed (71,) input shape.
n_features = X_train.shape[1]
n_classes = len(label_encoder.classes_)

model = keras.Sequential([
    keras.layers.Input(shape=(n_features,)),
    keras.layers.Dense(64, activation='relu'),
    # One softmax output per category known to the label encoder.
    keras.layers.Dense(n_classes, activation='softmax')
])

# sparse_categorical_crossentropy takes the integer class ids in y_train
# directly, so no one-hot encoding is needed.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# The held-out split is used for per-epoch validation metrics.
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

In [None]:
# 💾 Convert to TFLite model
# Convert the trained Keras model and write the resulting bytes to disk.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open('expense_model.tflite', 'wb') as model_file:
    model_file.write(tflite_model)

# 📤 Download assets
# Send the model plus the two JSON files written earlier to the browser,
# in the same order as before.
from google.colab import files

for asset_name in ('expense_model.tflite', 'tfidf_vocab.json', 'label_classes.json'):
    files.download(asset_name)