In [18]:
from transformers import pipeline


# Shared zero-shot classification pipeline, created once at module level from the
# `valhalla/distilbart-mnli-12-3` checkpoint and reused by later calls.
classifier = pipeline(
    task="zero-shot-classification",
    model="valhalla/distilbart-mnli-12-3",
)


def Runmodel(description):
    """Assign one spending category to each transaction description.

    Every description is scored against a fixed list of category labels using
    the module-level zero-shot ``classifier``. For each prediction the first
    label is kept when its score is at least 0.5; otherwise ``'Misc.'`` is
    reported instead.

    Args:
        description: A single description string, or a list of description
            strings.

    Returns:
        A list of category names, one per prediction returned by the
        classifier. A single string input yields a one-element list and an
        empty list yields an empty list.
    """
    labels = [
        "Shopping",
        "Food",
        "Investments",
        "Entertainment",
        "Travel",
        "Utilities",
        "Insurance",
    ]
    hypothesis_template = "This text is about {}."

    # Nothing to classify: skip the model call entirely.
    if isinstance(description, (list, tuple)) and len(description) == 0:
        return []

    # The pipeline option is spelled `multi_label` (singular). The previous
    # `multi_labels` spelling is not a pipeline option, so independent
    # per-label scoring was not actually being requested.
    prediction = classifier(
        description, labels, hypothesis_template=hypothesis_template, multi_label=True
    )

    # A single input may come back as one dict rather than a list of dicts;
    # normalise so the loop below always sees one dict per description.
    if isinstance(prediction, dict):
        prediction = [prediction]

    # Keep the first (highest-ranked) label only when it clears the 0.5 cut-off.
    return [
        pred['labels'][0] if float(pred['scores'][0]) >= 0.5 else 'Misc.'
        for pred in prediction
    ]


In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Read the labelled transactions into a DataFrame.
data = pd.read_csv('transactions.csv')

# The free-text description is the model input; the category is the target.
descriptions, categories = data['description'], data['category']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
(
    train_descriptions,
    test_descriptions,
    train_categories,
    test_categories,
) = train_test_split(descriptions, categories, test_size=0.2, random_state=42)

# Token-count features: the vocabulary is fitted on the training split only and
# then reused, unchanged, to encode the test split.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_descriptions)
X_test = vectorizer.transform(test_descriptions)

# Train a logistic regression classifier on the token-count features.
# NOTE: the fitted model is bound to `category_model`, not `classifier`.
# `classifier` already names the zero-shot pipeline that Runmodel() reads as a
# global; rebinding it here would make Runmodel() fail once this cell has run
# (for example on Restart & Run All).
category_model = LogisticRegression()
category_model.fit(X_train, train_categories)

# Predict categories for the held-out test descriptions.
predictions = category_model.predict(X_test)

# Calculate evaluation metrics. Precision, recall and F1 are weighted by class
# support. zero_division=0 is passed to all three so that a class with no
# predicted (or no true) samples scores 0 without emitting a warning; F1
# previously omitted it, unlike precision and recall.
accuracy = accuracy_score(test_categories, predictions)
precision = precision_score(test_categories, predictions, average='weighted', zero_division=0)
recall = recall_score(test_categories, predictions, average='weighted', zero_division=0)
f1 = f1_score(test_categories, predictions, average='weighted', zero_division=0)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 score:", f1)


Accuracy: 0.6666666666666666
Precision: 0.6958333333333334
Recall: 0.6666666666666666
F1 score: 0.6619995501574448
