In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report

# Load your labeled dataset
# Assuming df is your DataFrame with columns like 'sms', 'carrier', 'date_time', 'amount', etc.
# Replace 'your_dataset.csv' with the actual path or name of your dataset file.
df = pd.read_csv('sms.csv')

# Select relevant columns and create a new DataFrame
selected_columns = ['sms', 'date_time', 'amount', 'transaction_type', 'fees',
                    'acccount', 'receiver_sender', 'new_balance', 'category', 'sub_category']
df_selected = df[selected_columns]

# Handle missing values
df_selected = df_selected.dropna(subset=['date_time', 'amount', 'transaction_type',  'fees',
                                          'acccount', 'receiver_sender', 'new_balance', 'category', 'sub_category'])

# Convert target columns to strings
for col in df_selected.columns[1:]:
    df_selected[col] = df_selected[col].astype(str)
# Split the dataset into training and testing sets
X = df_selected[['sms']]
y = df_selected.drop(['sms'], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a preprocessor with CountVectorizer for 'sms' and RandomForestClassifier
preprocessor = ColumnTransformer(
    transformers=[
        ('sms', CountVectorizer(), 'sms'),
    ],
    remainder='passthrough'
)

# Create a pipeline with preprocessor and MultiOutputClassifier with RandomForestClassifier
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', MultiOutputClassifier(RandomForestClassifier()))
])

# Train the model
pipeline.fit(X_train, y_train)

# Make predictions on the test set
predictions = pipeline.predict(X_test)

# Assuming 'labels' is the list of target column names
labels = ['date_time', 'amount', 'transaction_type', 'fees',
          'acccount', 'receiver_sender', 'new_balance', 'category', 'sub_category']

# Calculate and print the classification report for each target
for i, label in enumerate(labels):
    print(f"Classification Report for {label}:")
    print(classification_report(y_test[label], predictions[:, i]))
    print("="*50)


Classification Report for date_time:
                     precision    recall  f1-score   support

2024-02-26 06:46:48       0.00      0.00      0.00       0.0
2024-02-26 12:43:32       0.00      0.00      0.00       1.0
2024-02-26 14:18:37       0.00      0.00      0.00       0.0
2024-02-27 14:49:36       0.00      0.00      0.00       1.0
2024-02-27 18:57:19       0.00      0.00      0.00       1.0
2024-02-27 19:47:37       0.00      0.00      0.00       0.0
2024-02-28 06:22:17       0.00      0.00      0.00       1.0
2024-02-28 12:47:09       0.00      0.00      0.00       0.0
2024-02-28 12:50:04       0.00      0.00      0.00       1.0
2024-02-28 17:30:34       0.00      0.00      0.00       1.0
2024-03-04 17:53:44       0.00      0.00      0.00       0.0
2024-03-04 18:55:54       0.00      0.00      0.00       1.0

           accuracy                           0.00       7.0
          macro avg       0.00      0.00      0.00       7.0
       weighted avg       0.00      0.00      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize