In [None]:
import pandas as pd

# Load the dataset from Google Drive into a pandas DataFrame
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/PhD_Thesis_Experiments/sample_complaints_2years_005.csv')

In [None]:
# Display the first few rows of the DataFrame to understand the data structure
df.head()

Unnamed: 0,consumer_complaint_narrative,product,sub_product
0,using bread previously known cd supposed matur...,CheckingSavings,cdCertificateOfDeposit
1,presented holder huntington bank mi opened set...,CheckingSavings,cdCertificateOfDeposit
2,complaint nexbank bank employee purposely foll...,CheckingSavings,cdCertificateOfDeposit
3,chase bank month lock cd equal coming maturity...,CheckingSavings,cdCertificateOfDeposit
4,since synchrony bank giving runaround giving a...,CheckingSavings,cdCertificateOfDeposit


In [None]:
# Display the shape of the DataFrame (number of rows and columns)
df.shape

(161446, 3)

In [None]:
# Get the counts of each unique value in the 'product' column
product_sample_counts = df['product'].value_counts()

# Display the product counts
display(product_sample_counts)

Unnamed: 0_level_0,count
product,Unnamed: 1_level_1
CreditReporting,119345
DebtCollection,12784
CheckingSavings,6841
MoneyTransfer,6603
CreditCard,6587
CreditReportingRepair,2594
Mortgage,2350
StudentLoan,1860
VehicleLoanLease,1519
PaydayLoan,963


In [None]:
# Create a new column 'hierarchical_label' by combining 'product' and 'sub_product'
# This creates a single label representing the hierarchical classification
df['hierarchical_label'] = df['product'] + '::' + df['sub_product'].fillna('None') # Fill NaN sub_products with 'None'

# Display the first few hierarchical labels to verify the new column
display(df['hierarchical_label'].head())

# Get the unique hierarchical labels to see the different categories
unique_hierarchical_labels = df['hierarchical_label'].unique()
print("\nNumber of unique hierarchical labels:", len(unique_hierarchical_labels))

Unnamed: 0,hierarchical_label
0,CheckingSavings::cdCertificateOfDeposit
1,CheckingSavings::cdCertificateOfDeposit
2,CheckingSavings::cdCertificateOfDeposit
3,CheckingSavings::cdCertificateOfDeposit
4,CheckingSavings::cdCertificateOfDeposit



Number of unique hierarchical labels: 53


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Handle potential NaN values in the text column by replacing them with an empty string
df['consumer_complaint_narrative'] = df['consumer_complaint_narrative'].fillna('')

# Initialize TfidfVectorizer
tfidf_vectorizer = TfidfVectorizer(max_features=5000) # You can adjust max_features as needed

# Fit and transform the text data
X = tfidf_vectorizer.fit_transform(df['consumer_complaint_narrative'])

# Display the shape of the resulting matrix
print("Shape of TF-IDF matrix:", X.shape)

Shape of TF-IDF matrix: (161446, 5000)


### Evaluation Metrics: Hierarchical Precision, Hierarchical Recall, Hierarchical F1-score

To evaluate the hierarchical multi-class text classification model developed for categorizing consumer complaints into financial products and sub-products, a two-level hierarchical evaluation framework was implemented. The dataset contains a primary categorical label (“product”) and a secondary categorical label (“sub_product”), representing a parent–child relationship. To capture this hierarchical structure, a combined label (“hierarchical_label”) was constructed by concatenating the product and sub-product fields. The evaluation was conducted at both levels of the hierarchy using weighted precision, recall, and F1-scores to account for class imbalance. Specifically, metrics were first computed independently for the product level and the sub-product level, where sub-product metrics were calculated only for valid non-null sub-product instances. Finally, hierarchical precision, recall, and F1-score were derived by averaging the scores from both levels, ensuring that the evaluation reflected both the model’s ability to correctly classify broad product categories and its precision in identifying corresponding sub-products. This approach provided a balanced and interpretable assessment of the model’s performance in capturing hierarchical relationships among financial complaint categories.

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

# Function to split hierarchical labels into product and sub-product
def split_hierarchical_label(label):
    if '::' in label:
        return label.split('::')
    else:
        return [label, 'None'] # Handle cases with no sub-product

# Function to calculate hierarchical metrics
def hierarchical_metrics(y_true, y_pred):
    product_true = [split_hierarchical_label(label)[0] for label in y_true]
    sub_product_true = [split_hierarchical_label(label)[1] for label in y_true]
    product_pred = [split_hierarchical_label(label)[0] for label in y_pred]
    sub_product_pred = [split_hierarchical_label(label)[1] for label in y_pred]

    # Calculate metrics at the product level
    product_precision = precision_score(product_true, product_pred, average='weighted', zero_division=0)
    product_recall = recall_score(product_true, product_pred, average='weighted', zero_division=0)
    product_f1 = f1_score(product_true, product_pred, average='weighted', zero_division=0)

    # Calculate metrics at the sub-product level (only for non-None sub-products)
    # We need to filter for cases where both true and predicted sub-products are not 'None'
    valid_sub_product_true = [sub for i, sub in enumerate(sub_product_true) if sub != 'None' and sub_product_pred[i] != 'None']
    valid_sub_product_pred = [sub for i, sub in enumerate(sub_product_pred) if sub != 'None' and sub_product_true[i] != 'None']


    sub_product_precision = precision_score(valid_sub_product_true, valid_sub_product_pred, average='weighted', zero_division=0) if valid_sub_product_true else 0
    sub_product_recall = recall_score(valid_sub_product_true, valid_sub_product_pred, average='weighted', zero_division=0) if valid_sub_product_true else 0
    sub_product_f1 = f1_score(valid_sub_product_true, valid_sub_product_pred, average='weighted', zero_division=0) if valid_sub_product_true else 0


    # A simple way to combine scores (can be weighted based on importance)
    # Here, we'll just average them
    hierarchical_precision = (product_precision + sub_product_precision) / 2
    hierarchical_recall = (product_recall + sub_product_recall) / 2
    hierarchical_f1 = (product_f1 + sub_product_f1) / 2

    return {
        'product_precision': product_precision,
        'product_recall': product_recall,
        'product_f1': product_f1,
        'sub_product_precision': sub_product_precision,
        'sub_product_recall': sub_product_recall,
        'sub_product_f1': sub_product_f1,
        'hierarchical_precision': hierarchical_precision,
        'hierarchical_recall': hierarchical_recall,
        'hierarchical_f1': hierarchical_f1
    }


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Split data into training and testing sets
# Using the combined hierarchical labels as the target variable
y = df['hierarchical_label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:


# Initialize and train the Logistic Regression model
# Increased max_iter for convergence
logistic_regression_model = LogisticRegression(max_iter=1000)
logistic_regression_model.fit(X_train, y_train)

print("Logistic Regression model trained successfully.")

Logistic Regression model trained successfully.


In [None]:
# Make predictions on the test set
y_pred = logistic_regression_model.predict(X_test)

# Calculate hierarchical metrics
metrics = hierarchical_metrics(y_test, y_pred)

# Display the metrics
print("Hierarchical Classification Metrics:")
for metric, value in metrics.items():
    print(f"{metric}: {value:.4f}")

Hierarchical Classification Metrics:
product_precision: 0.8799
product_recall: 0.8859
product_f1: 0.8704
sub_product_precision: 0.8524
sub_product_recall: 0.8704
sub_product_f1: 0.8500
hierarchical_precision: 0.8661
hierarchical_recall: 0.8781
hierarchical_f1: 0.8602


## Hierarchical Classification Final Results

| Metric Type | Precision | Recall | F1-Score |
| :--- | :---: | :---: | :---: |
| **Product Level** | 0.8799 | 0.8859 | 0.8704 |
| **Sub-Product Level** | 0.8524 | 0.8704 | 0.8500 |
| **Overall Hierarchical** | 0.8661 | 0.8781 | 0.8602 |

---

### Key Performance Indicators:

* **Hierarchical F1-Score:** **0.8602**
* **Hierarchical Recall:** **0.8602**
* **Hierarchical Precision:** **0.8602**


## Explaination of Metrics: Model Evaluation Metrics for Hierarchical Classification

---

### Product-level Metrics (Precision, Recall, F1)

These metrics evaluate the model's performance in classifying the **primary 'product' category**.

* **Product Precision:** Measures the accuracy of the positive predictions at the product level. It tells you what proportion of the predicted products were actually correct.
* **Product Recall:** Measures the model's ability to find all the positive instances at the product level. It tells you what proportion of the actual products were correctly identified.
* **Product F1-score:** The harmonic mean of product precision and recall. It provides a single score that balances both metrics.

---

### Sub-product-level Metrics (Precision, Recall, F1)

These metrics evaluate the model's performance in classifying the **secondary 'sub\_product' category**.

**Important Note:** The evaluation is only for cases where both the true and predicted sub-products are **not 'None'**. This exclusion is made because the 'None' sub-product often represents cases where a sub-product doesn't exist or wasn't specified, and including it might not be meaningful for sub-product performance evaluation.

* **Sub-product Precision:** Measures the accuracy of the positive predictions at the sub-product level (for non-'None' sub-products).
* **Sub-product Recall:** Measures the model's ability to find all the positive instances at the sub-product level (for non-'None' sub-products).
* **Sub-product F1-score:** The harmonic mean of sub-product precision and recall (for non-'None' sub-products).

---

## Hierarchical Metrics (Precision, Recall, F1)

These metrics are a simple **average** of the corresponding product and sub-product metrics. They provide a single score that attempts to capture the model's performance across **both levels of the hierarchy**.

The calculation is as follows:

$$
\text{Hierarchical Precision} = \frac{\text{Product Precision} + \text{Sub-product Precision}}{2}
$$

$$
\text{Hierarchical Recall} = \frac{\text{Product Recall} + \text{Sub-product Recall}}{2}
$$

$$
\text{Hierarchical F1-score} = \frac{\text{Product F1-score} + \text{Sub-product F1-score}}{2}
$$

In [None]:
from sklearn.svm import LinearSVC

# Initialize and train the Linear SVM model
# LinearSVC is often preferred for large datasets
linear_svm_model = LinearSVC(max_iter=10000) # Increased max_iter for convergence
linear_svm_model.fit(X_train, y_train)

print("Linear SVM model trained successfully.")

Linear SVM model trained successfully.


In [None]:
# Make predictions on the test set using the Linear SVM model
y_pred_svm = linear_svm_model.predict(X_test)

# Calculate hierarchical metrics for the Linear SVM model
metrics_svm = hierarchical_metrics(y_test, y_pred_svm)

# Display the metrics
print("Linear SVM Hierarchical Classification Metrics:")
for metric, value in metrics_svm.items():
    print(f"{metric}: {value:.4f}")

Linear SVM Hierarchical Classification Metrics:
product_precision: 0.8762
product_recall: 0.8858
product_f1: 0.8716
sub_product_precision: 0.8467
sub_product_recall: 0.8692
sub_product_f1: 0.8524
hierarchical_precision: 0.8614
hierarchical_recall: 0.8775
hierarchical_f1: 0.8620


## Hierarchical Classification Final Results: Support Vector Machines (SVM)

| Metric Type | Precision | Recall | F1-Score |
| :--- | :---: | :---: | :---: |
| **Product Level** | 0.8762 | 0.8858 | 0.8716 |
| **Sub-Product Level** | 0.8467 | 0.8692 | 0.8524 |
| **Overall Hierarchical** | 0.8614 | 0.8775 | 0.8620 |

---

### Key Performance Indicators:

* **Hierarchical F1-Score:** **0.8620**
* **Hierarchical Recall:** **0.8775**
* **Hierarchical Precision:** **0.8614**


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Initialize and train the Decision Tree model
decision_tree_model = DecisionTreeClassifier(random_state=42)
decision_tree_model.fit(X_train, y_train)

print("Decision Tree model trained successfully.")

Decision Tree model trained successfully.


In [None]:
# Make predictions on the test set using the Decision Tree model
y_pred_tree = decision_tree_model.predict(X_test)

# Calculate hierarchical metrics for the Decision Tree model
metrics_tree = hierarchical_metrics(y_test, y_pred_tree)

# Display the metrics
print("Decision Tree Hierarchical Classification Metrics:")
for metric, value in metrics_tree.items():
    print(f"{metric}: {value:.4f}")

Decision Tree Hierarchical Classification Metrics:
product_precision: 0.8365
product_recall: 0.8436
product_f1: 0.8395
sub_product_precision: 0.8098
sub_product_recall: 0.8201
sub_product_f1: 0.8145
hierarchical_precision: 0.8231
hierarchical_recall: 0.8319
hierarchical_f1: 0.8270


## Hierarchical Classification Final Results: Decision Tree

| Metric Type | Precision | Recall | F1-Score |
| :--- | :---: | :---: | :---: |
| **Product Level** | 0.8365 | 0.8436 | 0.8395 |
| **Sub-Product Level** | 0.8098 | 0.8201 | 0.8145 |
| **Overall Hierarchical** | 0.8231 | 0.8319 | 0.8270 |

---

### Key Performance Indicators:

* **Hierarchical F1-Score:** **0.8270**
* **Hierarchical Recall:** **0.8319**
* **Hierarchical Precision:** **0.8231**


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Initialize and train the Random Forest model
random_forest_model = RandomForestClassifier(random_state=42)
random_forest_model.fit(X_train, y_train)

print("Random Forest model trained successfully.")

Random Forest model trained successfully.


In [None]:
# Make predictions on the test set using the Random Forest model
y_pred_rf = random_forest_model.predict(X_test)

# Calculate hierarchical metrics for the Random Forest model
metrics_rf = hierarchical_metrics(y_test, y_pred_rf)

# Display the metrics
print("Random Forest Hierarchical Classification Metrics:")
for metric, value in metrics_rf.items():
    print(f"{metric}: {value:.4f}")

Random Forest Hierarchical Classification Metrics:
product_precision: 0.8635
product_recall: 0.8618
product_f1: 0.8379
sub_product_precision: 0.8249
sub_product_recall: 0.8533
sub_product_f1: 0.8173
hierarchical_precision: 0.8442
hierarchical_recall: 0.8576
hierarchical_f1: 0.8276


## Hierarchical Classification Final Results: Random Forest

| Metric Type | Precision | Recall | F1-Score |
| :--- | :---: | :---: | :---: |
| **Product Level** | 0.8635 | 0.8618 | 0.8379 |
| **Sub-Product Level** | 0.8249 | 0.8533 | 0.8173 |
| **Overall Hierarchical** | 0.8442 | 0.8576 | 0.8276 |

---

### Key Performance Indicators:

* **Hierarchical F1-Score:** **0.8276**
* **Hierarchical Recall:** **0.8576**
* **Hierarchical Precision:** **0.8442**

### Testing the Model

In [None]:
# Function to predict product and sub-product for a given text
def predict_product_subproduct(complaint_text, model, vectorizer):
    # Preprocess the input text
    # Ensure the text is a string, even if NaN
    complaint_text = str(complaint_text) if pd.isna(complaint_text) else complaint_text

    # Vectorize the input text using the trained TF-IDF vectorizer
    X_new = vectorizer.transform([complaint_text])

    # Predict the hierarchical label using the trained model
    predicted_hierarchical_label = model.predict(X_new)[0]

    # Split the hierarchical label into product and sub-product
    predicted_product, predicted_sub_product = split_hierarchical_label(predicted_hierarchical_label)

    return predicted_product, predicted_sub_product



In [None]:
# Example usage:
# Replace with your desired consumer complaint text
new_complaint = "My bank account was charged an overdraft fee that I believe is incorrect."

# Make a prediction using the Random Forest model
predicted_product_rf, predicted_sub_product_rf = predict_product_subproduct(new_complaint, random_forest_model, tfidf_vectorizer)

print(f"Consumer Complaint: {new_complaint}")
print(f"Predicted Product (Random Forest): {predicted_product_rf}")
print(f"Predicted Sub-Product (Random Forest): {predicted_sub_product_rf}")

Consumer Complaint: My bank account was charged an overdraft fee that I believe is incorrect.
Predicted Product (Random Forest): CheckingSavings
Predicted Sub-Product (Random Forest): checkingAccount


## Model Performance Comparison

| Metric Type | Logistic Regression | Linear SVM | Decision Tree | Random Forest |
| :--- | :---: | :---: | :---: | :---: |
| **Product Precision** | 0.8799 | 0.8762 | 0.8365 | 0.8635 |
| **Product Recall** | 0.8859 | 0.8858 | 0.8436 | 0.8618 |
| **Product F1-Score** | 0.8704 | 0.8716 | 0.8395 | 0.8379 |
| **Sub-Product Precision** | 0.8524 | 0.8467 | 0.8098 | 0.8249 |
| **Sub-Product Recall** | 0.8704 | 0.8692 | 0.8201 | 0.8533 |
| **Sub-Product F1-Score** | 0.8500 | 0.8524 | 0.8145 | 0.8173 |
| **Hierarchical Precision** | 0.8661 | 0.8614 | 0.8231 | 0.8442 |
| **Hierarchical Recall** | 0.8781 | 0.8775 | 0.8319 | 0.8576 |
| **Hierarchical F1-Score** | 0.8602 | 0.8620 | 0.8270 | 0.8276 |