In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('data_original.csv')

# Baseline (averagin)

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.preprocessing import LabelEncoder

# Assuming 'df' is your DataFrame

# Ensure 'islem_tarihi' is a datetime column
df['islem_tarihi'] = pd.to_datetime(df['islem_tarihi'])

# Sort transactions by customer ID and transaction date
df_sorted = df.sort_values(by=['musteri_id_mask', 'islem_tarihi'])

# Filter customers with at least 10 transactions
counts = df_sorted['musteri_id_mask'].value_counts()
df_filtered = df_sorted[df_sorted['musteri_id_mask'].isin(counts[counts >= 10].index)]

# Encode categories for easier processing
encoder = LabelEncoder()
df_filtered['category_encoded'] = encoder.fit_transform(df_filtered['new_category_name_eng'])

# Determine the period of aggregation (e.g., month)
df_filtered['period'] = df_filtered['islem_tarihi'].dt.to_period("M")

# Get the 10th purchase category for each customer
df_filtered['transaction_order'] = df_filtered.groupby('musteri_id_mask').cumcount() + 1
actual_categories = df_filtered[df_filtered['transaction_order'] == 10]

# Aggregate transactions
aggregated_purchases = df_filtered[df_filtered['transaction_order'] < 10].groupby(['musteri_id_mask', 'period', 'category_encoded']).size().unstack(fill_value=0)

# Determine if a purchase was made in each category (>0)
purchases_indicator = aggregated_purchases > 0

# Calculate the probability of purchase in each category for each client
probability_of_purchase = purchases_indicator.groupby('musteri_id_mask').mean()

# Predict the category with the highest probability for each client
predicted_categories = probability_of_purchase.idxmax(axis=1)

# Map predicted categories to actual categories for evaluation
actual_encoded = actual_categories.set_index('musteri_id_mask')['category_encoded']
predicted_categories_filtered = predicted_categories[actual_encoded.index]  # Ensure alignment of indices

# Evaluate predictions
accuracy = accuracy_score(actual_encoded, predicted_categories_filtered)
precision, recall, f1, _ = precision_recall_fscore_support(actual_encoded, predicted_categories_filtered, average='weighted', zero_division=0)

print(f"Baseline Method Evaluation")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# LSTM

In [3]:
# Convert 'islem_tarihi' to datetime format if it's not already
df['islem_tarihi'] = pd.to_datetime(df['islem_tarihi'])

# Sort transactions by customer and transaction date
df_sorted = df.sort_values(by=['musteri_id_mask', 'islem_tarihi'])

# Filter data to include customers with at least 10 transactions
counts = df_sorted['musteri_id_mask'].value_counts()
df_filtered = df_sorted[df_sorted['musteri_id_mask'].isin(counts[counts >= 10].index)]

# Encode categories
le = LabelEncoder()
df_filtered['new_category_name_eng_encoded'] = le.fit_transform(df_filtered['new_category_name_eng'])

def create_sequences_with_ids(df, sequence_length=9):
    sequences = []
    labels = []
    customer_ids = []  # To track the customer ID for each sequence
    customers = df['musteri_id_mask'].unique()
    for customer in customers:
        customer_df = df[df['musteri_id_mask'] == customer].copy()
        for i in range(len(customer_df) - sequence_length):
            sequence = customer_df.iloc[i:i+sequence_length]['new_category_name_eng_encoded'].values
            label = customer_df.iloc[i+sequence_length]['new_category_name_eng_encoded']
            sequences.append(sequence)
            labels.append(label)
            customer_ids.append(customer)  # Track the customer ID
    return np.array(sequences), np.array(labels), np.array(customer_ids)

# Generate sequences along with their customer IDs
sequences, labels, customer_ids = create_sequences_with_ids(df_filtered)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['new_category_name_eng_encoded'] = le.fit_transform(df_filtered['new_category_name_eng'])


In [4]:
# Split the data, keeping track of which sequences belong to which set
X_train, X_test, y_train, y_test, ids_train, ids_test = train_test_split(sequences, labels, customer_ids, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).unsqueeze(-1)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).unsqueeze(-1)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create dataloaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [5]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_layer_size, output_size):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first=True)
        self.linear = nn.Linear(hidden_layer_size, output_size)

    def forward(self, input_seq):
        lstm_out, _ = self.lstm(input_seq)
        predictions = self.linear(lstm_out[:, -1])
        return predictions

# Instantiate the model
model = LSTMModel(input_size=1, hidden_layer_size=128, output_size=len(le.classes_))
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


In [6]:
# Training loop
epochs = 3

for epoch in range(epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        model.hidden_cell = (torch.zeros(1, X_batch.size(0), model.hidden_layer_size),
                             torch.zeros(1, X_batch.size(0), model.hidden_layer_size))
        y_pred = model(X_batch)
        loss = loss_function(y_pred, y_batch)
        loss.backward()
        optimizer.step()
    
    # Validation step...
    # Remember to implement validation logic to monitor model performance

print("Training completed")

Training completed


In [9]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
import torch

# Assume your model, test_loader, and torch are properly defined and imported

# Ensure the model is in evaluation mode
model.eval()

# Collect all predictions and true labels here
all_predictions = []
all_true_labels = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        # Process each batch
        y_pred = model(X_batch)
        _, predicted_labels = torch.max(y_pred, 1)
        
        all_predictions.extend(predicted_labels.cpu().numpy())
        all_true_labels.extend(y_batch.cpu().numpy())

# Convert lists to NumPy arrays for further analysis
all_predictions = np.array(all_predictions)
all_true_labels = np.array(all_true_labels)

# Calculate metrics
accuracy = accuracy_score(all_true_labels, all_predictions)
precision, recall, f1, _ = precision_recall_fscore_support(all_true_labels, all_predictions, average="weighted")

# print the metrics first in overall instead of class wise
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Print precision, recall, and F1 score for each class. Also print the class names
precision, recall, f1, _ = precision_recall_fscore_support(all_true_labels, all_predictions, average=None)
# Assuming `le` is your label encoder and it's defined and fitted to your classes somewhere in your code
for i, (prec, rec, f) in enumerate(zip(precision, recall, f1)):
    print(f"Class {i} - {le.classes_[i]}")  # Correct this line according to how you've defined your classes
    print(f"Precision: {prec}")
    print(f"Recall: {rec}")
    print(f"F1 Score: {f}")

Accuracy: 0.7258825983087482
Precision: 0.7177254672583707
Recall: 0.7258825983087482
F1 Score: 0.7169376943571838
Class 0 - Clothing
Precision: 0.6307503578000409
Recall: 0.44999270676326153
F1 Score: 0.5252553916004541
Class 1 - Gas stations
Precision: 0.6915847310584152
Recall: 0.6335099337748344
F1 Score: 0.6612747131204202
Class 2 - Grocery
Precision: 0.7559624909010462
Recall: 0.8506110780991536
F1 Score: 0.8004987531172069


# Preprocess Data for Different Sequence Lengths

In [11]:
# Step 1: Filter the Unseen Test Data
df_test = df_filtered[df_filtered['musteri_id_mask'].isin(ids_test)]

# Helper function to generate sequences from filtered test data
def create_test_sequences(df, sequence_length):
    sequences = []
    labels = []
    customer_ids = df['musteri_id_mask'].unique()
    for customer_id in customer_ids:
        customer_df = df[df['musteri_id_mask'] == customer_id]
        for i in range(len(customer_df) - sequence_length):
            sequence = customer_df.iloc[i:i+sequence_length]['new_category_name_eng_encoded'].values
            label = customer_df.iloc[i+sequence_length]['new_category_name_eng_encoded']
            sequences.append(sequence)
            labels.append(label)
    return np.array(sequences), np.array(labels)

In [12]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np

# Assume create_test_sequences, df_test, and model are correctly defined

# Class names mapping, replace class_indices with your actual class indices and names
class_names = {0: 'Clothing', 1: 'Gas stations', 2: 'Grocery'}

# Step 2: Generate Sequences of Different Lengths
sequence_lengths = [4, 6, 14]  # Corresponds to desired sequence lengths of 5, 7, and 15
for sequence_length in sequence_lengths:
    sequences, labels = create_test_sequences(df_test, sequence_length)
    
    # Step 3: Convert to Tensors
    X_tensor = torch.tensor(sequences, dtype=torch.float32).unsqueeze(-1)
    y_tensor = torch.tensor(labels, dtype=torch.long)
    
    # Create DataLoader
    test_loader = DataLoader(TensorDataset(X_tensor, y_tensor), batch_size=64, shuffle=False)
    
    # Evaluation
    model.eval()
    all_predictions = []
    all_true_labels = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_pred = model(X_batch)
            _, predicted_labels = torch.max(y_pred, 1)
            
            all_predictions.extend(predicted_labels.cpu().numpy())
            all_true_labels.extend(y_batch.cpu().numpy())

    # Convert lists to NumPy arrays for analysis
    all_predictions = np.array(all_predictions)
    all_true_labels = np.array(all_true_labels)

    # Calculate metrics for each class
    accuracy = accuracy_score(all_true_labels, all_predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(all_true_labels, all_predictions, average="weighted")

    print(f"Sequence Length: {sequence_length + 1}")
    # print first the overall metrics instead of class wise
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")

    # Print precision, recall, and F1 score for each class. Also print the class names
    precision, recall, f1, _ = precision_recall_fscore_support(all_true_labels, all_predictions, average=None)

    # Print precision, recall, and F1 score for each class, including class names
    for i, (prec, rec, f) in enumerate(zip(precision, recall, f1)):
        print(f"{class_names[i]} - Precision: {prec}, Recall: {rec}, F1 Score: {f}")
    print("\n")


Sequence Length: 5
Accuracy: 0.7061810863919166
Precision: 0.6990482753169686
Recall: 0.7061810863919166
F1 Score: 0.6961065174714257
Clothing - Precision: 0.5908685993933448, Recall: 0.4595926816005052, F1 Score: 0.5170278790509972
Gas stations - Precision: 0.7129880594828217, Recall: 0.5499846623290657, F1 Score: 0.6209674846765556
Grocery - Precision: 0.7310389047382945, Recall: 0.8493294045619189, F1 Score: 0.7857571352438776


Sequence Length: 7
Accuracy: 0.7162044848640822
Precision: 0.7081277790982488
Recall: 0.7162044848640822
F1 Score: 0.7080649767263395
Clothing - Precision: 0.6155037997097956, Recall: 0.4586555730674779, F1 Score: 0.5256282356894577
Gas stations - Precision: 0.6821978872136825, Recall: 0.6188518769163931, F1 Score: 0.6489827746333258
Grocery - Precision: 0.7490383233665314, Recall: 0.8393188912009513, F1 Score: 0.7916128806154098


Sequence Length: 15
Accuracy: 0.7347748833632934
Precision: 0.7265379463574917
Recall: 0.7347748833632934
F1 Score: 0.7250783863

# CNN

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import torch.nn.functional as F  #

In [17]:
import pandas as pd
#read vsv data
df = pd.read_csv('data_original.csv')
# Convert 'islem_tarihi' to datetime format if it's not already
df['islem_tarihi'] = pd.to_datetime(df['islem_tarihi'])

# Sort transactions by customer and transaction date
df_sorted = df.sort_values(by=['musteri_id_mask', 'islem_tarihi'])

# Filter data to include customers with at least 10 transactions
counts = df_sorted['musteri_id_mask'].value_counts()
df_filtered = df_sorted[df_sorted['musteri_id_mask'].isin(counts[counts >= 10].index)]

# Encode categories
le = LabelEncoder()
df_filtered['new_category_name_eng_encoded'] = le.fit_transform(df_filtered['new_category_name_eng'])

def create_sequences_with_ids(df, sequence_length=9):
    sequences = []
    labels = []
    customer_ids = []  # To track the customer ID for each sequence
    customers = df['musteri_id_mask'].unique()
    for customer in customers:
        customer_df = df[df['musteri_id_mask'] == customer].copy()
        for i in range(len(customer_df) - sequence_length):
            sequence = customer_df.iloc[i:i+sequence_length]['new_category_name_eng_encoded'].values
            label = customer_df.iloc[i+sequence_length]['new_category_name_eng_encoded']
            sequences.append(sequence)
            labels.append(label)
            customer_ids.append(customer)  # Track the customer ID
    return np.array(sequences), np.array(labels), np.array(customer_ids)

# Generate sequences along with their customer IDs
sequences, labels, customer_ids = create_sequences_with_ids(df_filtered)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['new_category_name_eng_encoded'] = le.fit_transform(df_filtered['new_category_name_eng'])


In [18]:
# Reshape data for CNN: Assuming sequences.shape is (num_samples, sequence_length)
# For CNN, we need to add an additional dimension to mimic 'channels' in images
X_cnn = sequences.reshape(sequences.shape[0], 1, sequences.shape[1])

In [19]:
# Split the data (ensure this matches the sequence shape expected by the CNN)
X_train, X_test, y_train, y_test = train_test_split(X_cnn, labels, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Create dataloaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define a basic CNN model
class CNNWithGlobalPooling(nn.Module):
    def __init__(self, num_classes):
        super(CNNWithGlobalPooling, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.global_pool = nn.AdaptiveAvgPool1d(1)  # Global Average Pooling
        self.fc = nn.Linear(64, num_classes)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.global_pool(x)
        x = x.view(x.size(0), -1)  # Flatten the output for the fully connected layer
        x = self.fc(x)
        return x

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

# Define the CNN model with global pooling to handle variable-length inputs
class CNNWithGlobalPooling(nn.Module):
    def __init__(self, num_classes):
        super(CNNWithGlobalPooling, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.global_pool = nn.AdaptiveAvgPool1d(1)  # Global Average Pooling
        self.fc = nn.Linear(64, num_classes)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.global_pool(x)
        x = x.view(x.size(0), -1)  # Flatten the output for the fully connected layer
        x = self.fc(x)
        return x

# Assuming labels, sequences, etc. are already prepared as per your initial code
# You may need to adapt how you prepare 'X_cnn' if the input dimensions have changed
# Convert sequences and labels to PyTorch tensors
# No need to reshape X_cnn as in your initial approach, unless you're adding a channel dimension

num_classes = len(np.unique(labels))  # Assuming 'labels' are already defined

# Instantiate the model
model = CNNWithGlobalPooling(num_classes=num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Adjusted training loop to accommodate the new model architecture
epochs = 5
for epoch in range(epochs):
    model.train()
    for i, (X_batch, y_batch) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')



Epoch [1/5], Loss: 0.6762
Epoch [2/5], Loss: 0.6534
Epoch [3/5], Loss: 0.7353
Epoch [4/5], Loss: 0.6317
Epoch [5/5], Loss: 0.5145


In [26]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
import torch
from torch.utils.data import DataLoader

# Assuming the existence of 'test_loader' and a PyTorch model 'model'
class_names = {0: 'Clothing', 1: 'Gas stations', 2: 'Grocery'}
model.eval()

all_predictions = []
all_true_labels = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        y_pred = model(X_batch)
        _, predicted_labels = torch.max(y_pred, 1)
        
        all_predictions.extend(predicted_labels.cpu().numpy())
        all_true_labels.extend(y_batch.cpu().numpy())

all_predictions = np.array(all_predictions)
all_true_labels = np.array(all_true_labels)

# Calculate overall metrics
accuracy = accuracy_score(all_true_labels, all_predictions)
precision_weighted, recall_weighted, f1_weighted, _ = precision_recall_fscore_support(all_true_labels, all_predictions, average='weighted')

print(f"Overall Metrics (Weighted Average):")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision_weighted}")
print(f"Recall: {recall_weighted}")
print(f"F1 Score: {f1_weighted}")

# Calculate class-wise metrics
precision, recall, f1, _ = precision_recall_fscore_support(all_true_labels, all_predictions, average=None)

# Print class-wise metrics
for i, (prec, rec, f) in enumerate(zip(precision, recall, f1)):
    print(f"Class {i} - {class_names[i]}: Precision: {prec}, Recall: {rec}, F1 Score: {f}")


Overall Metrics (Weighted Average):
Accuracy: 0.723294528080088
Precision: 0.7155741710472959
Recall: 0.723294528080088
F1 Score: 0.7157682208745819
Class 0 - Clothing: Precision: 0.603312399950745, Recall: 0.47644284533475956, F1 Score: 0.5324241354016681
Class 1 - Gas stations: Precision: 0.7058973574125089, Recall: 0.6109050772626932, F1 Score: 0.6549749124301808
Class 2 - Grocery: Precision: 0.7561746122917863, Recall: 0.8457128173832046, F1 Score: 0.798441326085803


# Preprocess Data for Different Sequence Lengths

In [27]:
# Step 1: Filter the Unseen Test Data
df_test = df_filtered[df_filtered['musteri_id_mask'].isin(ids_test)]

In [30]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np

# Assuming df_test, create_test_sequences, and model are correctly defined

# Helper function to generate sequences for CNN (with channel dimension)
def prepare_sequences_for_cnn(df, sequence_length):
    sequences, labels = create_test_sequences(df, sequence_length)
    X = torch.tensor(sequences, dtype=torch.float32).unsqueeze(1)  # Add channel dimension for CNN
    y = torch.tensor(labels, dtype=torch.long)
    return X, y

# Function to evaluate the model for CNN, updated to return overall and class-wise metrics
def evaluate_model_cnn(X, y, model, class_names):
    test_loader = DataLoader(TensorDataset(X, y), batch_size=64, shuffle=False)
    model.eval()
    all_predictions = []
    all_true_labels = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            y_pred = model(X_batch)
            _, predicted_labels = torch.max(y_pred, 1)
            all_predictions.extend(predicted_labels.cpu().numpy())
            all_true_labels.extend(y_batch.cpu().numpy())

    # Calculate overall metrics
    accuracy = accuracy_score(all_true_labels, all_predictions)
    overall_precision, overall_recall, overall_f1, _ = precision_recall_fscore_support(all_true_labels, all_predictions, average='weighted')

    # Calculate metrics by class
    precision, recall, f1, _ = precision_recall_fscore_support(all_true_labels, all_predictions, average=None, labels=list(class_names.keys()))
    metrics_by_class = {}
    for i, class_id in enumerate(class_names.keys()):
        metrics_by_class[class_names[class_id]] = {
            "Precision": precision[i],
            "Recall": recall[i],
            "F1 Score": f1[i]
        }

    return accuracy, overall_precision, overall_recall, overall_f1, metrics_by_class

# Class names mapping, update with your real class names
class_names = {0: 'Clothing', 1: 'Gas stations', 2: 'Grocery'}

# Generate sequences for different lengths and evaluate
sequence_lengths = [4, 6, 14]  # For sequence lengths of 5, 7, and 15 respectively
results = {}
for length in sequence_lengths:
    X, y = prepare_sequences_for_cnn(df_test, length)
    accuracy, overall_precision, overall_recall, overall_f1, metrics_by_class = evaluate_model_cnn(X, y, model, class_names)
    results[f"Sequence Length {length + 1}"] = {
        "Accuracy": accuracy, 
        "Overall Precision": overall_precision, 
        "Overall Recall": overall_recall, 
        "Overall F1": overall_f1, 
        "Metrics by Class": metrics_by_class
    }

# Print the results
for seq_len, metrics in results.items():
    print(seq_len)
    print(f"Accuracy: {metrics['Accuracy']:.4f}")
    print(f"Overall Precision: {metrics['Overall Precision']:.4f}")
    print(f"Overall Recall: {metrics['Overall Recall']:.4f}")
    print(f"Overall F1: {metrics['Overall F1']:.4f}")
    print("Metrics by Class:")
    for class_name, class_metrics in metrics['Metrics by Class'].items():
        print(f"  {class_name}:")
        for metric_name, value in class_metrics.items():
            print(f"    {metric_name}: {value:.4f}")
    print()  # New line for better readability


Sequence Length 5
Accuracy: 0.6976
Overall Precision: 0.6916
Overall Recall: 0.6976
Overall F1: 0.6935
Metrics by Class:
  Clothing:
    Precision: 0.5673
    Recall: 0.4792
    F1 Score: 0.5195
  Gas stations:
    Precision: 0.6383
    Recall: 0.6345
    F1 Score: 0.6364
  Grocery:
    Precision: 0.7543
    Recall: 0.7963
    F1 Score: 0.7747

Sequence Length 7
Accuracy: 0.7127
Overall Precision: 0.7055
Overall Recall: 0.7127
Overall F1: 0.7070
Metrics by Class:
  Clothing:
    Precision: 0.5891
    Recall: 0.4814
    F1 Score: 0.5298
  Gas stations:
    Precision: 0.6769
    Recall: 0.6284
    F1 Score: 0.6517
  Grocery:
    Precision: 0.7554
    Recall: 0.8221
    F1 Score: 0.7874

Sequence Length 15
Accuracy: 0.7348
Overall Precision: 0.7269
Overall Recall: 0.7348
Overall F1: 0.7262
Metrics by Class:
  Clothing:
    Precision: 0.6185
    Recall: 0.4684
    F1 Score: 0.5331
  Gas stations:
    Precision: 0.7215
    Recall: 0.6142
    F1 Score: 0.6635
  Grocery:
    Precision: 0.7628