In [9]:
import os
import numpy as np
import pandas as pd
import pickle
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import OneHotEncoder
import gensim.downloader as api
import re
import swifter
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import GlorotUniform, Orthogonal
import random
# Enable tqdm for pandas
tqdm.pandas()

# Ensure Reproducibility
import random
# Set seeds for reproducibility
SEED = 42

# Python's built-in random
random.seed(SEED)

# NumPy
np.random.seed(SEED)

# TensorFlow
tf.random.set_seed(SEED)

# Set Python hash seed
os.environ['PYTHONHASHSEED'] = str(SEED)

# Configure TensorFlow for deterministic operations
tf.keras.utils.set_random_seed(SEED)  # Sets all random seeds for the program (Python, NumPy, and TensorFlow)
tf.config.experimental.enable_op_determinism()  # Enable deterministic operations in TensorFlow

# If using GPU, you might also want to set these:
if tf.config.list_physical_devices('GPU'):
    # Force TensorFlow to use deterministic GPU operations
    tf.config.experimental.enable_op_determinism()
    # Limit GPU memory growth
    for gpu in tf.config.experimental.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)

os.environ['TF_DETERMINISTIC_OPS'] = '1'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # Limit to one GPU if using multiple GPUs
os.environ['TF_CUDNN_DETERMINISTIC'] = '1'

In [2]:
# Load GloVe model
glove_model = api.load("glove-twitter-200")  # 200-dimensional GloVe embeddings

In [None]:
# Function to compute the average word vector for a tweet
def get_avg_embedding(tweet, model, vector_size=200):
    words = tweet.split()  # Tokenize by whitespace
    word_vectors = [model[word] for word in words if word in model]
    if not word_vectors:  # If no words in the tweet are in the vocabulary, return a zero vector
        return np.zeros(vector_size)
    return np.mean(word_vectors, axis=0)

import re
from nltk.corpus import stopwords
from nltk.tokenize import TweetTokenizer
from nltk.stem import WordNetLemmatizer

# Preprocessing function using TweetTokenizer
def preprocess_text(text):
    # Initialize tokenizer and lemmatizer
    tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True, reduce_len=True)
    lemmatizer = WordNetLemmatizer()
    
    # Tokenization using TweetTokenizer
    words = tokenizer.tokenize(text)
    
    # Remove punctuation and numbers
    words = [re.sub(r'[^\w\s]', '', word) for word in words]
    words = [re.sub(r'\d+', '', word) for word in words]
    
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words and word.strip() != '']
    
    # Lemmatization
    words = [lemmatizer.lemmatize(word) for word in words]
    
    # Return cleaned text
    return ' '.join(words)


In [4]:
# Load data
folder_path = "train_tweets"
csv_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".csv")]
df = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True)

# Apply preprocessing
df['Tweet'] = df['Tweet'].swifter.apply(preprocess_text)
print(df.head)

KeyboardInterrupt: 

In [None]:
# Feature creation
# Add the length of each tweet as a feature
df['TweetLength'] = df['Tweet'].apply(len)

# Add a simple tweet count feature
df['TweetCount'] = df.groupby(['MatchID', 'PeriodID', 'Timestamp'])['Timestamp'].transform('count')

# Add word count as a feature
df['WordCount'] = df['Tweet'].apply(lambda x: len(x.split()))

print(df)

             ID  MatchID  PeriodID  EventType      Timestamp  \
0           2_0        2         0          0  1403538600000   
1           2_0        2         0          0  1403538600000   
2           2_0        2         0          0  1403538600000   
3           2_0        2         0          0  1403538600000   
4           2_0        2         0          0  1403538600000   
...         ...      ...       ...        ...            ...   
5056045  17_129       17       129          1  1403805600000   
5056046  17_129       17       129          1  1403805600000   
5056047  17_129       17       129          1  1403805600000   
5056048  17_129       17       129          1  1403805600000   
5056049  17_129       17       129          1  1403805600000   

                                                     Tweet  TweetLength  \
0        rt soccerdotcom esp beat au well give away spa...          104   
1        visit sitep official web site httptcoehzkslan ...           95   
2     

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle

# Compute TF-IDF weights for the corpus

# Optional: start from zero and fit on tweets
vectorizer = TfidfVectorizer(max_features=10000) 
vectorizer.fit(df['Tweet'])


# Load pre-computed
#with open('tfidf_vectorizer.pkl', 'rb') as f:
#    vectorizer = pickle.load(f)


tfidf_weights = dict(zip(vectorizer.get_feature_names_out(), vectorizer.idf_))

# Weighted average embeddings
def get_weighted_avg_embedding(tweet, model, vector_size=200, weights=tfidf_weights):
    words = tweet.split()
    word_vectors = [model[word] * weights.get(word, 1) for word in words if word in model]
    if not word_vectors:
        return np.zeros(vector_size)
    return np.mean(word_vectors, axis=0)


In [None]:
# Generate embeddings for each tweet
vector_size = 200  # GloVe embedding dimension
tweet_vectors = df['Tweet'].swifter.apply(lambda tweet: get_weighted_avg_embedding(tweet, model=glove_model, vector_size=200, weights=tfidf_weights))
tweet_vectors = np.array(list(tweet_vectors), dtype=np.float32)

# # Save the tweet vectors
# with open("tweet_vectors.pkl", "wb") as f:
#     pickle.dump(tweet_vectors, f)
# 
# print("Embeddings saved successfully!")
# 
# # Load the tweet vectors
# with open("tweet_vectors.pkl", "rb") as f:
#     loaded_tweet_vectors = pickle.load(f)
# 
# print("Embeddings loaded successfully!")
# print("Loaded vectors shape:", loaded_tweet_vectors.shape)



Embeddings saved successfully!
Embeddings loaded successfully!
Loaded vectors shape: (5056050, 200)


In [10]:
###### Use if no period features ######
# tweet_df = pd.DataFrame(loaded_tweet_vectors)
# 
# # Attach the vectors into the original dataframe
# period_features = pd.concat([df, tweet_df], axis=1)
# 
# # Drop the columns that are not useful anymore
# period_features = period_features.drop(columns=['Timestamp', 'Tweet'])
# 
# print("X_train_reshaped shape:", period_features.shape)
# # Group the tweets into their corresponding periods. This way we generate an average embedding vector for each period
# period_features = period_features.groupby(['MatchID', 'PeriodID', 'ID']).mean().reset_index()
# 
# # Save the tweet vectors
# with open("period_features.pkl", "wb") as f:
#     pickle.dump(period_features, f)
# 
# print("Period features saved successfully!")

# Load the tweet vectors
with open("period_features.pkl", "rb") as f:
    loaded_period_features = pickle.load(f)

print("Period features loaded successfully!")
print("Loaded vectors shape:", loaded_period_features.shape)

Period features loaded successfully!
Loaded vectors shape: (2137, 207)


In [11]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# Drop non-numerical features
X = loaded_period_features.drop(columns=['EventType', 'MatchID', 'ID']).values

# Extract labels
y = loaded_period_features['EventType'].values

# One-hot encode labels
encoder = OneHotEncoder(sparse_output=False)
y_encoded = encoder.fit_transform(y.reshape(-1, 1))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=SEED)

# Add a time-step dimension to match LSTM input shape
X_train_reshaped = X_train[:, None, :]  # Add a new axis for timesteps
X_test_reshaped = X_test[:, None, :]    # Add a new axis for timesteps

# Print shapes for verification
print("X_train_reshaped shape:", X_train_reshaped.shape)
print("X_test_reshaped shape:", X_test_reshaped.shape)


X_train_reshaped shape: (1709, 1, 204)
X_test_reshaped shape: (428, 1, 204)


In [41]:
# Define the early stopping callback
early_stopping = EarlyStopping(
    monitor='val_loss',      # Monitor validation loss
    patience=5,              # Stop training if no improvement after 3 epochs
    restore_best_weights=True  # Restore the best weights when stopping
)

# Define the LSTM model with deterministic initializers
model = Sequential([
    tf.keras.layers.Input(shape=(1, X_train_reshaped.shape[2])),  
    LSTM(
        128, 
        return_sequences=False, 
        kernel_initializer=GlorotUniform(seed=SEED), 
        recurrent_initializer=Orthogonal(seed=SEED),
        bias_initializer='zeros'
    ),             
    Dense(y_encoded.shape[1], activation='softmax', kernel_initializer=GlorotUniform(seed=SEED))
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(X_train_reshaped, y_train,
                    epochs=50,
                    batch_size=32,
                    validation_split=0.2,
                    callbacks=[early_stopping],  # Include the early stopping callback
                    verbose=1)

# Evaluate on the test set
test_loss, test_accuracy = model.evaluate(X_test_reshaped, y_test, verbose=1)

print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

Epoch 1/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.5804 - loss: 0.6845 - val_accuracy: 0.5906 - val_loss: 0.6759
Epoch 2/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6285 - loss: 0.6385 - val_accuracy: 0.6023 - val_loss: 0.6518
Epoch 3/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6694 - loss: 0.6036 - val_accuracy: 0.6579 - val_loss: 0.6282
Epoch 4/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7024 - loss: 0.5739 - val_accuracy: 0.6696 - val_loss: 0.6109
Epoch 5/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7217 - loss: 0.5534 - val_accuracy: 0.6725 - val_loss: 0.6010
Epoch 6/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7317 - loss: 0.5369 - val_accuracy: 0.6784 - val_loss: 0.5911
Epoch 7/50
[1m43/43[0m [32m━━━━━━━━━

In [None]:
###### For Kaggle submission

predictions = []
dummy_predictions = []
# We read each file separately, we preprocess the tweets and then use the classifier to predict the labels.
# Finally, we concatenate all predictions into a list that will eventually be concatenated and exported
# to be submitted on Kaggle.
for fname in sorted(os.listdir("eval_tweets")):
    val_df = pd.read_csv("eval_tweets/" + fname)
    
    val_df['Tweet'] = val_df['Tweet'].swifter.apply(preprocess_text)

    # Feature creation
    # Add the length of each tweet as a feature
    val_df['TweetLength'] = val_df['Tweet'].apply(len)
    
    # Add a simple tweet count feature
    val_df['TweetCount'] = val_df.groupby(['MatchID', 'PeriodID', 'Timestamp'])['Timestamp'].transform('count')
    
    # Add word count as a feature
    val_df['WordCount'] = val_df['Tweet'].apply(lambda x: len(x.split()))

    tweet_vectors = val_df['Tweet'].swifter.apply(lambda tweet: get_weighted_avg_embedding(tweet, model=glove_model, vector_size=200, weights=tfidf_weights))

    tweet_vectors = np.array(list(tweet_vectors), dtype=np.float32)

    tweet_df = pd.DataFrame(tweet_vectors)

    period_features_val = pd.concat([val_df, tweet_df], axis=1)
    period_features_val = period_features_val.drop(columns=['Timestamp', 'Tweet'])
    period_features_val = period_features_val.groupby(['MatchID', 'PeriodID', 'ID']).mean().reset_index()

    X = period_features_val.drop(columns=['MatchID', 'ID']).values

    # Reshape input for LSTM
    X_reshaped = X[:, None, :]  # Add timestep dimension

    preds = model.predict(X_reshaped)
    preds = preds.argmax(axis=1)  # Convert probabilities to class indices
    period_features_val['EventType'] = preds
    predictions.append(period_features_val[['ID', 'EventType']])


pred_df = pd.concat(predictions)
pred_df.to_csv('LSTM_predictions.csv', index=False)



Pandas Apply: 100%|██████████| 285804/285804 [01:35<00:00, 2999.22it/s]
Pandas Apply: 100%|██████████| 285804/285804 [00:16<00:00, 17849.23it/s]


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


Pandas Apply: 100%|██████████| 45024/45024 [00:14<00:00, 3062.46it/s]
Pandas Apply: 100%|██████████| 45024/45024 [00:03<00:00, 14688.43it/s]


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


Pandas Apply: 100%|██████████| 113402/113402 [00:35<00:00, 3218.30it/s]
Pandas Apply: 100%|██████████| 113402/113402 [00:06<00:00, 18774.49it/s]


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


Pandas Apply: 100%|██████████| 628698/628698 [03:15<00:00, 3215.25it/s]
Pandas Apply: 100%|██████████| 628698/628698 [00:34<00:00, 18458.99it/s]


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [14]:
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import f1_score
from xgboost import XGBClassifier
import numpy as np
import warnings


# Suppress warnings from Scikit-learn and XGBoost
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Initialize cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=SEED)

# Reshape X for XGBoost
X_train_sklearn = X_train_reshaped.reshape(X_train_reshaped.shape[0], -1)
X_test_sklearn = X_test_reshaped.reshape(X_test_reshaped.shape[0], -1)

# Ensure labels are binary (1D)
if len(y_train.shape) > 1:
    y_train_sklearn = np.argmax(y_train, axis=1)
    y_test_sklearn = np.argmax(y_test, axis=1)
else:
    y_train_sklearn = y_train
    y_test_sklearn = y_test

# Define hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [4, 6, 8],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0],
}

# Initialize GridSearchCV
grid_search = GridSearchCV(
    estimator=XGBClassifier(
        use_label_encoder=True, 
        objective='binary:logistic', 
        eval_metric='logloss',
        random_state=SEED
    ),
    param_grid=param_grid,
    scoring='f1',
    cv=kf,
    verbose=0,  # Turn off detailed output
    n_jobs=-1
)

# Fit GridSearchCV
print("Starting Grid Search...")
grid_search.fit(X_train_sklearn, y_train_sklearn)
best_model = grid_search.best_estimator_

# Print best parameters and F1 score
print(f"\nBest Parameters: {grid_search.best_params_}")
print(f"Best F1 Score on Training Data: {grid_search.best_score_:.4f}")

# Evaluate on the test set
y_pred_classes = best_model.predict(X_test_sklearn)
test_accuracy = best_model.score(X_test_sklearn, y_test_sklearn)
f1 = f1_score(y_test_sklearn, y_pred_classes, average='binary')

# Print evaluation metrics
print(f"\nTest Accuracy: {test_accuracy:.4f}")
print(f"Test F1 Score: {f1:.4f}")


Starting Grid Search...


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode


Best Parameters: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 150, 'subsample': 0.8}
Best F1 Score on Training Data: 0.7853

Test Accuracy: 0.7921
Test F1 Score: 0.8086


In [None]:
###### For Kaggle submission (XGBoost)

predictions = []
dummy_predictions = []
# We read each file separately, we preprocess the tweets and then use the classifier to predict the labels.
# Finally, we concatenate all predictions into a list that will eventually be concatenated and exported
# to be submitted on Kaggle.
for fname in sorted(os.listdir("eval_tweets")):
    val_df = pd.read_csv("eval_tweets/" + fname)
    
    val_df['Tweet'] = val_df['Tweet'].swifter.apply(preprocess_text)

    # Feature creation
    # Add the length of each tweet as a feature
    val_df['TweetLength'] = val_df['Tweet'].apply(len)
    
    # Add a simple tweet count feature
    val_df['TweetCount'] = val_df.groupby(['MatchID', 'PeriodID', 'Timestamp'])['Timestamp'].transform('count')
    
    # Add word count as a feature
    val_df['WordCount'] = val_df['Tweet'].apply(lambda x: len(x.split()))

    tweet_vectors = val_df['Tweet'].swifter.apply(lambda tweet: get_weighted_avg_embedding(tweet, model=glove_model, vector_size=200, weights=tfidf_weights))

    tweet_vectors = np.array(list(tweet_vectors), dtype=np.float32)

    tweet_df = pd.DataFrame(tweet_vectors)

    period_features_val = pd.concat([val_df, tweet_df], axis=1)
    period_features_val = period_features_val.drop(columns=['Timestamp', 'Tweet'])
    period_features_val = period_features_val.groupby(['MatchID', 'PeriodID', 'ID']).mean().reset_index()

    X = period_features_val.drop(columns=['MatchID', 'ID']).values

    preds = model.predict(X)
    period_features_val['EventType'] = preds
    predictions.append(period_features_val[['ID', 'EventType']])


pred_df = pd.concat(predictions)
pred_df.to_csv('XGBoost_fitted_predictions.csv', index=False)

Pandas Apply: 100%|██████████| 285804/285804 [01:40<00:00, 2854.07it/s]
Pandas Apply: 100%|██████████| 285804/285804 [00:16<00:00, 17167.17it/s]


ValueError: Exception encountered when calling Sequential.call().

[1mCannot take the length of shape with unknown rank.[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=<unknown>, dtype=float32)
  • training=False
  • mask=None

In [21]:
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
import numpy as np
import tensorflow as tf

# Fix: Ensure inputs are reshaped for sklearn models
def reshape_data_for_sklearn(X_train, X_test, y_train, y_test):
    X_train_sklearn = X_train.reshape(X_train.shape[0], -1)
    X_test_sklearn = X_test.reshape(X_test.shape[0], -1)

    if len(y_train.shape) > 1:
        y_train_sklearn = np.argmax(y_train, axis=1)
        y_test_sklearn = np.argmax(y_test, axis=1)
    else:
        y_train_sklearn = y_train
        y_test_sklearn = y_test

    return X_train_sklearn, X_test_sklearn, y_train_sklearn, y_test_sklearn


# Model evaluation function
def evaluate_model(model, X_train, y_train, X_test, y_test, model_type="sklearn"):
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    
    # Tracking metrics
    precision_scores = []
    recall_scores = []
    f1_scores = []
    accuracy_scores = []
    roc_auc_scores = []

    for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
        print(f"\nTraining fold {fold+1}...")

        # Split data
        X_train_cv, X_val_cv = X_train[train_index], X_train[val_index]
        y_train_cv, y_val_cv = y_train[train_index], y_train[val_index]

        if model_type == "tensorflow":
            # Early stopping for TensorFlow models
            early_stopping = tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=5,
                restore_best_weights=True
            )

            # Compile and train the model
            model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

            history = model.fit(X_train_cv, y_train_cv,
                                epochs=50,
                                batch_size=32,
                                validation_data=(X_val_cv, y_val_cv),
                                callbacks=[early_stopping],
                                verbose=1)
            
            _, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
            y_pred_probs = model.predict(X_test)
            y_pred_classes = np.argmax(y_pred_probs, axis=1)
            y_true_classes = np.argmax(y_test, axis=1)

        else:  # scikit-learn models
            model.fit(X_train_cv, y_train_cv)
            y_pred_probs = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else model.decision_function(X_test)
            y_pred_classes = model.predict(X_test)
            y_true_classes = y_test

        # Calculate evaluation metrics
        precision = precision_score(y_true_classes, y_pred_classes, average='weighted')
        recall = recall_score(y_true_classes, y_pred_classes, average='weighted')
        f1 = f1_score(y_true_classes, y_pred_classes, average='weighted')
        accuracy = accuracy_score(y_true_classes, y_pred_classes)
        roc_auc = roc_auc_score(y_true_classes, y_pred_probs) if len(np.unique(y_true_classes)) == 2 else np.nan

        # Store results
        precision_scores.append(precision)
        recall_scores.append(recall)
        f1_scores.append(f1)
        accuracy_scores.append(accuracy)
        roc_auc_scores.append(roc_auc)

        print(f"Fold {fold+1} - Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}, "
              f"Accuracy: {accuracy:.4f}, ROC-AUC: {roc_auc:.4f}")

    # Print average results
    print("\nCross-Validation Results:")
    print(f"Average Precision: {np.mean(precision_scores):.4f} ± {np.std(precision_scores):.4f}")
    print(f"Average Recall: {np.mean(recall_scores):.4f} ± {np.std(recall_scores):.4f}")
    print(f"Average F1 Score: {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}")
    print(f"Average Accuracy: {np.mean(accuracy_scores):.4f} ± {np.std(accuracy_scores):.4f}")
    print(f"Average ROC-AUC: {np.mean(roc_auc_scores):.4f} ± {np.std(roc_auc_scores):.4f}")
    print("----------------------------------------------------\n")


# Ensure correct reshaping
X_train_sklearn, X_test_sklearn, y_train_sklearn, y_test_sklearn = reshape_data_for_sklearn(
    X_train_reshaped, X_test_reshaped, y_train, y_test
)

# Evaluate various models
print("Evaluating Logistic Regression:")
evaluate_model(LogisticRegression(max_iter=1000), X_train_sklearn, y_train_sklearn, X_test_sklearn, y_test_sklearn)

print("Evaluating Random Forest:")
evaluate_model(RandomForestClassifier(n_estimators=100), X_train_sklearn, y_train_sklearn, X_test_sklearn, y_test_sklearn)

print("Evaluating SVM:")
evaluate_model(SVC(kernel='rbf', probability=True), X_train_sklearn, y_train_sklearn, X_test_sklearn, y_test_sklearn)

print("Evaluating XGBoost:")
evaluate_model(XGBClassifier(use_label_encoder=False, eval_metric='logloss'), X_train_sklearn, y_train_sklearn, X_test_sklearn, y_test_sklearn)



Evaluating Logistic Regression:

Training fold 1...
Fold 1 - Precision: 0.7641, Recall: 0.7640, F1 Score: 0.7640, Accuracy: 0.7640, ROC-AUC: 0.8523

Training fold 2...
Fold 2 - Precision: 0.7643, Recall: 0.7640, F1 Score: 0.7634, Accuracy: 0.7640, ROC-AUC: 0.8511

Training fold 3...
Fold 3 - Precision: 0.7593, Recall: 0.7593, F1 Score: 0.7590, Accuracy: 0.7593, ROC-AUC: 0.8503

Training fold 4...
Fold 4 - Precision: 0.7756, Recall: 0.7757, F1 Score: 0.7756, Accuracy: 0.7757, ROC-AUC: 0.8592

Training fold 5...
Fold 5 - Precision: 0.7862, Recall: 0.7850, F1 Score: 0.7842, Accuracy: 0.7850, ROC-AUC: 0.8598

Cross-Validation Results:
Average Precision: 0.7699 ± 0.0097
Average Recall: 0.7696 ± 0.0094
Average F1 Score: 0.7693 ± 0.0093
Average Accuracy: 0.7696 ± 0.0094
Average ROC-AUC: 0.8545 ± 0.0041
----------------------------------------------------

Evaluating Random Forest:

Training fold 1...
Fold 1 - Precision: 0.7900, Recall: 0.7897, F1 Score: 0.7892, Accuracy: 0.7897, ROC-AUC: 0.87

In [38]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, roc_auc_score

model = None

# Define the LSTM model function
def build_lstm_model(input_shape, num_classes):
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=input_shape),
        tf.keras.layers.LSTM(128, return_sequences=False),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Model evaluation function
def evaluate_lstm_model(X_train, y_train, X_test, y_test):
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    
    precision_scores = []
    recall_scores = []
    f1_scores = []
    accuracy_scores = []
    roc_auc_scores = []

    for fold, (train_index, val_index) in enumerate(kf.split(X_train)):
        print(f"\nTraining fold {fold+1}...")

        # Split data
        X_train_cv, X_val_cv = X_train[train_index], X_train[val_index]
        y_train_cv, y_val_cv = y_train[train_index], y_train[val_index]

        # Define the model
        model = build_lstm_model((X_train.shape[1], X_train.shape[2]), y_train.shape[1])

        # Early stopping for TensorFlow models
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        )

        # Train the model
        model.fit(X_train_cv, y_train_cv,
                  epochs=50,
                  batch_size=32,
                  validation_data=(X_val_cv, y_val_cv),
                  callbacks=[early_stopping],
                  verbose=1
                  )
        
        # Evaluate the model
        _, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
        y_pred_probs = model.predict(X_test)
        y_pred_classes = np.argmax(y_pred_probs, axis=1)
        y_true_classes = np.argmax(y_test, axis=1)

        # Calculate evaluation metrics
        precision = precision_score(y_true_classes, y_pred_classes, average='weighted')
        recall = recall_score(y_true_classes, y_pred_classes, average='weighted')
        f1 = f1_score(y_true_classes, y_pred_classes, average='weighted')
        accuracy = accuracy_score(y_true_classes, y_pred_classes)
        roc_auc = roc_auc_score(y_true_classes, y_pred_probs, multi_class='ovr') if y_train.shape[1] > 2 else roc_auc_score(y_true_classes, y_pred_probs[:, 1])

        # Store results
        precision_scores.append(precision)
        recall_scores.append(recall)
        f1_scores.append(f1)
        accuracy_scores.append(accuracy)
        roc_auc_scores.append(roc_auc)

        print(f"Fold {fold+1} - Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}, "
              f"Accuracy: {accuracy:.4f}, ROC-AUC: {roc_auc:.4f}")

    # Print average results
    print("\nCross-Validation Results:")
    print(f"Average Precision: {np.mean(precision_scores):.4f} ± {np.std(precision_scores):.4f}")
    print(f"Average Recall: {np.mean(recall_scores):.4f} ± {np.std(recall_scores):.4f}")
    print(f"Average F1 Score: {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}")
    print(f"Average Accuracy: {np.mean(accuracy_scores):.4f} ± {np.std(accuracy_scores):.4f}")
    print(f"Average ROC-AUC: {np.mean(roc_auc_scores):.4f} ± {np.std(roc_auc_scores):.4f}")
    print("----------------------------------------------------\n")


# Evaluate the LSTM model
evaluate_lstm_model(X_train_reshaped, y_train, X_test_reshaped, y_test)



Training fold 1...
Epoch 1/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.5792 - loss: 0.6840 - val_accuracy: 0.6228 - val_loss: 0.6561
Epoch 2/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6399 - loss: 0.6285 - val_accuracy: 0.6374 - val_loss: 0.6229
Epoch 3/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6635 - loss: 0.5994 - val_accuracy: 0.7076 - val_loss: 0.5957
Epoch 4/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6934 - loss: 0.5775 - val_accuracy: 0.7398 - val_loss: 0.5755
Epoch 5/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7189 - loss: 0.5581 - val_accuracy: 0.7310 - val_loss: 0.5600
Epoch 6/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7267 - loss: 0.5404 - val_accuracy: 0.7281 - val_loss: 0.5560
Epoch 7/50
[1m43/4