In [None]:
try:
    from google.colab import drive
    drive.mount('/content/drive/')
    %cd 'drive/My Drive/Team Project'
except ImportError as e:
    pass

Mounted at /content/drive/
/content/drive/My Drive/Team Project


In [None]:
import pandas as pd

In [None]:
# Load the journal dataset from the current working directory for a quick look.
# NOTE(review): the modelling cells further down read "Dataset500.xlsx" instead
# of this file name -- confirm which file is the intended input.
df = pd.read_excel("Journal_500Dataset.xlsx")

In [None]:
# Summarise columns, dtypes and non-null counts to spot missing labels.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   text_cleaned  500 non-null    object
 1   er_strat      480 non-null    object
 2   adaptive      429 non-null    object
dtypes: object(3)
memory usage: 11.8+ KB


In [None]:
# Frequency of each raw `er_strat` string. Comma-separated combinations are
# counted as their own distinct values here, not per individual strategy.
df['er_strat'].value_counts()

Unnamed: 0_level_0,count
er_strat,Unnamed: 1_level_1
Attentional Deployment,142
Cognitive Change,113
Response Modulation,63
none,58
Situation Selection,20
Situation Modification,15
"Response Modulation, Cognitive Change",14
"Situation Selection, Cognitive Change",11
"Attentional Deployment, Cognitive Change",7
"Situation Modification, Attentional Deployment",7


**Neural Network Approach**

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.metrics import classification_report  # Import classification_report
import joblib
import traceback

# TF-IDF Vectorization
def tfidf_vectorizer(X_train, X_test):
    """Fit a TF-IDF vectorizer on the training texts and transform both splits.

    The vectorizer keeps at most 4000 features built from unigrams and bigrams.
    It is fit on ``X_train`` only and then applied to ``X_test``. The fitted
    vectorizer is written to "tfidf_vectorizer.sav" in the current working
    directory via ``joblib.dump``.

    Args:
        X_train: Iterable of training documents used to fit the vocabulary.
        X_test: Iterable of test documents, transformed with the fitted vocabulary.

    Returns:
        A tuple ``(train_features, test_features)`` of dense arrays.

    Raises:
        Exception: Any failure during fitting, transforming or saving is logged
            and re-raised. Previously the error was swallowed and the function
            returned ``None``, which made the caller's tuple unpacking fail
            with an unrelated ``TypeError``.
    """
    try:
        vectorizer = TfidfVectorizer(max_features=4000, ngram_range=(1, 2))
        train_matrix = vectorizer.fit_transform(X_train)
        test_matrix = vectorizer.transform(X_test)
        joblib.dump(vectorizer, "tfidf_vectorizer.sav")
        # Dense layers need dense input, so convert the sparse matrices.
        return train_matrix.toarray(), test_matrix.toarray()
    except Exception as e:
        print(f"Error in TF-IDF Vectorization: {e}")
        # Propagate with the original traceback so the caller sees the real cause.
        raise

# Build Neural Network Model
def build_model(input_dim, output_dim):
    """Build and compile a feed-forward multi-label classifier.

    Architecture: Input -> Dense(512, relu) -> Dropout(0.3) -> Dense(256, relu)
    -> Dropout(0.3) -> Dense(output_dim, sigmoid).

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of labels; each gets an independent sigmoid unit.

    Returns:
        A compiled ``Sequential`` model using Adam (learning rate 0.001),
        binary cross-entropy loss and a metric named ``accuracy``.
    """
    model = Sequential([
        # Explicit Input layer: passing `input_dim` to the first Dense layer is
        # deprecated in Keras 3 and triggers a UserWarning.
        keras.Input(shape=(input_dim,)),
        Dense(512, activation='relu'),
        Dropout(0.3),
        Dense(256, activation='relu'),
        Dropout(0.3),
        Dense(output_dim, activation='sigmoid')  # Sigmoid for multi-label classification
    ])
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='binary_crossentropy',  # Binary crossentropy for multi-label
                  # The string 'accuracy' is resolved from the tensor shapes and
                  # becomes categorical (argmax) accuracy for an (n, k) multi-hot
                  # target. BinaryAccuracy scores each label independently; the
                  # metric keeps the name 'accuracy' so history keys and the
                  # `model.evaluate` output order are unchanged.
                  metrics=[keras.metrics.BinaryAccuracy(name='accuracy')])
    return model

# Main Script
try:
    # Seed the Python, NumPy and TensorFlow RNGs so weight initialisation,
    # dropout and batch shuffling are repeatable across runs.
    tf.keras.utils.set_random_seed(42)

    # Load Dataset
    file_path = "Dataset500.xlsx"  # Update file path
    df = pd.read_excel(file_path)
    # Drop rows with missing labels. `.copy()` makes the filtered frame
    # independent so adding a column below cannot raise SettingWithCopyWarning.
    df = df[df['er_strat'].notna()].copy()
    # Split on "," and strip each part so "A,B" and "A, B" yield the same labels.
    df["labels"] = df["er_strat"].apply(
        lambda x: [part.strip() for part in str(x).split(",") if part.strip()]
    )

    # Encode Labels using MultiLabelBinarizer (one column per distinct strategy)
    mlb = MultiLabelBinarizer()
    y = mlb.fit_transform(df["labels"])
    X = df["text_cleaned"]

    # Train-Test Split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Apply TF-IDF (vocabulary is fit on the training split only)
    X_train, X_test = tfidf_vectorizer(X_train, X_test)

    # Compute Sample Weights for Imbalance Handling
    sample_weights = compute_sample_weight(class_weight="balanced", y=y_train)

    # Build & Train Model
    model = build_model(input_dim=X_train.shape[1], output_dim=y_train.shape[1])

    early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

    # Early stopping picks the best epoch from the validation loss, so the
    # validation data must not be the test set, or the reported test metrics
    # are optimistically biased. Hold out the last 20% of the (already
    # shuffled) training rows instead; Keras splits sample_weight to match.
    model.fit(X_train, y_train,
              validation_split=0.2,
              epochs=50,
              batch_size=32,
              sample_weight=sample_weights,  # Adjust weights for imbalance
              callbacks=[early_stopping],
              verbose=1)

    # Save the model
    model.save("multi_label_nn_model.h5")

    # Evaluate Model on the untouched test split
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test Accuracy: {accuracy:.4f}")

    # Make Predictions
    y_pred = model.predict(X_test)
    y_pred_binary = (y_pred > 0.5).astype(int)  # Convert probabilities to binary labels

    # Save classification report
    report = classification_report(y_test, y_pred_binary, target_names=mlb.classes_, zero_division=0, output_dict=True)
    pd.DataFrame(report).transpose().to_csv("NN_classification_report.csv")

except Exception as e:
    print(f"Error in main script: {e}")
    traceback.print_exc()


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 81ms/step - accuracy: 0.2900 - loss: 0.2492 - val_accuracy: 0.3403 - val_loss: 0.5944
Epoch 2/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.3089 - loss: 0.2270 - val_accuracy: 0.3403 - val_loss: 0.5128
Epoch 3/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.3512 - loss: 0.2144 - val_accuracy: 0.3403 - val_loss: 0.5091
Epoch 4/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.3421 - loss: 0.1890 - val_accuracy: 0.2986 - val_loss: 0.5069
Epoch 5/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.4966 - loss: 0.1649 - val_accuracy: 0.3264 - val_loss: 0.4900
Epoch 6/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.7745 - loss: 0.1466 - val_accuracy: 0.3681 - val_loss: 0.4758
Epoch 7/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.4473 - loss: 0.4504




Test Accuracy: 0.4097
[1m1/5[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 63ms/step



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step


**CNN + BiLSTM Approach**

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Bidirectional, Flatten
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import classification_report

# Seed the Python, NumPy and TensorFlow RNGs so results are repeatable.
tf.keras.utils.set_random_seed(42)

# Load Dataset
file_path = "Dataset500.xlsx"  # Update file path
df = pd.read_excel(file_path)
# Drop missing labels; `.copy()` avoids SettingWithCopyWarning on the column
# assignment below.
df = df[df['er_strat'].notna()].copy()
# Split on "," and strip each part so "A,B" and "A, B" yield the same labels.
df["labels"] = df["er_strat"].apply(
    lambda x: [part.strip() for part in str(x).split(",") if part.strip()]
)

# Encode Labels
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(df["labels"])
X = df["text_cleaned"]

# Choose train/test rows BEFORE fitting the tokenizer so the vocabulary is
# learned from training texts only (no test-set leakage). Splitting an index
# array with the same size and random_state selects the same rows as splitting
# the data directly.
train_idx, test_idx = train_test_split(np.arange(len(X)), test_size=0.3, random_state=42)

# Tokenization & Padding
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(X.iloc[train_idx])
X_seq = tokenizer.texts_to_sequences(X)
X_padded = pad_sequences(X_seq, maxlen=200, padding="post", truncating="post")

# Train-Test Split
X_train, X_test = X_padded[train_idx], X_padded[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Build CNN + BiLSTM Model
model = Sequential([
    # Explicit Input layer replaces the deprecated `input_length` argument of
    # Embedding; 200 matches the `maxlen` used for padding above.
    tf.keras.Input(shape=(200,)),
    Embedding(input_dim=5000, output_dim=128),
    Conv1D(filters=64, kernel_size=5, activation="relu"),
    MaxPooling1D(pool_size=2),
    Bidirectional(LSTM(128, return_sequences=True)),
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(y.shape[1], activation="sigmoid")  # Multi-label classification
])

# The string "accuracy" resolves to categorical (argmax) accuracy for multi-hot
# targets; BinaryAccuracy scores each label independently and keeps the metric
# name "accuracy" in the training logs.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=[tf.keras.metrics.BinaryAccuracy(name="accuracy")],
)

# Train Model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, verbose=1)

# Save Model
model.save("cnn_bilstm_model.h5")

# Evaluate Model
y_pred = (model.predict(X_test) > 0.5).astype(int)
# zero_division=0 matches the NN cell and silences the undefined-metric
# warnings for labels that are never predicted.
print(classification_report(y_test, y_pred, target_names=mlb.classes_, zero_division=0))


Epoch 1/10




[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 382ms/step - accuracy: 0.2177 - loss: 0.5786 - val_accuracy: 0.3542 - val_loss: 0.4638
Epoch 2/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 329ms/step - accuracy: 0.3323 - loss: 0.4468 - val_accuracy: 0.4028 - val_loss: 0.4467
Epoch 3/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 464ms/step - accuracy: 0.3635 - loss: 0.4438 - val_accuracy: 0.4306 - val_loss: 0.4394
Epoch 4/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 299ms/step - accuracy: 0.4270 - loss: 0.4268 - val_accuracy: 0.4306 - val_loss: 0.4296
Epoch 5/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 300ms/step - accuracy: 0.5215 - loss: 0.3944 - val_accuracy: 0.4306 - val_loss: 0.4336
Epoch 6/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 403ms/step - accuracy: 0.5531 - loss: 0.3648 - val_accuracy: 0.4514 - val_loss: 0.4424
Epoch 7/10
[1m11/11[0m [32m━━━━━━━━



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 167ms/step
                        precision    recall  f1-score   support

Attentional Deployment       0.44      0.27      0.34        51
      Cognitive Change       0.60      0.48      0.53        56
   Response Modulation       0.28      0.33      0.31        27
Situation Modification       0.00      0.00      0.00        13
   Situation Selection       0.00      0.00      0.00        14
                  none       1.00      0.42      0.59        19

             micro avg       0.49      0.32      0.39       180
             macro avg       0.39      0.25      0.29       180
          weighted avg       0.46      0.32      0.37       180
           samples avg       0.38      0.34      0.34       180



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
