<a href="https://colab.research.google.com/github/VaishnaviBairagoni/Natural-Language-Processing-NLP-/blob/main/(NLP-F-12-9-2025).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
#Deep Learning
#Preprocessing
import pandas as pd
import re
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

# Load the tweets CSV into a DataFrame. Later cells read its "text" and
# "target" columns.
# NOTE(review): the path is absolute ("/tweets.csv"), presumably a Colab upload
# location — confirm before running elsewhere.
df = pd.read_csv("/tweets.csv")  # adjust path if needed

# Stop-word lookup used by preprocess_text: scikit-learn's built-in English
# list copied into a plain set.
stopwords = set(ENGLISH_STOP_WORDS)

# Preprocessing function
def preprocess_text(s):
    """Normalise one raw tweet into a space-joined string of content tokens.

    The input is stringified and lower-cased. URLs, @mentions and #hashtags
    (the tag word included) are blanked out, then every character that is not
    a-z or whitespace is replaced by a space. The remaining words are filtered
    against the module-level ``stopwords`` set and single-character tokens are
    dropped.

    Parameters
    ----------
    s : any
        Raw text; non-string values are passed through ``str()`` first.

    Returns
    -------
    str
        Cleaned tokens joined by single spaces (may be empty).
    """
    # Applied strictly in this order: the URL / mention / hashtag patterns rely
    # on ':', '@' and '#', which the final generic pattern would erase.
    noise_patterns = (
        r'http\S+|www\.\S+',   # URLs
        r'@\w+',               # mentions
        r'#\w+',               # hashtags
        r'[^a-z\s]',           # punctuation, numbers, special chars
    )

    text = str(s).lower()
    for pattern in noise_patterns:
        text = re.sub(pattern, ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()   # collapse whitespace runs

    kept = []
    for word in text.split():
        if word not in stopwords and len(word) > 1:
            kept.append(word)
    return " ".join(kept)

# Apply preprocessing to every tweet and keep the result in a new "clean" column.
# Missing tweets are replaced with "" first: astype(str) alone would turn NaN
# into the literal string "nan", which would then survive cleaning as a token.
df["clean"] = df["text"].fillna("").astype(str).apply(preprocess_text)

# Side-by-side look at raw vs. cleaned text for the first rows.
print(df[["text", "clean"]].head())


                                                text  \
0  Communal violence in Bhainsa, Telangana. "Ston...   
1  Telangana: Section 144 has been imposed in Bha...   
2  Arsonist sets cars ablaze at dealership https:...   
3  Arsonist sets cars ablaze at dealership https:...   
4  "Lord Jesus, your love brings freedom and pard...   

                                               clean  
0  communal violence bhainsa telangana stones pel...  
1  telangana section imposed bhainsa january clas...  
2               arsonist sets cars ablaze dealership  
3               arsonist sets cars ablaze dealership  
4  lord jesus love brings freedom pardon holy spi...  


In [13]:
# Feature extraction: CountVectorizer (bag-of-words) + TF-IDF
import joblib
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split

# Stratified 80/20 split of the cleaned text and the "target" labels; the fixed
# random_state keeps the split identical across runs. The resulting arrays are
# reused by the deep-learning and evaluation cells.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    df['clean'].values, df['target'].values, test_size=0.2, stratify=df['target'].values, random_state=42
)

# --- CountVectorizer (bag-of-words counts) ---
count_vect = CountVectorizer(
    max_features=10000,      # adjust up/down based on memory
    ngram_range=(1,2),       # unigrams + bigrams often helpful for tweets
    min_df=2,                # ignore extremely rare tokens
    binary=False
)
# Vocabulary is fitted on the training split only and then applied to the test split.
X_train_count = count_vect.fit_transform(X_train_raw)
X_test_count  = count_vect.transform(X_test_raw)

print("Count vector shape:", X_train_count.shape)  # (n_samples, vocab_size)

# Save for later reuse
joblib.dump(count_vect, "count_vectorizer.joblib")

# --- TF-IDF Vectorizer ---
# Same vocabulary limits as the count vectorizer, plus L2 row normalisation and
# sublinear term-frequency scaling.
tfidf_vect = TfidfVectorizer(
    max_features=10000,
    ngram_range=(1,2),
    min_df=2,
    norm='l2',
    sublinear_tf=True
)
# As above: fit on train, transform test with the fitted vocabulary/IDF.
X_train_tfidf = tfidf_vect.fit_transform(X_train_raw)
X_test_tfidf  = tfidf_vect.transform(X_test_raw)

print("TF-IDF shape:", X_train_tfidf.shape)

# Save TF-IDF vectorizer
joblib.dump(tfidf_vect, "tfidf_vectorizer.joblib")

# Optional quick sanity: top features
def top_terms(vectorizer, k=20):
    """Return the first ``k`` feature names of a fitted vectorizer.

    Names are taken in the order ``get_feature_names_out()`` yields them; no
    ranking by weight or frequency is applied, so the result is a sample of
    the vocabulary rather than its most important terms.

    Parameters
    ----------
    vectorizer : object
        Anything exposing ``get_feature_names_out()``.
    k : int, default 20
        Number of leading names to return.
    """
    return vectorizer.get_feature_names_out()[:k]

# Show the first 20 entries of the fitted TF-IDF vocabulary as a sanity check.
print("Sample TF-IDF features:", top_terms(tfidf_vect, 20))



Count vector shape: (9096, 10000)
TF-IDF shape: (9096, 10000)
Sample TF-IDF features: ['ab' 'abandoned' 'abbott' 'abby' 'abc' 'ablaze' 'ablaze dealership'
 'able' 'able totally' 'able touch' 'abnormal' 'abo' 'aboard'
 'abomination' 'abomination desolation' 'aboriginal' 'aboriginal planners'
 'abortion' 'abortion true' 'abou']


In [15]:
# Deep Learning Models (Keras)


import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (Input, Embedding, GlobalAveragePooling1D, Dense, Dropout,
                                     Conv1D, GlobalMaxPooling1D, SpatialDropout1D,
                                     LSTM, Bidirectional)
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import joblib

# Reproducibility: seed NumPy and TensorFlow with the same value.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

# Hyperparameters
MAX_NUM_WORDS = 20000        # passed to Tokenizer(num_words=...) and caps vocab_size
MAX_SEQUENCE_LENGTH = 60     # every sequence is padded/truncated to this length
EMBEDDING_DIM = 100          # width of each embedding vector
BATCH_SIZE = 128             # batch size used by every model.fit call
EPOCHS = 6                   # upper bound on epochs; early stopping may end sooner

# Optional: use pretrained GloVe if you have it (download glove.6B.100d.txt to working dir)
USE_GLOVE = False
GLOVE_PATH = "glove.6B.100d.txt"

# Tokenize + Pad
# The tokenizer is fitted on the training split only; "<OOV>" is its out-of-vocabulary token.
tokenizer = Tokenizer(num_words=MAX_NUM_WORDS, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train_raw)         # X_train_raw should be an array/list of cleaned strings

# Convert each text to a list of integer word ids.
X_train_seq = tokenizer.texts_to_sequences(X_train_raw)
X_test_seq  = tokenizer.texts_to_sequences(X_test_raw)

# Pad and truncate at the end of each sequence ('post') to MAX_SEQUENCE_LENGTH.
X_train_pad = pad_sequences(X_train_seq, maxlen=MAX_SEQUENCE_LENGTH, padding='post', truncating='post')
X_test_pad  = pad_sequences(X_test_seq,  maxlen=MAX_SEQUENCE_LENGTH, padding='post', truncating='post')

# Embedding input size: number of distinct word ids (+1 because ids start at 1),
# capped at MAX_NUM_WORDS.
vocab_size = min(MAX_NUM_WORDS, len(tokenizer.word_index) + 1)
print("Vocab size:", vocab_size)

# Optional: build embedding matrix from GloVe
# embedding_matrix stays None unless USE_GLOVE is True; the model-building code
# below checks both before using pretrained weights.
embedding_matrix = None
if USE_GLOVE:
    if not os.path.exists(GLOVE_PATH):
        raise FileNotFoundError(f"GloVe file not found at {GLOVE_PATH}. Set USE_GLOVE=False or provide the file.")
    print("Loading GloVe vectors (this may take a minute)...")
    # word -> float32 vector, one entry per line of the GloVe text file
    embeddings_index = {}
    with open(GLOVE_PATH, "r", encoding="utf8") as f:
        for line in f:
            # Each line is "<word> <v1> <v2> ..." separated by single spaces.
            parts = line.rstrip().split(" ")
            word = parts[0]
            vec = np.asarray(parts[1:], dtype="float32")
            embeddings_index[word] = vec
    # Row i holds the vector for tokenizer id i; rows without a match stay all-zero.
    embedding_matrix = np.zeros((vocab_size, EMBEDDING_DIM))
    for word, idx in tokenizer.word_index.items():
        if idx >= vocab_size:
            continue  # id falls outside the capped vocabulary
        vec = embeddings_index.get(word)
        # Skip words missing from GloVe and vectors whose length is not EMBEDDING_DIM.
        if vec is not None and vec.shape[0] == EMBEDDING_DIM:
            embedding_matrix[idx] = vec
    print("Prepared embedding matrix.")

# ---------------- Utility: evaluation ----------------
results = []

def eval_and_store(name, y_true, y_pred):
    """Score one model's predictions, print a summary, and log the scores.

    Accuracy, precision, recall and F1 are computed with scikit-learn;
    ``zero_division=0`` makes an undefined precision/recall/F1 evaluate to 0.
    A one-line headline and the full classification report are printed, and a
    record with keys ``model``, ``accuracy``, ``precision``, ``recall`` and
    ``f1`` is appended to the module-level ``results`` list.

    Parameters
    ----------
    name : str
        Label for the model in the printout and the stored record.
    y_true, y_pred : array-like
        Ground-truth and predicted labels.
    """
    scores = {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, zero_division=0),
        "recall": recall_score(y_true, y_pred, zero_division=0),
        "f1": f1_score(y_true, y_pred, zero_division=0),
    }
    headline = (
        f"\n{name} | Acc: {scores['accuracy']:.4f} | Prec: {scores['precision']:.4f}"
        f" | Rec: {scores['recall']:.4f} | F1: {scores['f1']:.4f}"
    )
    print(headline)
    print(classification_report(y_true, y_pred, digits=4))
    results.append({"model": name, **scores})

# Early stopping callback: stop after 2 epochs without val_loss improvement and
# roll back to the best weights. This single instance is passed to all three
# model.fit calls in this cell.
es = EarlyStopping(monitor="val_loss", patience=2, restore_best_weights=True, verbose=1)

# Model 1: MLP on averaged embeddings
# Embedding -> mean over the sequence axis -> Dense(64) -> Dropout -> sigmoid.
print("\nBuilding and training: MLP (avg embeddings)")
inputs = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype="int32")
# Frozen GloVe weights when enabled; otherwise an embedding learned from scratch.
if USE_GLOVE and (embedding_matrix is not None):
    emb = Embedding(vocab_size, EMBEDDING_DIM, weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=False)(inputs)
else:
    emb = Embedding(vocab_size, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH)(inputs)

# NOTE(review): the Embedding is built without mask_zero, so padded positions
# are included in this average — worth revisiting if the model underperforms.
x = GlobalAveragePooling1D()(emb)
x = Dense(64, activation="relu")(x)
x = Dropout(0.3)(x)
outputs = Dense(1, activation="sigmoid")(x)

mlp_model = Model(inputs, outputs)
mlp_model.compile(optimizer=Adam(1e-3), loss="binary_crossentropy", metrics=["accuracy"])
mlp_model.summary()
# validation_split=0.1 holds out part of the training data for the val_loss that `es` monitors.
mlp_model.fit(X_train_pad, y_train, validation_split=0.1, epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=[es], verbose=1)

# Sigmoid outputs are thresholded at 0.5 to obtain 0/1 class predictions.
y_pred_mlp = (mlp_model.predict(X_test_pad, verbose=0).ravel() >= 0.5).astype(int)
eval_and_store("MLP (avg embeddings)", y_test, y_pred_mlp)

# Model 2: 1D-CNN
# Embedding -> Conv1D (128 filters, width 5) -> global max pool -> Dense(64) -> Dropout -> sigmoid.
print("\nBuilding and training: 1D-CNN")
cnn = Sequential()
# Frozen GloVe weights when enabled; otherwise an embedding learned from scratch.
if USE_GLOVE and (embedding_matrix is not None):
    cnn.add(Embedding(vocab_size, EMBEDDING_DIM, weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=False))
else:
    cnn.add(Embedding(vocab_size, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH))

cnn.add(Conv1D(128, kernel_size=5, activation="relu"))
cnn.add(GlobalMaxPooling1D())
cnn.add(Dense(64, activation="relu"))
cnn.add(Dropout(0.3))
cnn.add(Dense(1, activation="sigmoid"))

cnn.compile(optimizer=Adam(1e-3), loss="binary_crossentropy", metrics=["accuracy"])
cnn.summary()
# Same training setup as the MLP, including the shared early-stopping callback `es`.
cnn.fit(X_train_pad, y_train, validation_split=0.1, epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=[es], verbose=1)

# Sigmoid outputs are thresholded at 0.5 to obtain 0/1 class predictions.
y_pred_cnn = (cnn.predict(X_test_pad, verbose=0).ravel() >= 0.5).astype(int)
eval_and_store("1D-CNN", y_test, y_pred_cnn)

# Model 3: Bidirectional LSTM
# Embedding -> SpatialDropout1D -> BiLSTM(64 units per direction) -> Dense(64) -> Dropout -> sigmoid.
print("\nBuilding and training: Bidirectional LSTM")
lstm_model = Sequential()
# Frozen GloVe weights when enabled; otherwise an embedding learned from scratch.
if USE_GLOVE and (embedding_matrix is not None):
    lstm_model.add(Embedding(vocab_size, EMBEDDING_DIM, weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=False))
else:
    lstm_model.add(Embedding(vocab_size, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH))

lstm_model.add(SpatialDropout1D(0.2))
# Dropout is applied to both the LSTM inputs and its recurrent state.
lstm_model.add(Bidirectional(LSTM(64, dropout=0.2, recurrent_dropout=0.2)))
lstm_model.add(Dense(64, activation="relu"))
lstm_model.add(Dropout(0.3))
lstm_model.add(Dense(1, activation="sigmoid"))

lstm_model.compile(optimizer=Adam(1e-3), loss="binary_crossentropy", metrics=["accuracy"])
lstm_model.summary()
# Same training setup as the other models, including the shared early-stopping callback `es`.
lstm_model.fit(X_train_pad, y_train, validation_split=0.1, epochs=EPOCHS, batch_size=BATCH_SIZE, callbacks=[es], verbose=1)

# Sigmoid outputs are thresholded at 0.5 to obtain 0/1 class predictions.
y_pred_lstm = (lstm_model.predict(X_test_pad, verbose=0).ravel() >= 0.5).astype(int)
eval_and_store("Bidirectional LSTM", y_test, y_pred_lstm)

# Save models and tokenizer
# The three Keras models are written as .h5 files in the working directory; the
# fitted tokenizer is pickled with joblib so new text can be encoded the same way.
mlp_model.save("mlp_avg_embeddings.h5")
cnn.save("cnn_text.h5")
lstm_model.save("bilstm_text.h5")
joblib.dump(tokenizer, "tokenizer.joblib")

# Save results dataframe
# One row per eval_and_store call, ordered from best to worst F1.
import pandas as pd
res_df = pd.DataFrame(results).sort_values("f1", ascending=False).reset_index(drop=True)
res_df.to_csv("dl_experiment_results.csv", index=False)
print("\nSaved dl_experiment_results.csv and model files (h5) and tokenizer.joblib")

# Done.


Vocab size: 17101

Building and training: MLP (avg embeddings)




Epoch 1/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - accuracy: 0.7644 - loss: 0.5232 - val_accuracy: 0.7912 - val_loss: 0.5077
Epoch 2/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.8125 - loss: 0.4828 - val_accuracy: 0.7912 - val_loss: 0.5066
Epoch 3/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - accuracy: 0.8125 - loss: 0.4767 - val_accuracy: 0.7912 - val_loss: 0.5031
Epoch 4/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.8125 - loss: 0.4741 - val_accuracy: 0.7912 - val_loss: 0.4963
Epoch 5/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - accuracy: 0.8125 - loss: 0.4651 - val_accuracy: 0.7912 - val_loss: 0.4819
Epoch 6/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.8125 - loss: 0.4444 - val_accuracy: 0.7912 - val_loss: 0.4491
Restoring model weights from the end of 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 85ms/step - accuracy: 0.8119 - loss: 0.5253 - val_accuracy: 0.7912 - val_loss: 0.4629
Epoch 2/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 95ms/step - accuracy: 0.8383 - loss: 0.3480 - val_accuracy: 0.8890 - val_loss: 0.3251
Epoch 3/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 113ms/step - accuracy: 0.9564 - loss: 0.1353 - val_accuracy: 0.8747 - val_loss: 0.4003
Epoch 4/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 97ms/step - accuracy: 0.9863 - loss: 0.0526 - val_accuracy: 0.8824 - val_loss: 0.5309
Epoch 4: early stopping
Restoring model weights from the end of the best epoch: 2.

1D-CNN | Acc: 0.8676 | Prec: 0.6667 | Rec: 0.5768 | F1: 0.6185
              precision    recall  f1-score   support

           0     0.9062    0.9341    0.9199      1851
           1     0.6667    0.5768    0.6185       423

    accuracy                         0.8676    



Epoch 1/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 325ms/step - accuracy: 0.7710 - loss: 0.5374 - val_accuracy: 0.7912 - val_loss: 0.4576
Epoch 2/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 323ms/step - accuracy: 0.8518 - loss: 0.3555 - val_accuracy: 0.8868 - val_loss: 0.3193
Epoch 3/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 298ms/step - accuracy: 0.9406 - loss: 0.1583 - val_accuracy: 0.8824 - val_loss: 0.3601
Epoch 4/6
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 345ms/step - accuracy: 0.9706 - loss: 0.0907 - val_accuracy: 0.8703 - val_loss: 0.4392
Epoch 4: early stopping
Restoring model weights from the end of the best epoch: 2.





Bidirectional LSTM | Acc: 0.8826 | Prec: 0.7532 | Rec: 0.5485 | F1: 0.6347
              precision    recall  f1-score   support

           0     0.9028    0.9589    0.9300      1851
           1     0.7532    0.5485    0.6347       423

    accuracy                         0.8826      2274
   macro avg     0.8280    0.7537    0.7824      2274
weighted avg     0.8750    0.8826    0.8751      2274


Saved dl_experiment_results.csv and model files (h5) and tokenizer.joblib


In [18]:
# Evaluation Section
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Start a fresh results log for this cell; records collected by earlier cells
# under the same name are discarded.
results = []

def evaluate_model(name, y_true, y_pred):
    """Score one model's predictions, print a report, and log the scores.

    Accuracy, precision, recall and F1 are computed with scikit-learn;
    ``zero_division=0`` makes an undefined precision/recall/F1 evaluate to 0.
    The four scores and the full classification report are printed, and a
    record with keys ``model``, ``accuracy``, ``precision``, ``recall`` and
    ``f1`` is appended to the module-level ``results`` list.

    Parameters
    ----------
    name : str
        Label for the model in the printout and the stored record.
    y_true, y_pred : array-like
        Ground-truth and predicted labels.
    """
    scores = {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, zero_division=0),
        "recall": recall_score(y_true, y_pred, zero_division=0),
        "f1": f1_score(y_true, y_pred, zero_division=0),
    }

    print(f"\n{name}")
    print(f"Accuracy : {scores['accuracy']:.4f}")
    print(f"Precision: {scores['precision']:.4f}")
    print(f"Recall   : {scores['recall']:.4f}")
    print(f"F1-score : {scores['f1']:.4f}")
    report = classification_report(y_true, y_pred, digits=4)
    print("\nClassification Report:\n", report)

    results.append({"model": name, **scores})

# Each model below is scored only if a variable with that name exists in the
# notebook namespace; absent models are skipped without any message.
# NOTE(review): no cell in view defines `lr` or `svc` — confirm they are
# trained elsewhere, otherwise the TF-IDF baselines never appear in the summary.

# Logistic Regression (TF-IDF)
if 'lr' in globals():
    y_pred_lr = lr.predict(X_test_tfidf)
    evaluate_model("Logistic Regression (TF-IDF)", y_test, y_pred_lr)

# SVM (TF-IDF)
if 'svc' in globals():
    y_pred_svc = svc.predict(X_test_tfidf)
    evaluate_model("SVM (TF-IDF)", y_test, y_pred_svc)

# Keras models: sigmoid outputs are thresholded at 0.5 to obtain 0/1 predictions.

# MLP (avg embeddings)
if 'mlp_model' in globals():
    y_pred_mlp = (mlp_model.predict(X_test_pad, verbose=0).ravel() >= 0.5).astype(int)
    evaluate_model("MLP (avg embeddings)", y_test, y_pred_mlp)

# 1D-CNN
if 'cnn' in globals():
    y_pred_cnn = (cnn.predict(X_test_pad, verbose=0).ravel() >= 0.5).astype(int)
    evaluate_model("1D-CNN", y_test, y_pred_cnn)

# LSTM
if 'lstm_model' in globals():
    y_pred_lstm = (lstm_model.predict(X_test_pad, verbose=0).ravel() >= 0.5).astype(int)
    evaluate_model("Bidirectional LSTM", y_test, y_pred_lstm)

# Save Summary
# One row per evaluated model, in evaluation order; the analysis cell can reload this file.
results_df = pd.DataFrame(results)
results_df.to_csv("evaluation_summary.csv", index=False)
print("\nSaved evaluation_summary.csv with all metrics.")



MLP (avg embeddings)
Accuracy : 0.8140
Precision: 0.0000
Recall   : 0.0000
F1-score : 0.0000

Classification Report:
               precision    recall  f1-score   support

           0     0.8140    1.0000    0.8975      1851
           1     0.0000    0.0000    0.0000       423

    accuracy                         0.8140      2274
   macro avg     0.4070    0.5000    0.4487      2274
weighted avg     0.6626    0.8140    0.7305      2274



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



1D-CNN
Accuracy : 0.8676
Precision: 0.6667
Recall   : 0.5768
F1-score : 0.6185

Classification Report:
               precision    recall  f1-score   support

           0     0.9062    0.9341    0.9199      1851
           1     0.6667    0.5768    0.6185       423

    accuracy                         0.8676      2274
   macro avg     0.7864    0.7555    0.7692      2274
weighted avg     0.8616    0.8676    0.8639      2274


Bidirectional LSTM
Accuracy : 0.8826
Precision: 0.7532
Recall   : 0.5485
F1-score : 0.6347

Classification Report:
               precision    recall  f1-score   support

           0     0.9028    0.9589    0.9300      1851
           1     0.7532    0.5485    0.6347       423

    accuracy                         0.8826      2274
   macro avg     0.8280    0.7537    0.7824      2274
weighted avg     0.8750    0.8826    0.8751      2274


Saved evaluation_summary.csv with all metrics.


In [19]:
# Brief Analysis
import pandas as pd
import numpy as np
import os

# Reuse the in-memory results table when it already exists in the notebook
# namespace; otherwise fall back to evaluation_summary.csv in the working directory.
if 'results_df' not in globals():
    if not os.path.exists("evaluation_summary.csv"):
        raise FileNotFoundError("evaluation_summary.csv not found and results_df not in globals(). Run evaluation first.")
    results_df = pd.read_csv("evaluation_summary.csv")

# Fail early if any column the analysis below relies on is missing.
expected_cols = {'model', 'accuracy', 'precision', 'recall', 'f1'}
if not expected_cols <= set(results_df.columns):
    raise ValueError(f"evaluation_summary.csv must contain columns: {expected_cols}")

# Normalize model name strings for grouping
def normalize(name):
    """Return ``name`` with surrounding whitespace removed and lower-cased.

    Used to make model-name matching case- and padding-insensitive.
    """
    trimmed = name.strip()
    return trimmed.lower()

# Adds a normalised-name column to results_df in place; the substring matching
# below runs against it.
results_df['model_norm'] = results_df['model'].apply(normalize)

# Define groups
# Classical group: names containing 'tf-idf', 'tfidf', 'logistic' or 'svm'
# (so a name with 'logistic' or 'svm' counts even without 'tf-idf' in it).
tfidf_models = results_df[results_df['model_norm'].str.contains('tf-idf') | results_df['model_norm'].str.contains('tfidf') | results_df['model_norm'].str.contains('logistic') | results_df['model_norm'].str.contains('svm')]
# Deep-learning group: names containing 'mlp', 'cnn' or 'lstm' (the 'bilstm'
# test is already covered by 'lstm').
dl_models = results_df[results_df['model_norm'].str.contains('mlp') | results_df['model_norm'].str.contains('cnn') | results_df['model_norm'].str.contains('lstm') | results_df['model_norm'].str.contains('bilstm')]

# Safety checks
# Only warn: the rest of the cell handles empty groups by reporting N/A.
if tfidf_models.empty:
    print("Warning: no TF-IDF classical models found in results to compare.")
if dl_models.empty:
    print("Warning: no deep-learning (embedding) models found in results to compare.")

# Aggregate metrics
def safe_mean(df, col):
    """Return the mean of ``df[col]`` as a float, or NaN for an empty frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to aggregate.
    col : str
        Column whose mean is wanted.
    """
    if df.empty:
        return float('nan')
    return float(df[col].mean())

# Group sizes plus the mean F1 / precision / recall of each group.
# A mean is NaN when its group is empty (see safe_mean).
summary = {
    'tfidf_count': len(tfidf_models),
    'dl_count': len(dl_models),
    'tfidf_mean_f1': safe_mean(tfidf_models, 'f1'),
    'dl_mean_f1': safe_mean(dl_models, 'f1'),
    'tfidf_mean_precision': safe_mean(tfidf_models, 'precision'),
    'dl_mean_precision': safe_mean(dl_models, 'precision'),
    'tfidf_mean_recall': safe_mean(tfidf_models, 'recall'),
    'dl_mean_recall': safe_mean(dl_models, 'recall'),
}

# Which DL architecture benefited most? pick best by F1
# best_dl stays None when no deep-learning rows were found.
best_dl = None
if not dl_models.empty:
    best_dl_row = dl_models.loc[dl_models['f1'].idxmax()]
    best_dl = {
        'model': best_dl_row['model'],
        'f1': float(best_dl_row['f1']),
        'precision': float(best_dl_row['precision']),
        'recall': float(best_dl_row['recall'])
    }

# Compare best DL vs best TF-IDF
# Same selection for the classical group; None when that group is empty.
best_tfidf = None
if not tfidf_models.empty:
    best_tfidf_row = tfidf_models.loc[tfidf_models['f1'].idxmax()]
    best_tfidf = {
        'model': best_tfidf_row['model'],
        'f1': float(best_tfidf_row['f1']),
        'precision': float(best_tfidf_row['precision']),
        'recall': float(best_tfidf_row['recall'])
    }

# Did embeddings improve performance over TF-IDF? (simple comparison)
# Compares group *mean* F1 values; both flags stay None unless both means exist.
embeddings_improved = None
f1_diff = None
if not np.isnan(summary['tfidf_mean_f1']) and not np.isnan(summary['dl_mean_f1']):
    f1_diff = summary['dl_mean_f1'] - summary['tfidf_mean_f1']
    embeddings_improved = f1_diff > 0

# Are sequential models (LSTM) better than CNN/MLP?
# Split the deep-learning rows into families by substring of the normalised name.
lstm_row = dl_models[dl_models['model_norm'].str.contains('lstm')]
cnn_row = dl_models[dl_models['model_norm'].str.contains('cnn')]
mlp_row = dl_models[dl_models['model_norm'].str.contains('mlp')]

# For each family keep its highest-F1 row as a plain dict, or None if the
# family has no rows.
lstm_best = None
if not lstm_row.empty:
    lstm_best = lstm_row.loc[lstm_row['f1'].idxmax()].to_dict()
cnn_best = None
if not cnn_row.empty:
    cnn_best = cnn_row.loc[cnn_row['f1'].idxmax()].to_dict()
mlp_best = None
if not mlp_row.empty:
    mlp_best = mlp_row.loc[mlp_row['f1'].idxmax()].to_dict()

# Compose textual brief
# The report is accumulated as a list of lines and joined once at the end.
lines = []
lines.append("BRIEF ANALYSIS - Disaster Tweets Experiment")
lines.append("==========================================")
lines.append(f"TF-IDF models found: {summary['tfidf_count']}, DL models found: {summary['dl_count']}")
lines.append("")
lines.append("Mean metrics:")
# A NaN mean (empty group) is rendered as "N/A" instead of a number.
lines.append(f"  - TF-IDF mean F1 : {summary['tfidf_mean_f1']:.4f}" if not np.isnan(summary['tfidf_mean_f1']) else "  - TF-IDF mean F1 : N/A")
lines.append(f"  - DL mean F1     : {summary['dl_mean_f1']:.4f}" if not np.isnan(summary['dl_mean_f1']) else "  - DL mean F1     : N/A")
lines.append("")
# f1_diff is only set when both groups have a mean F1.
if f1_diff is not None:
    lines.append(f"Overall difference (DL_mean_F1 - TFIDF_mean_F1) = {f1_diff:.4f}")
    lines.append(f"Conclusion: Embeddings {'improved' if embeddings_improved else 'did NOT improve'} performance compared to TF-IDF (by mean F1).")
else:
    lines.append("Not enough data to compare TF-IDF and DL mean F1.")

lines.append("")
# Best single model of each group, when the group is non-empty.
if best_dl is not None:
    lines.append(f"Best DL model by F1: {best_dl['model']}  (F1={best_dl['f1']:.4f}, Precision={best_dl['precision']:.4f}, Recall={best_dl['recall']:.4f})")
else:
    lines.append("No DL models available to pick best.")

if best_tfidf is not None:
    lines.append(f"Best TF-IDF model by F1: {best_tfidf['model']}  (F1={best_tfidf['f1']:.4f}, Precision={best_tfidf['precision']:.4f}, Recall={best_tfidf['recall']:.4f})")
else:
    lines.append("No TF-IDF models available to pick best.")

lines.append("")
# Per-family section: list the best row of each deep-learning family and name
# the family with the highest F1. Skipped entirely when no family has a row.
if lstm_best or cnn_best or mlp_best:
    lines.append("Architecture comparison (best per family if available):")

    # Family label -> best F1, filled in the same order the families are listed
    # so that ties resolve to the first-listed family.
    family_scores = {}
    if lstm_best:
        family_scores['LSTM'] = float(lstm_best['f1'])
        lines.append(f"  - LSTM best  : {lstm_best.get('model','LSTM')}  (F1={family_scores['LSTM']:.4f})")
    else:
        lines.append("  - LSTM best  : N/A")
    if cnn_best:
        family_scores['CNN'] = float(cnn_best['f1'])
        lines.append(f"  - CNN best   : {cnn_best.get('model','CNN')}   (F1={family_scores['CNN']:.4f})")
    else:
        lines.append("  - CNN best   : N/A")
    if mlp_best:
        family_scores['MLP'] = float(mlp_best['f1'])
        lines.append(f"  - MLP best   : {mlp_best.get('model','MLP')}   (F1={family_scores['MLP']:.4f})")
    else:
        lines.append("  - MLP best   : N/A")

    # The outer guard guarantees at least one family is present, so
    # family_scores is never empty here and no fallback message is needed.
    best_family = max(family_scores, key=family_scores.get)
    lines.append(f"\nConclusion: The family with highest best-F1 is {best_family} (score {family_scores[best_family]:.4f}).")
else:
    lines.append("No DL architecture results available for family-wise comparison.")

# Final practical notes
# Static guidance appended to every report regardless of the results above.
lines.append("")
lines.append("Practical notes / next steps:")
lines.append(" - If TF-IDF outperforms embeddings, tune embeddings (use pretrained GloVe, allow fine-tuning) or add class weighting/threshold tuning.")
lines.append(" - If DL models overfit, reduce model size, add dropout, or get more data / augment.")
lines.append(" - Use threshold sweep and class-weighting to optimize for recall/F1 on the disaster class if that's your priority.")

# Print lines
report_text = "\n".join(lines)
print(report_text)

# Save to file
# The encoding is pinned because the report embeds model names read from the
# results table; the platform default encoding may not be able to write them.
with open("brief_analysis.txt", "w", encoding="utf-8") as f:
    f.write(report_text)

print("\nSaved brief analysis to brief_analysis.txt")


BRIEF ANALYSIS - Disaster Tweets Experiment
TF-IDF models found: 0, DL models found: 3

Mean metrics:
  - TF-IDF mean F1 : N/A
  - DL mean F1     : 0.4178

Not enough data to compare TF-IDF and DL mean F1.

Best DL model by F1: Bidirectional LSTM  (F1=0.6347, Precision=0.7532, Recall=0.5485)
No TF-IDF models available to pick best.

Architecture comparison (best per family if available):
  - LSTM best  : Bidirectional LSTM  (F1=0.6347)
  - CNN best   : 1D-CNN   (F1=0.6185)
  - MLP best   : MLP (avg embeddings)   (F1=0.0000)

Conclusion: The family with highest best-F1 is LSTM (score 0.6347).

Practical notes / next steps:
 - If TF-IDF outperforms embeddings, tune embeddings (use pretrained GloVe, allow fine-tuning) or add class weighting/threshold tuning.
 - If DL models overfit, reduce model size, add dropout, or get more data / augment.
 - Use threshold sweep and class-weighting to optimize for recall/F1 on the disaster class if that's your priority.

Saved brief analysis to brief_an