In [183]:
import pandas as pd
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [185]:
# Read the tweet dataset from a CSV in the working directory and preview its first rows.
df = pd.read_csv("tweets.csv")
df.head()

Unnamed: 0,id,keyword,location,text,target
0,0,ablaze,,"Communal violence in Bhainsa, Telangana. ""Ston...",1
1,1,ablaze,,Telangana: Section 144 has been imposed in Bha...,1
2,2,ablaze,New York City,Arsonist sets cars ablaze at dealership https:...,1
3,3,ablaze,"Morgantown, WV",Arsonist sets cars ablaze at dealership https:...,1
4,4,ablaze,,"""Lord Jesus, your love brings freedom and pard...",0


In [187]:
# Print the frame's column index to check which fields are available.
print(df.columns)

Index(['id', 'keyword', 'location', 'text', 'target'], dtype='object')


In [191]:
# Tokenize every tweet into fixed-length BERT inputs.
#
# Produces:
#   texts            - array of raw tweet strings from df['text']
#   labels           - NumPy array of targets from df['target']
#   tokenizer        - the 'bert-base-uncased' BertTokenizer
#   input_ids        - int32 array, one row of `max_length` token ids per tweet
#   attention_masks  - int32 array, same layout as input_ids
texts = df['text'].values
labels = np.array(df['target'].values)

try:
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
except Exception as e:
    # Report the problem, then re-raise so the cell fails with a full
    # traceback. exit() is meant for interactive shells and is not a
    # reliable way to stop a notebook cell.
    print(f"Error loading tokenizer: {e}")
    raise
print("BERT tokenizer loaded.")

# Every sequence is padded or truncated to exactly this many tokens.
max_length = 128

# Encode the whole corpus in a single batched call instead of one
# encode_plus() call per row; padding to 'max_length' keeps every row the
# same width, so the result is already a rectangular array.
encoded = tokenizer(
    texts.tolist(),
    add_special_tokens=True,
    max_length=max_length,
    padding='max_length',
    truncation=True,
    return_attention_mask=True,
    return_tensors='np',
)

# int32 matches the dtype requested when these arrays are later converted
# to TensorFlow tensors.
input_ids = encoded['input_ids'].astype(np.int32)
attention_masks = encoded['attention_mask'].astype(np.int32)

print("Tokenization and preprocessing completed.")

BERT tokenizer loaded.
Tokenization and preprocessing completed.


In [193]:
# Hold out 20% of the rows for testing (fixed seed for a repeatable split).
# train_test_split returns the pieces in train/test pairs, one pair per
# input array, in the order the arrays were passed.
split_arrays = train_test_split(
    input_ids, attention_masks, labels, test_size=0.2, random_state=42
)

# Convert each piece to an int32 tensor, unpacking in the same order.
(
    train_inputs, test_inputs,
    train_masks, test_masks,
    train_labels, test_labels,
) = (tf.convert_to_tensor(piece, dtype=tf.int32) for piece in split_arrays)

print("Data split and converted to tensors.")

Data split and converted to tensors.


In [200]:
# Evaluate the classifier on the held-out split: print accuracy and save a
# confusion-matrix heatmap to 'confusion_matrix.png'.
#
# NOTE(review): `model` is not defined in any cell visible in this notebook;
# it presumably comes from cells that are not shown here. Confirm that the
# cell that builds and trains it is present and runs before this one.
# NOTE(review): the recorded run reports accuracy 0.1741, which is below
# chance for two classes. Presumably the model was not fine-tuned, or the
# labels are misaligned. Verify before trusting this figure.
predictions = model.predict([test_inputs, test_masks])
# Pick the class with the highest logit for each example.
predicted_labels = np.argmax(predictions.logits, axis=1)

# Materialize the ground-truth labels once for both metrics.
true_labels = test_labels.numpy()

accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy:.4f}")

# Pin the label order so the matrix is always 2x2 and lines up with the two
# tick labels, even if one class never appears in this split or in the
# predictions.
class_names = ['Non-Disaster', 'Disaster']
cm = confusion_matrix(true_labels, predicted_labels, labels=[0, 1])

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
fig.savefig('confusion_matrix.png')
# Close the figure after saving so it is not also rendered inline.
plt.close(fig)
print("Confusion matrix saved as 'confusion_matrix.png'.")

Accuracy: 0.1741
Confusion matrix saved as 'confusion_matrix.png'.
