In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive/ so the paths used below resolve like local files.
drive.mount('/content/drive/')

In [None]:
import os

# Project folder on the mounted Drive.
drive_path = '/content/drive/MyDrive/BERT_for_Disaster_Classification'

# List the folder contents as a quick check that the path is reachable.
drive_entries = os.listdir(drive_path)
print(drive_entries)

In [None]:
# Location of the validation CSV on the mounted Drive.
db_path = '/content/drive/MyDrive/BERT_for_Disaster_Classification/Disaster_Tweets_Project/validation_dataset.csv'

In [None]:
import pandas as pd

# Load the CSV and swap missing 'text' values for empty strings in one pass.
df = pd.read_csv(db_path).assign(text=lambda frame: frame['text'].fillna(''))

# (rows, columns) of the loaded frame.
df.shape

In [4]:
# Preview the first five rows to check the columns.
df.head(n=5)

Unnamed: 0,text,label
0,< url > boston bombings : in the face of trage...,1
1,"our service line is open for your comments, gr...",1
2,< number > babys < url >,0
3,< user > < hashtag > mecasalu ( rt ),0
4,itsfoodporn : s'mores stuffed cookies < url >,0


In [None]:
# Evaluating our BERT model on the validation dataset

from transformers import BertForSequenceClassification, AutoTokenizer
import torch
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

# Folder on Drive that holds the saved model and tokenizer files.
model_dir = '/content/drive/MyDrive/BERT_for_Disaster_Classification/Disaster_Tweets_Project/my_bert_model2/'

# Restore the tokenizer and the sequence classifier from that folder.
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = BertForSequenceClassification.from_pretrained(model_dir)

# Put the model into evaluation mode before running any predictions.
model.eval()

# Define function to process the input text
def predict(text):
    """Classify one text with the module-level ``model`` and ``tokenizer``.

    Args:
        text: The string to classify.

    Returns:
        A ``(predicted_class, confidence)`` tuple: the index of the largest
        logit as an int, and the softmax probability of that class as a float.
    """
    # Tokenize, truncating to at most 512 tokens.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Keep the input tensors on the same device as the model so the call also
    # works when the model has been moved to a GPU (no-op on CPU).
    inputs = inputs.to(model.device)

    # Inference only: no gradient tracking needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax turns the logits into per-class probabilities.
    probabilities = F.softmax(logits, dim=1)

    # .item() requires a single-element tensor, i.e. exactly one input text.
    predicted_class = torch.argmax(logits, dim=1).item()

    # Probability assigned to the predicted class.
    confidence = probabilities[0][predicted_class].item()

    return predicted_class, confidence

# Pull the two needed columns out of the frame once, as plain Python lists.
texts = df['text'].tolist()
true_labels = df['label'].tolist()

# One dict per row for the results table, plus the bare predictions for the metrics.
results = []
predicted_labels = []

# Classify each row's text and record the outcome next to its true label.
for text, true_label in zip(texts, true_labels):
    predicted_class, confidence = predict(text)

    predicted_labels.append(predicted_class)
    results.append({
        'text': text,
        'true_label': true_label,
        'predicted_class': predicted_class,
        'confidence': confidence
    })


# Build the per-row results table, echo it, and save it as CSV in the working directory.
results_df = pd.DataFrame(data=results)
print("Predictions for Each Row:")
print(results_df)
results_df.to_csv('predictions_results_bert.csv', index=False)

# Score the predictions against the true labels; all four metrics are
# computed before anything is printed.
accuracy, precision, recall, f1 = (
    scorer(true_labels, predicted_labels)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report each metric rounded to four decimals.
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Predictions for Each Row:
                                                    text  true_label  \
0      < url > boston bombings : in the face of trage...           1   
1      our service line is open for your comments, gr...           1   
2                               < number > babys < url >           0   
3                   < user > < hashtag > mecasalu ( rt )           0   
4          itsfoodporn : s'mores stuffed cookies < url >           0   
...                                                  ...         ...   
40925  < hashtag > iwishicould keep a dead mouse in m...           0   
40926  on our way to < user > vs < user > see you all...           0   
40927  < hashtag > nepal : a race against time to sav...           1   
40928  updates < user > - in field with over < number...           1   
40929  norfolk news local puerto ricans pray for thei...           1   

       predicted_class  confidence  
0                    1    0.999916  
1                    1    0.999984 