In [None]:
#importing libraries
import os
import joblib
import pandas as pd
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from sklearn.metrics import classification_report

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the model and test-data paths used in
# later cells all live under '/content/drive/My Drive'.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Directory on the mounted Drive that holds the saved model artifacts.
target_dir = '/content/drive/My Drive/Colab Notebooks/NLP_Final/models'
model_path = os.path.join(target_dir, 'logreg_model.joblib')

# Load the serialized logistic-regression model as a quick check that the
# artifact is readable.
# NOTE(review): `model` is not referenced by any other cell visible here;
# ModelTester loads its own copy of the same file - confirm whether this
# standalone load is still needed.
model = joblib.load(model_path)

In [None]:
class ModelTester:
    """Load the saved sentiment classifiers and run or evaluate them.

    Two models are read from ``model_dir``: a logistic-regression classifier
    with its TF-IDF vectorizer (joblib files) and a DistilBERT sequence
    classifier with its tokenizer (``from_pretrained`` directories).
    """

    # Class-id order assumed when the DistilBERT checkpoint carries no real
    # label names. Alphabetical order is what sklearn's LabelEncoder assigns.
    # The earlier hard-coded mapping (1 -> 'positive', 2 -> 'neutral') produced
    # a report with zero correct positive/neutral predictions, which is the
    # signature of those two ids being swapped.
    # NOTE(review): confirm this order against the training notebook.
    DEFAULT_BERT_LABELS = ('negative', 'neutral', 'positive')

    def __init__(self, model_dir='models'):
        """Load both models and move DistilBERT to the GPU when one is available.

        Args:
            model_dir: Directory containing ``logreg_model.joblib``,
                ``tfidf_vectorizer.joblib``, ``distilbert_model/`` and
                ``distilbert_tokenizer/``.
        """
        # Load Logistic Regression model and vectorizer
        self.logreg = joblib.load(os.path.join(model_dir, 'logreg_model.joblib'))
        self.vectorizer = joblib.load(os.path.join(model_dir, 'tfidf_vectorizer.joblib'))

        # Load DistilBERT model and tokenizer
        self.bert_model = DistilBertForSequenceClassification.from_pretrained(
            os.path.join(model_dir, 'distilbert_model'))
        self.bert_tokenizer = DistilBertTokenizer.from_pretrained(
            os.path.join(model_dir, 'distilbert_tokenizer'))
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.bert_model.to(self.device)

    def predict(self, text, model_type='both'):
        """Classify a single text with one or both models.

        Args:
            text: The string to classify.
            model_type: ``'logreg'``, ``'bert'`` or ``'both'``.

        Returns:
            dict with a ``'logreg'`` entry (the label returned by the
            logistic-regression model) and/or a ``'bert'`` entry (the integer
            class id with the highest logit). Any other ``model_type`` yields
            an empty dict.
        """
        results = {}

        if model_type in ['both', 'logreg']:
            X = self.vectorizer.transform([text])
            results['logreg'] = self.logreg.predict(X)[0]

        if model_type in ['both', 'bert']:
            results['bert'] = self._predict_bert_ids([text])[0]

        return results

    def _predict_bert_ids(self, texts, batch_size=32):
        """Return the arg-max DistilBERT class id for every text, batch by batch."""
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        class_ids = []
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            # padding=True pads each batch to its longest member.
            inputs = self.bert_tokenizer(batch, return_tensors="pt",
                                         truncation=True, padding=True).to(self.device)
            with torch.no_grad():
                logits = self.bert_model(**inputs).logits
            class_ids.extend(torch.argmax(logits, dim=1).tolist())
        return class_ids

    def _resolve_label_mapping(self, label_mapping=None):
        """Pick the class-id -> label-name mapping for DistilBERT predictions.

        Precedence: an explicit ``label_mapping``; then the checkpoint's
        ``config.id2label`` when it holds real names rather than the generic
        ``LABEL_<n>`` placeholders; then ``DEFAULT_BERT_LABELS`` in order.
        """
        if label_mapping is not None:
            return dict(label_mapping)

        config = getattr(self.bert_model, 'config', None)
        id2label = getattr(config, 'id2label', None) or {}
        names = {int(idx): str(name) for idx, name in id2label.items()}
        if names and not any(name.startswith('LABEL_') for name in names.values()):
            return names

        return dict(enumerate(self.DEFAULT_BERT_LABELS))

    def evaluate(self, test_file='/content/drive/My Drive/Colab Notebooks/NLP_Final/test.csv',
                 label_mapping=None, batch_size=32):
        """Score both models on a labelled CSV and print classification reports.

        Args:
            test_file: CSV with a ``tweet_text`` column (inputs) and a
                ``sentiment`` column (gold labels).
            label_mapping: Optional dict from DistilBERT class id to label
                name. When omitted it is resolved by
                ``_resolve_label_mapping``.
            batch_size: Number of texts sent through DistilBERT per forward
                pass.

        Returns:
            dict with ``'logreg_report'`` and ``'bert_report'``, each the
            ``classification_report(..., output_dict=True)`` result.
        """
        test_df = pd.read_csv(test_file)
        texts = test_df['tweet_text'].tolist()
        y_true = test_df['sentiment'].tolist()

        # Mapping for BERT's numeric predictions to string labels
        id_to_label = self._resolve_label_mapping(label_mapping)

        # Get predictions: one vectorizer/classifier call for the whole set
        # instead of one call per row.
        if texts:
            y_pred_logreg = list(self.logreg.predict(self.vectorizer.transform(texts)))
        else:
            y_pred_logreg = []
        y_pred_bert = [id_to_label[class_id]
                       for class_id in self._predict_bert_ids(texts, batch_size)]

        # Generate reports
        print("Logistic Regression Performance:")
        print(classification_report(y_true, y_pred_logreg))

        print("\nDistilBERT Performance:")
        print(classification_report(y_true, y_pred_bert))

        return {
            'logreg_report': classification_report(y_true, y_pred_logreg, output_dict=True),
            'bert_report': classification_report(y_true, y_pred_bert, output_dict=True)
        }

if __name__ == "__main__":
    # Build the tester from the model artifacts saved on the mounted Drive.
    tester = ModelTester(
        model_dir='/content/drive/My Drive/Colab Notebooks/NLP_Final/models'
    )

    # Smoke test: run both classifiers on one hand-written sentence and show
    # the raw prediction dict.
    sample_text = "The president's speech was inspiring and thoughtful"
    print(f"\nSample Prediction for: '{sample_text}'")
    print(tester.predict(sample_text))

    # Score both models on the default test CSV; evaluate() prints the
    # classification reports itself, so its return value is not used here.
    tester.evaluate()



Sample Prediction for: 'The president's speech was inspiring and thoughtful'
{'logreg': 'positive', 'bert': 1}

Running full evaluation...
Logistic Regression Performance:
              precision    recall  f1-score   support

    negative       0.60      1.00      0.75         3
     neutral       1.00      1.00      1.00        13
    positive       1.00      0.94      0.97        34

    accuracy                           0.96        50
   macro avg       0.87      0.98      0.91        50
weighted avg       0.98      0.96      0.96        50


DistilBERT Performance:
              precision    recall  f1-score   support

    negative       0.30      1.00      0.46         3
     neutral       0.00      0.00      0.00        13
    positive       0.00      0.00      0.00        34

    accuracy                           0.06        50
   macro avg       0.10      0.33      0.15        50
weighted avg       0.02      0.06      0.03        50

