<a href="https://colab.research.google.com/github/aLehav/Olami/blob/main/Article_Sentiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Working Templated Sentiment

In [49]:
# Subject inserted into each sentiment hypothesis ("... towards {STRING}.") and into the plot title.
STRING='Hillel'
# Base name (no extension) of the result files written as f'{CSV_NAME}.csv' and f'{CSV_NAME}_expanded.csv'.
CSV_NAME='hypothesis_sentiment_results'
# Path of the input CSV that is loaded with pd.read_csv.
DF_PATH='ucf_hillel.csv'

In [50]:
%pip install transformers --quiet

Note: you may need to restart the kernel to use updated packages.




In [51]:
import ast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

In [52]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the sequence-classification model from the pretrained checkpoint and
# move it onto the selected device.
nli_model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli')
nli_model = nli_model.to(device)

# The tokenizer comes from the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')

In [53]:
def compute_sentiment(row):
    """Average the per-label sentiment scores over all text entries of one row.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``row['matching_text']``, an iterable of text entries.
        Each entry is scored with ``get_sentiment_pytorch`` against the
        module-level ``STRING``.

    Returns
    -------
    numpy.ndarray
        Element-wise mean of the per-entry score lists, i.e. one value per
        label in the order returned by ``get_sentiment_pytorch``. If the row
        has no text entries, an array of three NaNs is returned so the result
        still has one value per sentiment column.
    """
    text_entries = row['matching_text']

    scores = []
    for text_entry in text_entries:
        scores.append(tuple(get_sentiment_pytorch(text=text_entry, string=STRING)))
        torch.cuda.empty_cache()

    if scores:
        mean_scores = np.average(scores, axis=0)
    else:
        # Averaging an empty list yields a single scalar NaN (plus a NumPy
        # warning) rather than one value per label, which cannot be expanded
        # into the three sentiment columns. Emit one NaN per label instead
        # (three labels: positive, negative, neutral).
        mean_scores = np.full(3, np.nan)

    # Echo the row's averages; kept from the original, where it acts as a
    # progress trace while the frame is processed row by row.
    print(mean_scores)
    return mean_scores

def get_sentiment_pytorch(text, string):
    """Score how positive / negative / neutral ``text`` is towards ``string``.

    For every label a hypothesis of the form
    ``"This example is {label} towards {string}."`` is paired with ``text``
    as the premise, and all pairs are run through the module-level
    ``nli_model`` in a single batch.

    Parameters
    ----------
    text : str
        The premise to be judged.
    string : str
        The subject inserted into each hypothesis.

    Returns
    -------
    list of float
        One score per label, in the order positive, negative, neutral. Each
        score is the softmax over logit columns 0 and 2 for that hypothesis,
        taking the share of column 2. The scores are computed independently
        per label and therefore need not sum to 1.
    """
    labels = ['positive', 'negative', 'neutral']
    hypotheses = [f'This example is {label} towards {string}.' for label in labels]

    # Encode one (premise, hypothesis) pair per label. Calling the tokenizer
    # directly replaces the deprecated batch_encode_plus.
    inputs = tokenizer(
        [text] * len(hypotheses),
        hypotheses,
        truncation=True,
        padding=True,
        return_tensors='pt',
    ).to(device)

    # Inference only: without no_grad() autograd keeps the activations of the
    # whole forward pass alive, wasting (GPU) memory on every call.
    with torch.no_grad():
        logits = nli_model(**inputs).logits

    # Keep columns 0 and 2 and drop column 1.
    # NOTE(review): assumes the model's label order is
    # (contradiction, neutral, entailment) -- confirm via nli_model.config.id2label.
    entail_contradiction_logits = logits[:, [0, 2]]
    prob_label_is_true = entail_contradiction_logits.softmax(dim=1)[:, 1]

    return prob_label_is_true.tolist()

def plot_sentiment_over_time(df, save_path='base_sentiment_results.png'):
    """Plot the three sentiment series against date, save the figure and show it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'date'``, ``'positive'``, ``'negative'``
        and ``'neutral'``. Rows are plotted in the order they appear.
    save_path : str or path-like, optional
        Where the figure is written. Defaults to
        ``'base_sentiment_results.png'``, the file name that was previously
        hard-coded, so existing calls behave as before.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # One line per sentiment column, with a capitalised legend label.
    for column, legend_label in (
        ('positive', 'Positive'),
        ('negative', 'Negative'),
        ('neutral', 'Neutral'),
    ):
        ax.plot(df['date'], df[column], label=legend_label)

    ax.set_title(f'UCF {STRING} Sentiment Over Time')
    ax.set_xlabel('Date')
    ax.set_ylabel('Sentiment')

    # Rotate the x tick labels so they do not overlap.
    plt.xticks(rotation=60)

    ax.legend()
    fig.tight_layout()

    # Save through the figure handle (not the pyplot "current figure") and
    # before show(), which may release the figure in non-interactive backends.
    fig.savefig(save_path, bbox_inches='tight')

    plt.show()

In [54]:
# Load the input table; the first CSV column becomes the index.
df = pd.read_csv(DF_PATH, index_col=0)

# 'matching_text' is stored as text that evaluates to a Python literal
# (presumably a list of strings -- confirm against the CSV). Parse it with
# ast.literal_eval, which accepts only literals, instead of eval(), which
# would execute any code embedded in the file.
df['matching_text'] = df['matching_text'].apply(ast.literal_eval)

# Score every row; each row's result is expanded into one column per label.
df[['positive', 'negative', 'neutral']] = df.apply(compute_sentiment, axis=1, result_type='expand')

# Write a compact result file and an expanded one that keeps the source columns.
df[['date', 'positive', 'negative', 'neutral']].to_csv(f'{CSV_NAME}.csv')
df[['school', 'date', 'txt_file', 'matching_text', 'matching_links', 'positive', 'negative', 'neutral']].to_csv(f'{CSV_NAME}_expanded.csv')

plot_sentiment_over_time(df)

[0.85544944 0.02847679 0.04757296]
[0.82820231 0.20172213 0.33256489]
[0.09061333 0.10865245 0.04800795]
[0.21911912 0.2757456  0.10071135]


KeyboardInterrupt: 