# Preliminary

In [1]:
import pyuac

def main():
    print("Do stuff here that requires being run as an admin.")
    # The window will disappear as soon as the program exits!
    input("Press enter to close the window. >")

if __name__ == "__main__":
    if not pyuac.isUserAdmin():
        print("Re-launching as admin!")
        pyuac.runAsAdmin()
    else:        
        main()

Re-launching as admin!


In [2]:
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer

import numpy as np
from scipy.special import softmax
import csv
import shutil

In [3]:
tasks = ['emoji', 'emotion', 'hate', 'irony', 'offensive', 'sentiment', 'sentiment-latest']

# Process Function

In [4]:
# Preprocess text (username and link placeholders)
def preprocess(text):
    new_text = []
 
 
    for t in text.split(" "):
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)

In [5]:

def get_labels(task):
    # download label mapping
    labels=[]
    mapping_link = f"./cardiffnlp/twitter-roberta-base-{task}/mapping.txt"
    with open(mapping_link, encoding="utf-8") as f:
        maps = f.read().split("\n")
        csvreader = csv.reader(maps, delimiter='\t')
    labels = [row[1] for row in csvreader if len(row) > 1]
    return labels

In [6]:
def get_analysis(task, text):
    labels = get_labels(task)
    MODEL = f"cardiffnlp/twitter-roberta-base-{task}"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)
    
    text = preprocess(text)
    encoded_input = tokenizer(text, return_tensors='pt')
    output = model(**encoded_input)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    
    ranking = np.argsort(scores)
    ranking = ranking[::-1]
    print(f"According to our {task} model, this tweet is :")

    for i in range(scores.shape[0]):
        l = labels[ranking[i]]
        s = scores[ranking[i]]
        print(f"{i+1}) {l} {np.round(float(s), 4)}")
    
    return scores

# Let's Analyse

In [7]:
tweet = "My IA teacher is so handsome !"

for task in tasks:
    get_analysis(task, tweet)

According to our emoji model, this tweet is :
1) 😍 0.6581
2) 😊 0.0618
3) ❤ 0.0598
4) 😂 0.0373
5) 💕 0.0353
6) 💙 0.0295
7) 😁 0.0261
8) 💜 0.0204
9) 😘 0.0193
10) ✨ 0.0105
11) 😎 0.0102
12) 🔥 0.0088
13) 😉 0.0064
14) 😜 0.0055
15) 🎄 0.0032
16) 💯 0.0026
17) 🇺🇸 0.0018
18) 📷 0.0015
19) 📸 0.0011
20) ☀ 0.0008
According to our emotion model, this tweet is :
1) joy 0.9542
2) optimism 0.0177
3) anger 0.0145
4) sadness 0.0136
According to our hate model, this tweet is :
1) not-hate 0.9523
2) hate 0.0477
According to our irony model, this tweet is :
1) irony 0.9348
2) non_irony 0.0652
According to our offensive model, this tweet is :
1) not-offensive 0.8711
2) offensive 0.1289
According to our sentiment model, this tweet is :
1) positive 0.9846
2) neutral 0.0132
3) negative 0.0021
According to our sentiment-latest model, this tweet is :
1) positive 0.9868
2) neutral 0.009
3) negative 0.0042


In [8]:
# PT
MODEL = f"cardiffnlp/twitter-roberta-base-sentiment-latest"
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
model.save_pretrained(MODEL)

text = "Good night 😊"
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)

# # TF
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL)

# text = "Good night 😊"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)

ranking = np.argsort(scores)
ranking = ranking[::-1]
for i in range(scores.shape[0]):
    l = labels[ranking[i]]
    s = scores[ranking[i]]
    print(f"{i+1}) {l} {np.round(float(s), 4)}")

NameError: name 'tokenizer' is not defined

In [None]:
# PT
MODEL = f"cardiffnlp/twitter-roberta-base-irony"
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
model.save_pretrained(MODEL)

text = "Good night 😊"
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)

# # TF
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL)

# text = "Good night 😊"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)

ranking = np.argsort(scores)
ranking = ranking[::-1]
for i in range(scores.shape[0]):
    l = labels[ranking[i]]
    s = scores[ranking[i]]
    print(f"{i+1}) {l} {np.round(float(s), 4)}")