In [1]:
import pyuac

def main():
    """Placeholder for the work that needs administrator rights.

    Prints a marker message, then blocks on ``input`` so the console stays
    visible until the user presses enter.
    """
    print("Do stuff here that requires being run as an admin.")
    # Without this pause the window closes the moment the program exits.
    close_prompt = "Press enter to close the window. >"
    input(close_prompt)

if __name__ == "__main__":
    # Run the payload when pyuac reports admin rights; otherwise announce the
    # re-launch and hand over to pyuac.runAsAdmin().
    if pyuac.isUserAdmin():
        main()
    else:
        print("Re-launching as admin!")
        pyuac.runAsAdmin()

Re-launching as admin!


In [2]:
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
import numpy as np
from scipy.special import softmax
import csv
import urllib.request

In [3]:


# Preprocess text (username and link placeholders)
def preprocess(text):
    """Replace user mentions and links in ``text`` with fixed placeholders.

    The text is split on single spaces (so runs of spaces are preserved when
    the tokens are joined back). A token that starts with ``@`` and has at
    least one more character becomes ``@user``; a token that starts with
    ``http`` becomes ``http``. Every other token is kept unchanged.
    """
    def _mask(token):
        # A lone "@" is not a mention, hence the length check.
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        if token.startswith('http'):
            token = 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))

In [14]:
# Task suffixes of the cardiffnlp/twitter-roberta-base-* checkpoints used in this notebook.
tasks = ['emoji', 'emotion', 'hate', 'irony', 'offensive', 'sentiment', 'sentiment-latest']
# Fetch the tokenizer and model for every task, then write both back to disk.
for task in tasks:
    print(task)
    MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

    # Both objects are loaded BEFORE anything is saved: the save target below is a
    # directory whose path is the same string as the checkpoint id.
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)

    # NOTE(review): presumably later from_pretrained(MODEL) calls pick up this local
    # copy instead of downloading again — confirm against the transformers docs.
    tokenizer.save_pretrained(MODEL)
    model.save_pretrained(MODEL) 
# NOTE: `task`, `MODEL`, `tokenizer` and `model` remain bound to the last task after the loop.

emoji
emotion


Downloading pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

hate


Downloading pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

irony


Downloading pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

offensive


Downloading pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

sentiment


In [29]:
def get_labels(task):
    """Download the label names for a TweetEval task.

    Fetches ``datasets/<task>/mapping.txt`` from the cardiffnlp/tweeteval
    GitHub repository. The file is tab-separated with the label name in the
    second column.

    Args:
        task: Task suffix such as ``"emotion"`` or ``"sentiment"``. A
            ``"-latest"`` model variant (e.g. ``"sentiment-latest"``) is
            resolved to the mapping of its base task.

    Returns:
        list[str]: Label names in file order; rows with fewer than two
        columns (e.g. a trailing blank line) are skipped.

    Raises:
        urllib.error.URLError: If the mapping file cannot be downloaded.
    """
    # "-latest" names a newer checkpoint, not a separate dataset: the tweeteval
    # repo has no datasets/<task>-latest/ folder, so requesting it would 404.
    # NOTE(review): assumes the "-latest" checkpoint keeps the base task's
    # label order — confirm against the checkpoint's config.id2label.
    latest_suffix = "-latest"
    dataset = task[:-len(latest_suffix)] if task.endswith(latest_suffix) else task

    mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{dataset}/mapping.txt"
    with urllib.request.urlopen(mapping_link) as f:
        lines = f.read().decode('utf-8').split("\n")

    return [row[1] for row in csv.reader(lines, delimiter='\t') if len(row) > 1]

In [34]:
def get_analysis(task, text):
    """Classify ``text`` with the ``task`` model and print the ranked labels.

    Loads the ``cardiffnlp/twitter-roberta-base-<task>`` tokenizer and model,
    runs the preprocessed text through the model, applies a softmax to the
    output scores and prints every label with its score, highest first.

    Args:
        task: Task suffix used both for the label lookup and the checkpoint name.
        text: Raw tweet text; it is passed through ``preprocess`` before tokenizing.

    Returns:
        numpy.ndarray: Softmax scores in model output order (not ranked).
    """
    labels = get_labels(task)
    checkpoint = f"cardiffnlp/twitter-roberta-base-{task}"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

    encoded_input = tokenizer(preprocess(text), return_tensors='pt')
    # First element of the model output, first (only) item of the batch.
    raw_scores = model(**encoded_input)[0][0]
    scores = softmax(raw_scores.detach().numpy())

    print(f"According to our {task} model, this tweet is :")

    # argsort is ascending, so reverse it to list the best-scoring label first.
    for position, label_idx in enumerate(np.argsort(scores)[::-1], start=1):
        print(f"{position}) {labels[label_idx]} {np.round(float(scores[label_idx]), 4)}")

    return scores

In [36]:
# Sample tweet that is scored by every task-specific model.
tweet = "Great I get fired for grossophobia..."

# get_analysis prints the ranked labels for each task; the scores it returns are discarded here.
for task in tasks:
    get_analysis(task, tweet)

According to our emoji model, this tweet is :
1) 😂 0.4513
2) 😁 0.1495
3) 😜 0.0924
4) 😉 0.0856
5) 😊 0.0813
6) 😎 0.0553
7) 😍 0.0203
8) 🔥 0.0095
9) 😘 0.0077
10) ❤ 0.0075
11) 💯 0.0071
12) ✨ 0.0061
13) 🇺🇸 0.0041
14) 📷 0.0038
15) 💕 0.0037
16) 💜 0.0035
17) 💙 0.0034
18) 📸 0.0032
19) 🎄 0.0028
20) ☀ 0.0019
According to our emotion model, this tweet is :
1) anger 0.9484
2) sadness 0.0372
3) joy 0.009
4) optimism 0.0054
According to our hate model, this tweet is :
1) not-hate 0.9682
2) hate 0.0318
According to our irony model, this tweet is :
1) irony 0.9736
2) non_irony 0.0264
According to our offensive model, this tweet is :
1) offensive 0.5013
2) not-offensive 0.4987
According to our sentiment model, this tweet is :
1) negative 0.7745
2) neutral 0.191
3) positive 0.0345


## Sentiment Latest

In [None]:
# PT
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
# Load the tokenizer that belongs to this checkpoint instead of relying on
# whatever `tokenizer` an earlier cell happened to leave in the kernel.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# Save both pieces: the target directory has the same path as the checkpoint id,
# so a model-only directory could leave a later tokenizer load without its files.
tokenizer.save_pretrained(MODEL)
model.save_pretrained(MODEL)

# `labels` was never defined at notebook scope (NameError on a fresh kernel).
# Take the label names from the model config rather than a downloaded mapping.
labels = [model.config.id2label[i] for i in range(model.config.num_labels)]

text = "Good night 😊"
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)

# # TF
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL)

# text = "Good night 😊"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)

# argsort is ascending; reverse it so the most probable label is printed first.
ranking = np.argsort(scores)
ranking = ranking[::-1]
for i in range(scores.shape[0]):
    l = labels[ranking[i]]
    s = scores[ranking[i]]
    print(f"{i+1}) {l} {np.round(float(s), 4)}")

In [None]:
# PT
MODEL = "cardiffnlp/twitter-roberta-base-irony"
# Load the tokenizer that belongs to this checkpoint instead of relying on
# whatever `tokenizer` an earlier cell happened to leave in the kernel.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# Save both pieces: the target directory has the same path as the checkpoint id,
# so a model-only directory could leave a later tokenizer load without its files.
tokenizer.save_pretrained(MODEL)
model.save_pretrained(MODEL)

# `labels` was never defined at notebook scope (NameError on a fresh kernel);
# fetch the label mapping for this task explicitly.
labels = get_labels("irony")

text = "Good night 😊"
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)

# # TF
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL)

# text = "Good night 😊"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)

# argsort is ascending; reverse it so the most probable label is printed first.
ranking = np.argsort(scores)
ranking = ranking[::-1]
for i in range(scores.shape[0]):
    l = labels[ranking[i]]
    s = scores[ranking[i]]
    print(f"{i+1}) {l} {np.round(float(s), 4)}")