# Preparation

In [1]:
# Mount Google Drive at /content/drive; the model directory used by the cells
# below lives under /content/drive/MyDrive. This import only exists on Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install emoji transformers



In [3]:
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForTokenClassification, AutoModelForSequenceClassification, pipeline

In [4]:
# Base location of the saved models on the mounted Drive. The cells below build
# each model location as `path + '<model name>'` (plain string concatenation),
# so the trailing slash must be kept.
path = '/content/drive/MyDrive/Riza_Jingxuan/Year_3_Project/SS4DAP/Model/'

# NER

## NER1_samrawal

In [5]:
# Token-level NER with the checkpoint stored under 'NER1_samrawal'.
# Position i in label_list is the tag printed for predicted class id i.
# NOTE(review): the order is assumed to match the checkpoint's own id2label
# mapping — confirm against its config.
label_list = ['B-problem', 'B-treatment', 'I-test', 'I-treatment', 'B-test', 'O', 'I-problem']

# Load tokenizer and model together, before any data is touched.
tokenizer = AutoTokenizer.from_pretrained(path + 'NER1_samrawal')
model = AutoModelForTokenClassification.from_pretrained(path + 'NER1_samrawal', num_labels=len(label_list))

paragraph = '''Stalybridge GP Richard Bircher says he’s seen people with nosebleeds, bad coughs and sore eyes from the smoke as the fire still burns on Saddleworth Moor. 100 firefighters are now up there tackling it.'''
tokens = tokenizer(paragraph)

# Build each input tensor once, adding a leading batch dimension of size 1.
input_ids = torch.tensor(tokens['input_ids']).unsqueeze(0)
attention_mask = torch.tensor(tokens['attention_mask']).unsqueeze(0)

# Inference only: call the module itself (not .forward) so registered hooks
# run, and disable autograd so no graph is built for the forward pass.
with torch.no_grad():
    logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

# Index the batch dimension explicitly (a bare squeeze() would also drop any
# other size-1 axis), then take the highest-scoring class for every token.
predictions = [label_list[i] for i in logits[0].argmax(dim=-1).tolist()]

# Decode each token id on its own so the rows line up with the predictions.
words = tokenizer.batch_decode(tokens['input_ids'])
print(pd.DataFrame({'ner': predictions, 'words': words}))

          ner         words
0           O         [CLS]
1           O            st
2           O           aly
3           O        bridge
4           O            gp
5           O       richard
6           O         birch
7           O            er
8           O          says
9           O            he
10          O             ’
11          O             s
12          O          seen
13          O        people
14          O          with
15  B-problem          nose
16  B-problem           ble
17  B-problem            ed
18  B-problem             s
19          O             ,
20          O           bad
21  I-problem         cough
22  B-problem             s
23          O           and
24  B-problem          sore
25  I-problem          eyes
26          O          from
27          O           the
28          O         smoke
29          O            as
30          O           the
31          O          fire
32          O         still
33          O         burns
34          O       

## NER2_alvaroalon2

In [6]:
# Token-level NER with the checkpoint stored under 'NER2_alvaroalon2'.
# Position i in label_list is the tag printed for predicted class id i.
# NOTE(review): the order is assumed to match the checkpoint's own id2label
# mapping — confirm against its config.
label_list = ['B-DISEASE', 'I-DISEASE', 'O']

tokenizer = AutoTokenizer.from_pretrained(path + 'NER2_alvaroalon2')
model = AutoModelForTokenClassification.from_pretrained(path + 'NER2_alvaroalon2', num_labels=len(label_list))

paragraph = '''Stalybridge GP Richard Bircher says he’s seen people with nosebleeds, bad coughs and sore eyes from the smoke as the fire still burns on Saddleworth Moor. 100 firefighters are now up there tackling it.'''
tokens = tokenizer(paragraph)

# Build each input tensor once, adding a leading batch dimension of size 1.
input_ids = torch.tensor(tokens['input_ids']).unsqueeze(0)
attention_mask = torch.tensor(tokens['attention_mask']).unsqueeze(0)

# Inference only: call the module itself (not .forward) so registered hooks
# run, and disable autograd so no graph is built for the forward pass.
with torch.no_grad():
    logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

# Index the batch dimension explicitly (a bare squeeze() would also drop any
# other size-1 axis), then take the highest-scoring class for every token.
predictions = [label_list[i] for i in logits[0].argmax(dim=-1).tolist()]

# Decode each token id on its own so the rows line up with the predictions.
words = tokenizer.batch_decode(tokens['input_ids'])
print(pd.DataFrame({'ner': predictions, 'words': words}))

          ner         words
0           O         [CLS]
1           O            St
2           O           aly
3           O        bridge
4           O            GP
5           O       Richard
6           O         Birch
7           O            er
8           O          says
9           O            he
10          O             ’
11          O             s
12          O          seen
13          O        people
14          O          with
15  B-DISEASE          nose
16  B-DISEASE           ble
17  B-DISEASE           eds
18          O             ,
19  B-DISEASE           bad
20  I-DISEASE         cough
21  I-DISEASE             s
22          O           and
23  B-DISEASE          sore
24  I-DISEASE          eyes
25          O          from
26          O           the
27          O         smoke
28          O            as
29          O           the
30          O          fire
31          O         still
32          O         burns
33          O            on
34          O       

# Sentiment

## Sentiment0_irrelevant

In [7]:
# Two-class sequence classifier stored under 'Sentiment0_irrelevant', wrapped
# in a text-classification pipeline.
model = AutoModelForSequenceClassification.from_pretrained(
    path + 'Sentiment0_irrelevant',
    num_labels=2,
)
tokenizer = AutoTokenizer.from_pretrained(path + 'Sentiment0_irrelevant')

generator = pipeline('text-classification', model=model, tokenizer=tokenizer)

# Classify three sample texts; the call is the cell's last expression, so the
# list of {'label', 'score'} results is displayed as the cell output.
generator([
    'What a nice day!',
    'Hills Ablaze Above Manchester as U.K. Wildfire Rages for 4th Day https://t.co/vArOufXTet https://t.co/vArOufXTet',
    '@JBBuczek Quite happy to take a little off you. Raging wildfire on Saddleworth Moor near Manchester. They seem to think some heavy rain is the only thing now to put it out. But whatever the weather enjoy your day John.',
])

[{'label': 'LABEL_0', 'score': 0.8281125426292419},
 {'label': 'LABEL_1', 'score': 0.9838178157806396},
 {'label': 'LABEL_1', 'score': 0.9850481748580933}]

## Sentiment1_cardiffnlp

In [8]:
# Three-class sequence classifier stored under 'Sentiment1_cardiffnlp', wrapped
# in a text-classification pipeline.
model = AutoModelForSequenceClassification.from_pretrained(
    path + 'Sentiment1_cardiffnlp',
    num_labels=3,
)
tokenizer = AutoTokenizer.from_pretrained(path + 'Sentiment1_cardiffnlp')

generator = pipeline('text-classification', model=model, tokenizer=tokenizer)

# Classify two sample texts; the call is the cell's last expression, so the
# list of {'label', 'score'} results is displayed as the cell output.
generator([
    'big wildfire',
    'I can actually smell smoke and see it in the air in Salford. Is this from the Saddleworth moors fire?',
])

[{'label': 'LABEL_1', 'score': 0.9514998197555542},
 {'label': 'LABEL_0', 'score': 0.9926793575286865}]

## Sentiment2_finiteautomata

In [9]:
# Three-class sequence classifier stored under 'Sentiment2_finiteautomata',
# wrapped in a text-classification pipeline.
model = AutoModelForSequenceClassification.from_pretrained(
    path + 'Sentiment2_finiteautomata',
    num_labels=3,
)
tokenizer = AutoTokenizer.from_pretrained(path + 'Sentiment2_finiteautomata')

generator = pipeline('text-classification', model=model, tokenizer=tokenizer)

# Classify two sample texts; the call is the cell's last expression, so the
# list of {'label', 'score'} results is displayed as the cell output.
generator([
    'big wildfire',
    'I can actually smell smoke and see it in the air in Salford. Is this from the Saddleworth moors fire?',
])

[{'label': 'NEU', 'score': 0.9853805899620056},
 {'label': 'NEG', 'score': 0.9958932399749756}]