# TRAINING TEXT CLASSIFIERS WITH SPACY

In this lab we will train different text classifiers with spacy.

1. Read through the code and train to add more inline documentation as you try to understand the functionality.

2. We will adapt the code to train two different fake news classifiers: one on general fake news from 6 different domains and another one on celebrities, were there are legitimate news but also news which are false gossip.



In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# We will be using spacy v2
!pip install -U spacy==2.3.7

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting spacy==2.3.7
  Downloading spacy-2.3.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.5/10.5 MB[0m [31m41.1 MB/s[0m eta [36m0:00:00[0m
Collecting srsly<1.1.0,>=1.0.2
  Downloading srsly-1.0.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (211 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.1/211.1 KB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
Collecting thinc<7.5.0,>=7.4.1
  Downloading thinc-7.4.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m58.0 MB/s[0m eta [36m0:00:00[0m
Collecting plac<1.2.0,>=0.9.6
  Downloading plac-1.1.3-py2.py3-none-any.whl (20 kB)
Collecting catalogue<1.1.0,>=0.0.7
  Downloading catalogue-1.0.2-py2.py3-none-any.whl (16 kB)

In [3]:
# TODO install and test the language modules of your choice following the https://spacy.io/usage

!pip install spacy

!python -m spacy download en_core_web_sm
#!python -m spacy download en_core_web_md
!python https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.2.0/en_core_web_lg-2.2.0.tar.gz
#nlp = spacy.load("en_core_web_sm")
#nlp = spacy.load("en_core_web_md"

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting en_core_web_sm==2.3.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz (12.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.0/12.0 MB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: en_core_web_sm
  Building wheel for en_core_web_sm (setup.py) ... [?25l[?25hdone
  Created wheel for en_core_web_sm: filename=en_core_web_sm-2.3.1-py3-none-any.whl size=12047104 sha256=2b54f28d87e2b9fa36f6b8db074148ff0a342a68d5087b9b4b0c5f53c452e771
  Stored in directory: /root/.cache/pip/wheels/ee/4d/f7/563214122be1540b5f9197b52cb3ddb9c4a8070808b22d5a84
Successfully built en_core_web_sm
Installing collected pa

In [4]:
import spacy
import csv
import random
import time
import numpy as np
import pandas as pd
import re
import string

from spacy.util import minibatch, compounding
import sys
from spacy import displacy
from itertools import chain

from sklearn.metrics import classification_report

# TODO add inline documentation describing the functionality of each function

''' The "load_data" function loads data from multiple files specified in the input list "fnames".
Each file is loaded into a Pandas DataFrame using the "pd.read_csv" function, separated by tab characters and encoded in 'utf-8'.
The loaded data from each file is concatenated into a single DataFrame using "pd.concat".
The unique values of the 'Target' column are extracted and stored in a list "targets",
 and the combined DataFrame and the list of targets are returned by the function.'''

# load data
def load_data(fnames):
    data = []
    for fname in fnames:
        data.append(pd.read_csv(fname, sep='\t', encoding='utf-8'))
    data = pd.concat(data)
    targets = set(data['Target'])
    return data, list(targets)

# pre-process tweets
def cleanup(tweet):
    """we remove urls, hashtags and user symbols"""
    tweet = re.sub(r"http\S+", "", tweet.replace("#", "").replace("@", "").replace('\n', ' ').replace('\t', ' '))
    return tweet

In [5]:
# data path. trial data used as training too.
trial_file = "/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/semeval2016-task6-trialdata.utf-8.txt"
train_file = "/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/semeval2016-task6-trainingdata.utf-8.txt"
test_file = "/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/SemEval2016-Task6-subtaskA-testdata-gold.txt"

training_data, targets = load_data([trial_file, train_file])
training_data['Clean_tweet'] = training_data['Tweet'].apply(cleanup)

test_data, _ = load_data([test_file])
test_data['Clean_tweet'] = test_data['Tweet'].apply(cleanup)
display(training_data)

Unnamed: 0,ID,Target,Tweet,Stance,Clean_tweet
0,1,Hillary Clinton,"@tedcruz And, #HandOverTheServer she wiped cle...",AGAINST,"tedcruz And, HandOverTheServer she wiped clean..."
1,2,Hillary Clinton,Hillary is our best choice if we truly want to...,FAVOR,Hillary is our best choice if we truly want to...
2,3,Hillary Clinton,@TheView I think our country is ready for a fe...,AGAINST,TheView I think our country is ready for a fem...
3,4,Hillary Clinton,I just gave an unhealthy amount of my hard-ear...,AGAINST,I just gave an unhealthy amount of my hard-ear...
4,5,Hillary Clinton,@PortiaABoulger Thank you for adding me to you...,NONE,PortiaABoulger Thank you for adding me to your...
...,...,...,...,...,...
2809,2910,Legalization of Abortion,"There's a law protecting unborn eagles, but no...",AGAINST,"There's a law protecting unborn eagles, but no..."
2810,2911,Legalization of Abortion,I am 1 in 3... I have had an abortion #Abortio...,AGAINST,I am 1 in 3... I have had an abortion Abortion...
2811,2912,Legalization of Abortion,How dare you say my sexual preference is a cho...,AGAINST,How dare you say my sexual preference is a cho...
2812,2913,Legalization of Abortion,"Equal rights for those 'born that way', no rig...",AGAINST,"Equal rights for those 'born that way', no rig..."


In [6]:
for target in targets:
  training_data[training_data['Target'] == target][['Stance', 'Clean_tweet']].to_csv(f"/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/semeval2016-task6-train.{target}.tsv",
          sep="\t", index=False, quoting=csv.QUOTE_NONE, quotechar="", escapechar="")
  test_data[test_data['Target'] == target][['Stance', 'Clean_tweet']].to_csv(f"/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/SemEval2016-Task6-subtaskA-test.{target}.tsv",
          sep="\t", index=False, quoting=csv.QUOTE_NONE, quotechar="", escapechar="")

In [7]:
'''The function loads a CSV file into a pandas dataframe "training_data".
It then outputs the count of each unique value in the "Stance" column.
The "Clean_tweet" and "Stance" columns are extracted into lists "train_texts" and "train_cats".
A list of dictionaries "final_train_cats" with binary values is created to represent the stance categories.
The function returns the list of tuples "train_data", the "train_texts" and "train_cats"
'''

def load_data_spacy(fname):
  training_data = pd.read_csv(fname, sep='\t', encoding='utf-8')
  #train_data.dropna(axis = 0, how ='any',inplace=True)
  #train_data['Num_words_text'] = train_data['text'].apply(lambda x:len(str(x).split())) 
  #mask = train_data['Num_words_text'] >2
  #train_data = train_data[mask]
  print(training_data['Stance'].value_counts())
   
  train_texts = training_data['Clean_tweet'].tolist()
  train_cats = training_data['Stance'].tolist()
  final_train_cats=[]
  for cat in train_cats:
    cat_list = {}
    if cat == 'AGAINST':
      cat_list['AGAINST'] =  1
      cat_list['FAVOR'] =  0
      cat_list['NONE'] =  0
    elif cat == 'FAVOR':
      cat_list['AGAINST'] =  0
      cat_list['FAVOR'] =  1
      cat_list['NONE'] =  0
    else:
      cat_list['AGAINST'] =  0
      cat_list['FAVOR'] =  0
      cat_list['NONE'] =  1
    final_train_cats.append(cat_list)
    
  train_data = list(zip(train_texts, [{"cats": cats} for cats in final_train_cats]))
  return train_data, train_texts, train_cats


In [8]:
training_data, train_texts, train_cats = load_data_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/semeval2016-task6-train.Feminist Movement.tsv')
print(training_data[:10])
print(len(training_data))
test_data, test_texts, test_cats = load_data_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/SemEval2016-Task6-subtaskA-test.Feminist Movement.tsv')
print(len(test_data))

AGAINST    328
FAVOR      210
NONE       126
Name: Stance, dtype: int64
[('Always a delight to see chest-drumming alpha males hiss and scuttle backwards up the wall when a feminist enters the room. manly SemST', {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ("Sometimes I overheat and want to take off my shirt but can't because of social expectations of people with breasts. ;n; SemST", {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ('If feminists spent 1/2 as much time reading papers as they do tumblr they would be real people, not ignorant sexist bigots. SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ('Stupid Feminists, the civilization you take for granted was built with the labour, blood sweat and tears of men. SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ("YOU'RE A GIRL AND HAVE A SEX DRIVE!? YOU MUST BE A SLUT! feminist SemST", {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ("Suns out....  Dresses out...  StreetHarassment out...  This shouldn't be 

In [9]:
'''This function sorts a list of sublists in descending order based on the second element of each sublist.
 The "key" parameter of the "sorted" function is set to a lambda function that returns the second element of each sublist, and the
  "reverse" parameter is set to "True", 
so the resulting list will be sorted in descending order. '''

def Sort(sub_li):
  # reverse = True (Soresulting_list = list(first_list)rts in Descending  order) 
  # key is set to sort using second element of  
  # sublist lambda has been used 
  return(sorted(sub_li, key = lambda x: x[1],reverse=True))  

# run the predictions on each sentence in the evaluation  dataset, and return the metrics
'''The "evaluate" function evaluates the performance of a tokenizer and a text categorization model on a set of test text data.
 It tokenizes the test texts, processes them through the text categorization model to get prediction scores for each text, extracts the top-rated category for each text,
  and compares the predicted categories with the true categories. The function then prints a performance evaluation report in terms of precision, recall, and F1-score for each class. '''
def evaluate(tokenizer, textcat, test_texts, test_cats ):
  docs = (tokenizer(text) for text in test_texts)
  preds = []
  for i, doc in enumerate(textcat.pipe(docs)):
    #print(doc.cats.items())
    scores = Sort(doc.cats.items())
    #print(scores)
    catList=[]
    for score in scores:
      catList.append(score[0])
    preds.append(catList[0])
        
  labels = ['AGAINST', 'FAVOR']
 
  print(classification_report(test_cats, preds,labels=labels))
    

In [10]:
def train_spacy(  train_data, iterations,test_texts,test_cats, model_arch, dropout = 0.3, model=None, init_tok2vec=None):
    ''' Train a spacy model, which can be queried against with test data
   
    train_data : training data in the format of (sentence, {cats: ['AGAINST'|'FAVOR'|'NONE']})
    labels : a list of unique annotations
    iterations : number of training iterations
    dropout : dropout proportion for training
    display_freq : number of epochs between logging losses to console
    '''
    
    nlp = spacy.load('en_core_web_sm')
    

    # add the text classifier to the pipeline if it doesn't exist
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if "textcat" not in nlp.pipe_names:
        textcat = nlp.create_pipe(
            "textcat", config={"exclusive_classes": True, "architecture": model_arch}
        )
        nlp.add_pipe(textcat, last=True)
        
    # otherwise, get it, so we can add labels to it
    else:
        textcat = nlp.get_pipe("textcat")

    # add label to text classifier
    textcat.add_label("AGAINST")
    textcat.add_label("FAVOR")
    textcat.add_label("NONE")
   

    # get names of other pipes to disable them during training
    pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"]
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
    with nlp.disable_pipes(*other_pipes):  # only train textcat
        optimizer = nlp.begin_training()
        if init_tok2vec is not None:
            with init_tok2vec.open("rb") as file_:
                textcat.model.tok2vec.from_bytes(file_.read())
        print("Training the model...")
        print("{:^5}\t{:^5}\t{:^5}\t{:^5}".format("LOSS", "P", "R", "F"))
        batch_sizes = compounding(16.0, 64.0, 1.5)
        for i in range(iterations):
            print('Iteration: '+str(i))
            #start_time = time.process_time()
            losses = {}
            # batch up the examples using spaCy's minibatch
            random.shuffle(train_data)
            batches = minibatch(train_data, size=batch_sizes)
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, drop=dropout, losses=losses)
            with textcat.model.use_params(optimizer.averages):
                # evaluate on the test data 
                evaluate(nlp.tokenizer, textcat, test_texts,test_cats)
            #print ('Elapsed time'+str(time.process_time() - start_time)+  "seconds")
        with nlp.use_params(optimizer.averages):
            model_name = model_arch + "_hiliary_clinton"
            #model_name = model_arch + "_legalization_of_abortion"
            #model_name = model_arch + "_Atheism"
            #model_name = model_arch + "_climate_change"
            #model_name = model_arch + "_feminism"
            filepath = "/content/drive/MyDrive/NLP_Applications_1/DATA" + model_name 
            nlp.to_disk(filepath)
    return nlp

In [11]:
nlp = train_spacy(training_data, 10, test_texts, test_cats, "bow")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.64      1.00      0.78       183
       FAVOR       0.00      0.00      0.00        58

   micro avg       0.64      0.76      0.70       241
   macro avg       0.32      0.50      0.39       241
weighted avg       0.49      0.76      0.59       241

Iteration: 1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.64      1.00      0.78       183
       FAVOR       0.00      0.00      0.00        58

   micro avg       0.64      0.76      0.70       241
   macro avg       0.32      0.50      0.39       241
weighted avg       0.49      0.76      0.59       241

Iteration: 2


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.64      1.00      0.78       183
       FAVOR       0.00      0.00      0.00        58

   micro avg       0.64      0.76      0.70       241
   macro avg       0.32      0.50      0.39       241
weighted avg       0.49      0.76      0.59       241

Iteration: 3
              precision    recall  f1-score   support

     AGAINST       0.65      0.99      0.78       183
       FAVOR       0.33      0.03      0.06        58

   micro avg       0.64      0.76      0.70       241
   macro avg       0.49      0.51      0.42       241
weighted avg       0.57      0.76      0.61       241

Iteration: 4
              precision    recall  f1-score   support

     AGAINST       0.65      0.98      0.78       183
       FAVOR       0.30      0.05      0.09        58

   micro avg       0.64      0.76      0.69       241
   macro avg       0.48      0.51      0.43       241
weighted avg       0.57      0.76      0.61     

In [12]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "simple_cnn")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.64      1.00      0.78       183
       FAVOR       0.00      0.00      0.00        58

   micro avg       0.64      0.76      0.70       241
   macro avg       0.32      0.50      0.39       241
weighted avg       0.49      0.76      0.59       241

Iteration: 1
              precision    recall  f1-score   support

     AGAINST       0.64      1.00      0.78       183
       FAVOR       0.00      0.00      0.00        58

   micro avg       0.64      0.76      0.70       241
   macro avg       0.32      0.50      0.39       241
weighted avg       0.49      0.76      0.60       241

Iteration: 2
              precision    recall  f1-score   support

     AGAINST       0.70      0.58      0.64       183
       FAVOR       0.26      0.59      0.36        58

   micro avg       0.50      0.59      0.54       241
   macro avg       0.48      0.59      0.50       241
weighted avg       0.60      0.59      0.57     

In [13]:
textcat_bow = spacy.load("/content/drive/MyDrive/NLP_Applications_1/DATAbow_feminism")
tweets = textcat_bow(test_texts[10])
print("Text: "+ test_texts[10])
print("Gold Label:"+ test_cats[10])
print(" Predicted Label:") 
print(tweets.cats)
print("=======================================")

Text: sometiimes you just feel like punching a feminist in the face SemST
Gold Label:AGAINST
 Predicted Label:
{'AGAINST': 0.42418912053108215, 'FAVOR': 0.3536691665649414, 'NONE': 0.22214169800281525}


# ASSIGNMENTS

1. TODO Train the classifiers for the other 4 targets in the Stance SemEval 2016 dataset.

2. TODO Reuse the above code to train a new classifier for fake news using the celebrity and the fake news datasets: 

  Data: "/content/drive/My Drive/Colab Notebooks/2023-ILTAPP/datasets/fake_rada"

  2.1 HINT: You need to (i) load the data into a pandas dataframe; (ii) modify the labels from the converter and training functions.

  2.2 HINT:Once you have a pandas dataframe, it is easy to split the data into 80% for training and 20% for testing.

3. TODO Try the different spacy language models to see the difference in performance.

1. TODO Train the classifiers for the other 4 targets in the Stance SemEval 2016 dataset.

In [49]:
print(targets)

['Atheism', 'Legalization of Abortion', 'Hillary Clinton', 'Climate Change is a Real Concern', 'Feminist Movement']


#1.TODO Train the classifiers for the other 4 targets in the Stance SemEval 2016 dataset

#Climate_change

In [None]:
training_data, train_texts, train_cats = load_data_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/semeval2016-task6-train.Climate Change is a Real Concern.tsv')
print(training_data[:10])
print(len(training_data))
test_data, test_texts, test_cats = load_data_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/SemEval2016-Task6-subtaskA-test.Climate Change is a Real Concern.tsv')
print(len(test_data))

FAVOR      212
NONE       168
AGAINST     15
Name: Stance, dtype: int64
[('We cant deny it, its really happening.  SemST', {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ('RT cderworiz: Timelines are short. Strategy must be in place by climate change conference in Paris by December. ableg SemST', {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ('SO EXCITING! Meaningful climate change action is on the way! abpoli GHG SemST', {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ('Delivering good jobs for Albertans, maintaining a stable economy & meeting climate change strategy. Good goals. abpoli GHG SemST', {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ('davidswann says he wants carbon fund to be spent on public transportation and renewable energy. ejlive ableg SemST', {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ('Questions about the LancetGH report?  asklancet tweet chat happening now! actonclimate ClimateHealth SemST', {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ('We

In [None]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "bow")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.75      0.98      0.85       123

   micro avg       0.75      0.90      0.82       134
   macro avg       0.37      0.49      0.42       134
weighted avg       0.69      0.90      0.78       134

Iteration: 1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.75      0.98      0.85       123

   micro avg       0.75      0.90      0.82       134
   macro avg       0.38      0.49      0.43       134
weighted avg       0.69      0.90      0.78       134

Iteration: 2


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.76      0.98      0.86       123

   micro avg       0.76      0.90      0.82       134
   macro avg       0.38      0.49      0.43       134
weighted avg       0.69      0.90      0.78       134

Iteration: 3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.75      0.98      0.85       123

   micro avg       0.75      0.90      0.82       134
   macro avg       0.38      0.49      0.43       134
weighted avg       0.69      0.90      0.78       134

Iteration: 4
              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.76      0.97      0.85       123

   micro avg       0.76      0.89      0.82       134
   macro avg       0.38      0.48      0.43       134
weighted avg       0.70      0.89      0.78       134

Iteration: 5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.76      0.97      0.85       123

   micro avg       0.76      0.89      0.82       134
   macro avg       0.38      0.48      0.43       134
weighted avg       0.70      0.89      0.78       134

Iteration: 6
              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.76      0.97      0.85       123

   micro avg       0.76      0.89      0.82       134
   macro avg       0.38      0.48      0.43       134
weighted avg       0.70      0.89      0.78       134

Iteration: 7


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.76      0.96      0.85       123

   micro avg       0.76      0.88      0.82       134
   macro avg       0.38      0.48      0.42       134
weighted avg       0.70      0.88      0.78       134

Iteration: 8
              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.77      0.96      0.86       123

   micro avg       0.77      0.88      0.82       134
   macro avg       0.39      0.48      0.43       134
weighted avg       0.71      0.88      0.78       134

Iteration: 9


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.77      0.94      0.85       123

   micro avg       0.77      0.87      0.81       134
   macro avg       0.38      0.47      0.42       134
weighted avg       0.71      0.87      0.78       134

Iteration: 10
              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.77      0.93      0.84       123

   micro avg       0.77      0.86      0.81       134
   macro avg       0.38      0.47      0.42       134
weighted avg       0.70      0.86      0.77       134

Iteration: 11


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.77      0.93      0.84       123

   micro avg       0.77      0.86      0.81       134
   macro avg       0.38      0.47      0.42       134
weighted avg       0.70      0.86      0.77       134

Iteration: 12
              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.77      0.93      0.84       123

   micro avg       0.77      0.85      0.81       134
   macro avg       0.39      0.46      0.42       134
weighted avg       0.71      0.85      0.77       134

Iteration: 13


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.77      0.91      0.84       123

   micro avg       0.77      0.84      0.80       134
   macro avg       0.39      0.46      0.42       134
weighted avg       0.71      0.84      0.77       134

Iteration: 14
              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.77      0.90      0.83       123

   micro avg       0.77      0.83      0.80       134
   macro avg       0.39      0.45      0.42       134
weighted avg       0.71      0.83      0.76       134

Iteration: 15


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.77      0.90      0.83       123

   micro avg       0.77      0.83      0.80       134
   macro avg       0.39      0.45      0.42       134
weighted avg       0.71      0.83      0.76       134

Iteration: 16
              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.78      0.90      0.83       123

   micro avg       0.78      0.83      0.80       134
   macro avg       0.39      0.45      0.42       134
weighted avg       0.71      0.83      0.77       134

Iteration: 17


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.77      0.89      0.83       123

   micro avg       0.77      0.82      0.80       134
   macro avg       0.39      0.45      0.42       134
weighted avg       0.71      0.82      0.76       134

Iteration: 18
              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.77      0.89      0.83       123

   micro avg       0.77      0.82      0.80       134
   macro avg       0.39      0.45      0.42       134
weighted avg       0.71      0.82      0.76       134

Iteration: 19
              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.77      0.89      0.83       123

   micro avg       0.77      0.82      0.80       134
   macro avg       0.39      0.45      0.42       134
weighted avg       0.71      0.82      0.76   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "simple_cnn")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.73      1.00      0.84       123

   micro avg       0.73      0.92      0.81       134
   macro avg       0.36      0.50      0.42       134
weighted avg       0.67      0.92      0.77       134

Iteration: 1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.78      0.38      0.51       123

   micro avg       0.78      0.35      0.48       134
   macro avg       0.39      0.19      0.26       134
weighted avg       0.72      0.35      0.47       134

Iteration: 2


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.73      1.00      0.85       123

   micro avg       0.73      0.92      0.81       134
   macro avg       0.37      0.50      0.42       134
weighted avg       0.67      0.92      0.78       134

Iteration: 3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.75      0.96      0.84       123

   micro avg       0.75      0.88      0.81       134
   macro avg       0.38      0.48      0.42       134
weighted avg       0.69      0.88      0.77       134

Iteration: 4


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.80      0.84      0.82       123

   micro avg       0.80      0.77      0.78       134
   macro avg       0.40      0.42      0.41       134
weighted avg       0.73      0.77      0.75       134

Iteration: 5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.81      0.85      0.83       123

   micro avg       0.81      0.78      0.79       134
   macro avg       0.40      0.42      0.41       134
weighted avg       0.74      0.78      0.76       134

Iteration: 6


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.81      0.80      0.80       123

   micro avg       0.81      0.73      0.77       134
   macro avg       0.40      0.40      0.40       134
weighted avg       0.74      0.73      0.74       134

Iteration: 7


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.80      0.80      0.80       123

   micro avg       0.80      0.74      0.77       134
   macro avg       0.40      0.40      0.40       134
weighted avg       0.74      0.74      0.74       134

Iteration: 8


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.81      0.82      0.82       123

   micro avg       0.81      0.75      0.78       134
   macro avg       0.41      0.41      0.41       134
weighted avg       0.75      0.75      0.75       134

Iteration: 9
              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.80      0.85      0.82       123

   micro avg       0.79      0.78      0.78       134
   macro avg       0.40      0.42      0.41       134
weighted avg       0.73      0.78      0.75       134

Iteration: 10
              precision    recall  f1-score   support

     AGAINST       0.00      0.00      0.00        11
       FAVOR       0.80      0.84      0.82       123

   micro avg       0.79      0.77      0.78       134
   macro avg       0.40      0.42      0.41       134
weighted avg       0.74      0.77      0.75    

In [50]:
textcat_bow = spacy.load("/content/drive/MyDrive/NLP_Applications_1/DATAbow_climate_change")
tweets = textcat_bow(test_texts[10])
print("Text: "+ test_texts[10])
print("Gold Label:"+ test_cats[10])
print(" Predicted Label:") 
print(tweets.cats)
print("=======================================")

Text: Trump's travel ban still out of favor with court A federal appellate court on Thursday refused to reinstate Trump's travel ban. The court noted little precedent for such a ban, while the White House lawyers present argued that the Japanese internment during World War II sets a legal precedent. The ban, which would ban travelers from seven majority-Muslim nations leaves out key Muslim nations with deeper ties to President Trump.
Gold Label:fake
 Predicted Label:
{'AGAINST': 0.002197574358433485, 'FAVOR': 0.7536566257476807, 'NONE': 0.24414588510990143}


#Atheism

In [None]:
training_data, train_texts, train_cats = load_data_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/semeval2016-task6-train.Atheism.tsv')
print(training_data[:10])
print(len(training_data))
test_data, test_texts, test_cats = load_data_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/SemEval2016-Task6-subtaskA-test.Atheism.tsv')
print(len(test_data))

AGAINST    304
NONE       117
FAVOR       92
Name: Stance, dtype: int64
[('dear lord thank u for all of ur blessings forgive my sins lord give me strength and energy for this busy day ahead blessed hope SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ('Blessed are the peacemakers, for they shall be called children of God. Matthew 5:9 scripture peace SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ('I am not conformed to this world. I am transformed by the renewing of my mind. ISpeakLife God 2014 SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ('Salah should be prayed with focus and understanding. Allah warns against lazy prayers done just for show Surah Al-Maoon 107:4-6 SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ('And stay in your houses and do not display yourselves like that of the times of ignorance." [Quran 33:33].islam SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ('If we are unsure whether something is halal or haram, we sh

In [None]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "bow")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.73      1.00      0.84       160
       FAVOR       0.00      0.00      0.00        32

   micro avg       0.73      0.83      0.78       192
   macro avg       0.36      0.50      0.42       192
weighted avg       0.61      0.83      0.70       192

Iteration: 1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.73      1.00      0.84       160
       FAVOR       0.00      0.00      0.00        32

   micro avg       0.73      0.83      0.78       192
   macro avg       0.36      0.50      0.42       192
weighted avg       0.61      0.83      0.70       192

Iteration: 2


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.73      1.00      0.84       160
       FAVOR       0.00      0.00      0.00        32

   micro avg       0.73      0.83      0.78       192
   macro avg       0.36      0.50      0.42       192
weighted avg       0.61      0.83      0.70       192

Iteration: 3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.73      1.00      0.84       160
       FAVOR       0.00      0.00      0.00        32

   micro avg       0.73      0.83      0.78       192
   macro avg       0.36      0.50      0.42       192
weighted avg       0.61      0.83      0.70       192

Iteration: 4


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.73      1.00      0.84       160
       FAVOR       0.00      0.00      0.00        32

   micro avg       0.73      0.83      0.78       192
   macro avg       0.36      0.50      0.42       192
weighted avg       0.61      0.83      0.70       192

Iteration: 5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.73      1.00      0.84       160
       FAVOR       0.00      0.00      0.00        32

   micro avg       0.73      0.83      0.78       192
   macro avg       0.36      0.50      0.42       192
weighted avg       0.61      0.83      0.70       192

Iteration: 6


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.73      1.00      0.84       160
       FAVOR       0.00      0.00      0.00        32

   micro avg       0.73      0.83      0.78       192
   macro avg       0.36      0.50      0.42       192
weighted avg       0.61      0.83      0.70       192

Iteration: 7


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.73      1.00      0.84       160
       FAVOR       0.00      0.00      0.00        32

   micro avg       0.73      0.83      0.78       192
   macro avg       0.36      0.50      0.42       192
weighted avg       0.61      0.83      0.70       192

Iteration: 8
              precision    recall  f1-score   support

     AGAINST       0.73      1.00      0.84       160
       FAVOR       1.00      0.03      0.06        32

   micro avg       0.73      0.84      0.78       192
   macro avg       0.87      0.52      0.45       192
weighted avg       0.78      0.84      0.71       192

Iteration: 9
              precision    recall  f1-score   support

     AGAINST       0.73      0.99      0.84       160
       FAVOR       0.50      0.03      0.06        32

   micro avg       0.73      0.83      0.78       192
   macro avg       0.62      0.51      0.45       192
weighted avg       0.69      0.83      0.71     

In [None]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "simple_cnn")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.73      1.00      0.84       160
       FAVOR       0.00      0.00      0.00        32

   micro avg       0.73      0.83      0.78       192
   macro avg       0.36      0.50      0.42       192
weighted avg       0.61      0.83      0.70       192

Iteration: 1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.73      1.00      0.84       160
       FAVOR       0.00      0.00      0.00        32

   micro avg       0.73      0.83      0.78       192
   macro avg       0.36      0.50      0.42       192
weighted avg       0.61      0.83      0.70       192

Iteration: 2
              precision    recall  f1-score   support

     AGAINST       0.76      0.90      0.83       160
       FAVOR       0.20      0.03      0.05        32

   micro avg       0.75      0.76      0.75       192
   macro avg       0.48      0.47      0.44       192
weighted avg       0.67      0.76      0.70       192

Iteration: 3
              precision    recall  f1-score   support

     AGAINST       0.81      0.83      0.82       160
       FAVOR       0.33      0.12      0.18        32

   micro avg       0.77      0.71      0.74       192
   macro avg       0.57      0.48      0.50       192
weighted avg       0.73      0.71      0.71     

In [None]:
textcat_bow = spacy.load("/content/drive/MyDrive/NLP_Applications_1/DATAbow_Atheism")
tweets = textcat_bow(test_texts[10])
print("Text: "+ test_texts[10])
print("Gold Label:"+ test_cats[10])
print(" Predicted Label:") 
print(tweets.cats)
print("=======================================")

Text: If only dreams were real, now it's gone. SingleBecause getonyourfeet SemST
Gold Label:AGAINST
 Predicted Label:
{'AGAINST': 0.09605187922716141, 'FAVOR': 0.55244380235672, 'NONE': 0.3515043556690216}


#Legalization of abortion

In [None]:
training_data, train_texts, train_cats = load_data_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/semeval2016-task6-train.Legalization of Abortion.tsv')
print(training_data[:10])
print(len(training_data))
test_data, test_texts, test_cats = load_data_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/SemEval2016-Task6-subtaskA-test.Legalization of Abortion.tsv')
print(len(test_data))

AGAINST    355
NONE       177
FAVOR      121
Name: Stance, dtype: int64
[('Just laid down the law on abortion in my bioethics class. Catholic SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ("tooprettyclub Are you OK with GOP males telling you what you can and can't do with your own body? SemST", {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ("If you don't want your kid, put it up for adoption. sorrynotsorry SemST", {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ('RedAlert -there should be a "stigma" to butchering pre-born children - its a horrendous crime against humanity.  murder SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ("But isn't that the problem then. Not enough faith. gaystapo socialism SemST", {'cats': {'AGAINST': 0, 'FAVOR': 0, 'NONE': 1}}), ('Life is our first and most basic human right. SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ("Rise & Shine its a new day & you're alive. Thank God 4 another day of precious life. Christian Cat

In [None]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "simple_cnn")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.68      1.00      0.81       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.68      0.80      0.73       235
   macro avg       0.34      0.50      0.40       235
weighted avg       0.54      0.80      0.65       235

Iteration: 1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.68      1.00      0.81       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.68      0.80      0.73       235
   macro avg       0.34      0.50      0.40       235
weighted avg       0.54      0.80      0.65       235

Iteration: 2
              precision    recall  f1-score   support

     AGAINST       0.68      0.95      0.79       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.68      0.76      0.72       235
   macro avg       0.34      0.47      0.40       235
weighted avg       0.55      0.76      0.64       235

Iteration: 3
              precision    recall  f1-score   support

     AGAINST       0.73      0.75      0.74       189
       FAVOR       0.56      0.39      0.46        46

   micro avg       0.71      0.68      0.69       235
   macro avg       0.65      0.57      0.60       235
weighted avg       0.70      0.68      0.68     

In [None]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "bow")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.68      1.00      0.81       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.68      0.80      0.73       235
   macro avg       0.34      0.50      0.40       235
weighted avg       0.54      0.80      0.65       235

Iteration: 1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.68      1.00      0.81       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.68      0.80      0.74       235
   macro avg       0.34      0.50      0.40       235
weighted avg       0.54      0.80      0.65       235

Iteration: 2


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.68      1.00      0.81       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.68      0.80      0.74       235
   macro avg       0.34      0.50      0.40       235
weighted avg       0.54      0.80      0.65       235

Iteration: 3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.68      1.00      0.81       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.68      0.80      0.74       235
   macro avg       0.34      0.50      0.40       235
weighted avg       0.54      0.80      0.65       235

Iteration: 4


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.68      1.00      0.81       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.68      0.80      0.74       235
   macro avg       0.34      0.50      0.40       235
weighted avg       0.54      0.80      0.65       235

Iteration: 5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.68      1.00      0.81       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.68      0.80      0.74       235
   macro avg       0.34      0.50      0.40       235
weighted avg       0.54      0.80      0.65       235

Iteration: 6
              precision    recall  f1-score   support

     AGAINST       0.68      0.99      0.81       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.67      0.80      0.73       235
   macro avg       0.34      0.50      0.40       235
weighted avg       0.54      0.80      0.65       235

Iteration: 7
              precision    recall  f1-score   support

     AGAINST       0.68      0.99      0.80       189
       FAVOR       0.50      0.02      0.04        46

   micro avg       0.68      0.80      0.73       235
   macro avg       0.59      0.51      0.42       235
weighted avg       0.64      0.80      0.66     

In [None]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "simple_cnn")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.68      1.00      0.81       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.68      0.80      0.73       235
   macro avg       0.34      0.50      0.40       235
weighted avg       0.54      0.80      0.65       235

Iteration: 1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.68      1.00      0.81       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.68      0.80      0.73       235
   macro avg       0.34      0.50      0.40       235
weighted avg       0.54      0.80      0.65       235

Iteration: 2
              precision    recall  f1-score   support

     AGAINST       0.67      0.89      0.77       189
       FAVOR       0.00      0.00      0.00        46

   micro avg       0.67      0.71      0.69       235
   macro avg       0.34      0.44      0.38       235
weighted avg       0.54      0.71      0.62       235

Iteration: 3
              precision    recall  f1-score   support

     AGAINST       0.73      0.71      0.72       189
       FAVOR       0.50      0.33      0.39        46

   micro avg       0.70      0.64      0.67       235
   macro avg       0.61      0.52      0.56       235
weighted avg       0.68      0.64      0.66     

In [None]:
textcat_bow = spacy.load("/content/drive/MyDrive/NLP_Applications_1/DATAbow_legalization_of_abortion")
tweets = textcat_bow(test_texts[10])
print("Text: "+ test_texts[10])
print("Gold Label:"+ test_cats[10])
print(" Predicted Label:") 
print(tweets.cats)
print("=======================================")

Text: The government has given no explanation of why the law was changed macedonia HRCtte SemST
Gold Label:AGAINST
 Predicted Label:
{'AGAINST': 0.12442812323570251, 'FAVOR': 0.3234978914260864, 'NONE': 0.5520740151405334}


#Hilary_clinton

In [None]:
training_data, train_texts, train_cats = load_data_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/semeval2016-task6-train.Hillary Clinton.tsv')
print(training_data[:10])
print(len(training_data))
test_data, test_texts, test_cats = load_data_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/SemEval2016-Task6-subtaskA-test.Hillary Clinton.tsv')
print(len(test_data))

AGAINST    393
NONE       178
FAVOR      118
Name: Stance, dtype: int64
[('tedcruz And, HandOverTheServer she wiped clean + 30k deleted emails, explains dereliction of duty/lies re Benghazi,etc tcot SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ('Hillary is our best choice if we truly want to continue being a progressive nation. Ohio SemST', {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ("TheView I think our country is ready for a female pres, it can't ever be Hillary SemST", {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ("I just gave an unhealthy amount of my hard-earned money away to the big gov't & untrustworthy IRS. WhyImNotVotingForHillary SemST", {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ('PortiaABoulger Thank you for adding me to your list SemST', {'cats': {'AGAINST': 0, 'FAVOR': 0, 'NONE': 1}}), ("Hillary can not win. Here's hoping the Dems offer a real candidate like Warren. Warren2016 SemST", {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ('Resp

In [None]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "bow")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.58      1.00      0.74       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.58      0.79      0.67       217
   macro avg       0.29      0.50      0.37       217
weighted avg       0.46      0.79      0.58       217

Iteration: 1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.58      1.00      0.74       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.58      0.79      0.67       217
   macro avg       0.29      0.50      0.37       217
weighted avg       0.46      0.79      0.58       217

Iteration: 2


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.58      1.00      0.74       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.58      0.79      0.67       217
   macro avg       0.29      0.50      0.37       217
weighted avg       0.46      0.79      0.58       217

Iteration: 3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.58      1.00      0.74       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.58      0.79      0.67       217
   macro avg       0.29      0.50      0.37       217
weighted avg       0.46      0.79      0.58       217

Iteration: 4


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.58      1.00      0.74       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.58      0.79      0.67       217
   macro avg       0.29      0.50      0.37       217
weighted avg       0.46      0.79      0.58       217

Iteration: 5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.58      1.00      0.74       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.58      0.79      0.67       217
   macro avg       0.29      0.50      0.37       217
weighted avg       0.46      0.79      0.58       217

Iteration: 6


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.58      0.99      0.74       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.58      0.79      0.67       217
   macro avg       0.29      0.50      0.37       217
weighted avg       0.46      0.79      0.58       217

Iteration: 7


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.59      0.99      0.74       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.59      0.79      0.67       217
   macro avg       0.29      0.50      0.37       217
weighted avg       0.47      0.79      0.59       217

Iteration: 8


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.60      0.98      0.74       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.60      0.78      0.67       217
   macro avg       0.30      0.49      0.37       217
weighted avg       0.47      0.78      0.59       217

Iteration: 9


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.60      0.98      0.75       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.60      0.78      0.68       217
   macro avg       0.30      0.49      0.37       217
weighted avg       0.48      0.78      0.59       217

Iteration: 10


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.61      0.98      0.75       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.61      0.78      0.68       217
   macro avg       0.31      0.49      0.38       217
weighted avg       0.48      0.78      0.60       217

Iteration: 11


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.61      0.98      0.75       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.61      0.78      0.69       217
   macro avg       0.31      0.49      0.38       217
weighted avg       0.49      0.78      0.60       217

Iteration: 12


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.61      0.98      0.75       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.61      0.78      0.69       217
   macro avg       0.31      0.49      0.38       217
weighted avg       0.49      0.78      0.60       217

Iteration: 13
              precision    recall  f1-score   support

     AGAINST       0.62      0.98      0.76       172
       FAVOR       1.00      0.02      0.04        45

   micro avg       0.62      0.78      0.69       217
   macro avg       0.81      0.50      0.40       217
weighted avg       0.70      0.78      0.61       217

Iteration: 14
              precision    recall  f1-score   support

     AGAINST       0.62      0.97      0.76       172
       FAVOR       1.00      0.07      0.12        45

   micro avg       0.62      0.78      0.70       217
   macro avg       0.81      0.52      0.44       217
weighted avg       0.70      0.78      0.63   

In [None]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "simple_cnn")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.58      1.00      0.74       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.58      0.79      0.67       217
   macro avg       0.29      0.50      0.37       217
weighted avg       0.46      0.79      0.58       217

Iteration: 1


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

     AGAINST       0.58      1.00      0.74       172
       FAVOR       0.00      0.00      0.00        45

   micro avg       0.58      0.79      0.67       217
   macro avg       0.29      0.50      0.37       217
weighted avg       0.46      0.79      0.58       217

Iteration: 2
              precision    recall  f1-score   support

     AGAINST       0.61      0.97      0.75       172
       FAVOR       1.00      0.02      0.04        45

   micro avg       0.61      0.77      0.68       217
   macro avg       0.80      0.50      0.39       217
weighted avg       0.69      0.77      0.60       217

Iteration: 3
              precision    recall  f1-score   support

     AGAINST       0.64      0.91      0.75       172
       FAVOR       1.00      0.11      0.20        45

   micro avg       0.64      0.75      0.69       217
   macro avg       0.82      0.51      0.47       217
weighted avg       0.71      0.75      0.64     

In [None]:
textcat_ensemble = spacy.load("/content/drive/MyDrive/NLP_Applications_1/DATAbow_hiliary_clinton")
tweets = textcat_bow(test_texts[10])
print("Text: "+ test_texts[10])
print("Gold Label:"+ test_cats[10])
print(" Predicted Label:") 
print(tweets.cats)
print("=======================================")

Text: BeladonnaRogers Chairmnoomowmow The compliant media allowed themeselves to b herded by a couple of strands of rope? Good sheep! SemST
Gold Label:AGAINST
 Predicted Label:
{'AGAINST': 0.06873271614313126, 'FAVOR': 0.5350690484046936, 'NONE': 0.3961983025074005}


#2.TODO Reuse the above code to train a new classifier for fake news using the celebrity and the fake news datasets:

Data: "/content/drive/My Drive/Colab Notebooks/2022-ILTAPP/datasets/fake_rada"

2.1 HINT: You need to (i) load the data into a pandas dataframe; (ii) modify the labels from the converter and training functions.

2.2 HINT:Once you have a pandas dataframe, it is easy to split the data into 80% for training and 20% for testing.

#Celebrity_daata_set

In [51]:
'''This function takes in a list of file names and reads in the data in those files using the pandas library. 
The data is read in as a pandas dataframe with columns "label" and "text".
 The function concatenates all the dataframes into one dataframe and returns it.'''

def cleaning_of_data(filenames):
    text = []
    for filename in filenames:
        text.append(pd.read_csv(filename, sep='\t', encoding='utf-8', names=['label', 'text']))
    text = pd.concat(text)
    return text

cleb_file = "/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/fake_rada/celebrity_full.tsv"
celeb_data = cleaning_of_data([cleb_file])
print(celeb_data)

     label                                               text
0    legit  Jennifer Aniston dashes 'Friends' reunion hope...
1    legit  This Is What Brad Pitt Has Been Texting Jennif...
2    legit  Jennifer Aniston's spokesman denies reports th...
3    legit  Jennifer Aniston sparks adoption rumors Before...
4    legit  Jennifer Aniston denies she had an affair with...
..     ...                                                ...
495   fake  Devastated Johnny Depp Begging For Ex-Wife Van...
496   fake  New Suicide Fears For Owen Wilson After Dad’s ...
497   fake  Did Taylor Swift Leak Her Romance With Joe Alw...
498   fake  Is Ryan Seacrest Quitting 'Live'? Ryan Seacres...
499   fake  Digital Diva! Inside Caitlyn Jenner’s Secret C...

[500 rows x 2 columns]


In [52]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(celeb_data, test_size=0.2)

print(train)
print(test)

     label                                               text
475   fake  Blue Ivy To Play Nurse For Pregnant Mom Beyonc...
341   fake  Drake Facing Gay Rumors After Getting Frisky W...
361   fake  Leah Remini Opens Up About Her Friendship With...
277   fake  Donald Trump and First Lady Melania Keep Separ...
49   legit  Here's How Kim Kardashian West, Kanye West, an...
..     ...                                                ...
99   legit  WATCH: Donald Trump's Ex-Wife Marla Maples Get...
191  legit   Kelly Ripa and Ryan Seacrest tear up on air t...
442   fake  Apparently Drake Wants to Settle Down With a W...
390   fake  Ryan Gosling says he gave up smoking after fil...
40   legit  Kylie Jenner Sets the Record Straight About Re...

[400 rows x 2 columns]
     label                                               text
468   fake  FKA Twigs Looks Miserable With Robert Pattinso...
492   fake  Victoria Beckham's big fashion guide: Posh Spi...
134  legit  Brad Pitt Responds To Rumours He’s

In [53]:
#Splitting of the celebrity data set inti train and test
train.to_csv(f"/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/fake_rada/train_celebrity.tsv", sep="\t", index=False, quoting=csv.QUOTE_NONE, quotechar="", escapechar="")
test.to_csv(f"/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/fake_rada/test_celebrity.tsv", sep="\t", index=False, quoting=csv.QUOTE_NONE, quotechar="", escapechar="")

In [29]:
def fake_data_to_spacy(fname):
  training_data = pd.read_csv(fname, sep='\t', encoding='utf-8')
  #train_data.dropna(axis = 0, how ='any',inplace=True)
  #train_data['Nef um_words_text'] = train_data['text'].apply(lambda x:len(str(x).split())) 
  #mask = train_data['Num_words_text'] >2
  #train_data = train_data[mask]
   
  train_texts = training_data['text'].tolist()
  train_cats = training_data['label'].tolist()
  final_train_cats=[]
  for cat in train_cats:
    cat_list = {}
    if cat == 'fake':
      cat_list['fake'] =  1
      cat_list['legit'] =  0
    else:
      cat_list['fake'] =  0
      cat_list['legit'] =  1
    final_train_cats.append(cat_list)
    
  train_data = list(zip(train_texts, [{"cats": cats} for cats in final_train_cats]))
  return train_data, train_texts, train_cats

In [54]:
training_data, train_texts, train_cats = fake_data_to_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/fake_rada/train_celebrity.tsv')
print(training_data[0])
print(len(training_data))
test_data, test_texts, test_cats = fake_data_to_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/fake_rada/test_celebrity.tsv')
print(len(test_data))

('Blue Ivy To Play Nurse For Pregnant Mom Beyoncé When\xa0Beyoncé\xa0goes into labor, she’ll have one very special helper by her side: 5-year-old daughter\xa0Blue Ivy! According to an insider, the superstar and hubby\xa0Jay Z have agreed to let Blue play nurse when the singer gives birth to twins, which the insider claims will happen via C-section in June “Blue will be one of the first to hold the babies, with help from Jay, of course, and she’ll be responsible for writing down their height and weight,” shares the insider. “She’ll even get to help tie the ID bracelets on their wrists.” Though some might be hesitant to have a child in the operating room, Bey and Jay “want to make Blue feel included in anything related to bringing the twins into the world,” the insider says. “She’s excited — and they don’t want her to be jealous.” Paging Nurse Blue! ', {'cats': {'fake': 1, 'legit': 0}})
400
100


In [55]:
def Sort(sub_li):
  # reverse = True (Soresulting_list = list(first_list)rts in Descending  order) 
  # key is set to sort using second element of  
  # sublist lambda has been used 
  return(sorted(sub_li, key = lambda x: x[1],reverse=True))  

# run the predictions on each sentence in the evaluation  dataset, and return the metrics
def evaluate(tokenizer, textcat, test_texts, test_cats ):
  docs = (tokenizer(text) for text in test_texts)
  preds = []
  for i, doc in enumerate(textcat.pipe(docs)):
  
    scores = Sort(doc.cats.items())
  
    catList=[]
    for score in scores:
      catList.append(score[0])
    preds.append(catList[0])
        
 
  labels = ['fake','legit']
  print(classification_report(test_cats, preds,labels=labels))

In [56]:
def train_spacy(  train_data, iterations,test_texts,test_cats, model_arch, dropout = 0.3, model=None, init_tok2vec=None):
    ''' Train a spacy model, which can be queried against with test data
   
    train_data : training data in the format of (sentence, {cats: ['fake'|'legit']})
    labels : a list of unique annotations
    iterations : number of training iterations
    dropout : dropout proportion for training
    display_freq : number of epochs between logging losses to console
    '''
    
    nlp = spacy.load('en_core_web_sm')
    

    # add the text classifier to the pipeline if it doesn't exist
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if "textcat" not in nlp.pipe_names:
        textcat = nlp.create_pipe(
            "textcat", config={"exclusive_classes": True, "architecture": model_arch}
        )
        nlp.add_pipe(textcat, last=True)
        
    # otherwise, get it, so we can add labels to it
    else:
        textcat = nlp.get_pipe("textcat")

    # add label to text classifier
    textcat.add_label('fake')
    textcat.add_label('legit')

    # get names of other pipes to disable them during training
    pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"]
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
    with nlp.disable_pipes(*other_pipes):  # only train textcat
        optimizer = nlp.begin_training()
        if init_tok2vec is not None:
            with init_tok2vec.open("rb") as file_:
                textcat.model.tok2vec.from_bytes(file_.read())
        print("Training the model...")
        print("{:^5}\t{:^5}\t{:^5}\t{:^5}".format("LOSS", "P", "R", "F"))
        batch_sizes = compounding(16.0, 64.0, 1.5)
        for i in range(iterations):
            print('Iteration: '+str(i))
            start_time = time.process_time()
            losses = {}
            # batch up the examples using spaCy's minibatch
            random.shuffle(train_data)
            batches = minibatch(train_data, size=batch_sizes)
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, drop=dropout, losses=losses)
            with textcat.model.use_params(optimizer.averages):
                # evaluate on the test data 
                evaluate(nlp.tokenizer, textcat, test_texts,test_cats)
            print ('Elapsed time'+str(time.process_time() - start_time)+  "seconds")
        with nlp.use_params(optimizer.averages):
            #model_name = model_arch + "_celebrity"
            model_name = model_arch + "_fakenews"
            filepath = "/content/drive/MyDrive/NLP_Applications_1/DATA" + model_name 
            nlp.to_disk(filepath)
    return nlp

In [57]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "bow")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        fake       0.00      0.00      0.00        45
       legit       0.55      1.00      0.71        55

    accuracy                           0.55       100
   macro avg       0.28      0.50      0.35       100
weighted avg       0.30      0.55      0.39       100

Elapsed time3.121349699999996seconds
Iteration: 1
              precision    recall  f1-score   support

        fake       0.46      0.84      0.60        45
       legit       0.61      0.20      0.30        55

    accuracy                           0.49       100
   macro avg       0.54      0.52      0.45       100
weighted avg       0.54      0.49      0.44       100

Elapsed time4.041491268000016seconds
Iteration: 2
              precision    recall  f1-score   support

        fake       0.63      0.69      0.66        45
       legit       0.73      0.67      0.70        55

    accuracy                           0.68       100
   macro avg       0.68     

In [None]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "simple_cnn")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        fake       0.47      1.00      0.64        47
       legit       0.00      0.00      0.00        53

    accuracy                           0.47       100
   macro avg       0.23      0.50      0.32       100
weighted avg       0.22      0.47      0.30       100

Elapsed time22.04621323699996seconds
Iteration: 1
              precision    recall  f1-score   support

        fake       1.00      0.06      0.12        47
       legit       0.55      1.00      0.71        53

    accuracy                           0.56       100
   macro avg       0.77      0.53      0.41       100
weighted avg       0.76      0.56      0.43       100

Elapsed time19.94427032699997seconds
Iteration: 2
              precision    recall  f1-score   support

        fake       0.65      0.89      0.75        47
       legit       0.86      0.57      0.68        53

    accuracy                           0.72       100
   macro avg       0.75     

In [None]:
textcat_bow = spacy.load("/content/drive/MyDrive/NLP_Applications_1/DATAbow_celebrity")
tweets = textcat_bow(test_texts[10])
print("Text: "+ test_texts[10])
print("Gold Label:"+ test_cats[10])
print(" Predicted Label:") 
print(tweets.cats)
print("=======================================")

Text: A Detailed History of Selena Gomez and Justin Bieber’s On-Again, Off-Again Relationship 2,518 days ago, Justin Bieber and Selena Gomez were spotted—arm in arm—at a Philadelphia IHOP. And so began the long, complicated, and endless saga of “Jelena,” which, in its latest of many twists, has the on-again, off-again couple “hanging out” again amidst news of Gomez’s breakup with The Weeknd. How did we get here, exactly? You’re forgiven for not knowing. After all, IHOP was centuries ago, or at least 2010, when Barack Obama was a first-term president and America watched American Idol and Survivor . Here’s a comprehensive guide to their on-and-off, up-and-down, are-they-or-aren’t-they relationship. TMZ breaks the news that the teen idols enjoyed a “cuddly” date at an IHOP in Philadelphia ahead of a Q102’s Jingle Ball. Gomez tells Us Weekly that it was platonic, and that Bieber is “one of my best friends . . . It was just pancakes!” Bieber, too, stays mum on the subject, telling MTV that 

#Fake_news_dataset.

In [58]:
fake_news = "/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/fake_rada/fake_news_full.tsv"
fake_news_data = cleaning_of_data([fake_news])
print(fake_news_data)

     label                                               text
0     fake  Alex Jones Vindicated in "Pizzagate" Controver...
1     fake  THE BIG DATA CONSPIRACY Government and Silicon...
2     fake  California Surprisingly Lenient on Auto Emissi...
3     fake  Mexicans Are Chomping at the Bit to Stop NAFTA...
4     fake  Breaking News: Snapchat to purchase Twitter fo...
..     ...                                                ...
472  legit  Machine Learning Opens Up New Ways to Help Dis...
473  legit  YouTube automates sound effect captions with A...
474  legit  Solar-powered 'skin' could make prosthetics mo...
475  legit  Uber Self-Driving Car Tests Resume Three Days ...
476  legit  Apple's Devices Lose Luster in American Classr...

[477 rows x 2 columns]


In [40]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(fake_news_data, test_size=0.2)

print(train)
print(test)

     label                                               text
154   fake  Poll: Trump's approval rating soars President ...
338  legit  'Walking Dead' star Lauren Cohan on season 7 f...
395  legit  Schwarzenegger taunts Trump over approval rati...
285  legit  Arne Duncan Blasts Trump Administration Over T...
48    fake  Arne Duncan, Trump's Transgender Foe Former Se...
..     ...                                                ...
140   fake  Judge tells Stein, 'Your money's no good here!...
144   fake  Rhona Graff, Trump's GateKeeper Rhona GRaff, l...
126   fake  Women arrested three times is Melania Trump (C...
453  legit  Apple cuts prices on lower-end iPads, releases...
161   fake  SEXIST RORY MCILROY CALLS VOTE FOR FEMALE MEMB...

[381 rows x 2 columns]
     label                                               text
30    fake  Sweden Warned Not to Return to Low-Tax 50s as ...
360  legit  President Trump climbs into an 18-wheeler and ...
272  legit   Jeff Bezos tests giant robot suit

In [42]:
train.to_csv(f"/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/fake_rada/train_fake_news.tsv", sep="\t", index=False, quoting=csv.QUOTE_NONE,quotechar="", escapechar="\\")
test.to_csv(f"/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/fake_rada/test_fake_news.tsv", sep="\t", index=False, quoting=csv.QUOTE_NONE,quotechar="", escapechar="\\")

In [45]:
training_data, train_texts, train_cats = fake_data_to_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/fake_rada/train_fake_news.tsv')
print(training_data[0])
print(len(training_data))
test_data, test_texts, test_cats = fake_data_to_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/fake_rada/test_fake_news.tsv')
print(len(test_data))

("Poll: Trump's approval rating soars President Donald Trump's approval rating has soared to a new high of 83 percent in the Quinnipiac University poll. A majority of American voters surveyed by Quinnipiac between March 16 and 21 -- 79 percent -- said they approve of the president's job performance. Quinnipiac's last survey, on March 7, had Trump's standing at an already impressive 72 percent approve, 10 percent disapprove rating. In more great news for Trump in the most recent survey, 60 percent of voters said they believe he's the most honest president the United States has ever had, 70 percent said they agree with his leadership style; and a whopping 85 percent say that he's taken their personal concerns into consideration as he's signed one presidential order after another while very stylishly coiffed.", {'cats': {'fake': 1, 'legit': 0}})
384
96


In [46]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "bow")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0
              precision    recall  f1-score   support

        fake       0.53      0.43      0.47        47
       legit       0.53      0.63      0.58        49

    accuracy                           0.53        96
   macro avg       0.53      0.53      0.53        96
weighted avg       0.53      0.53      0.53        96

Elapsed time1.147335709999993seconds
Iteration: 1
              precision    recall  f1-score   support

        fake       0.51      0.55      0.53        47
       legit       0.53      0.49      0.51        49

    accuracy                           0.52        96
   macro avg       0.52      0.52      0.52        96
weighted avg       0.52      0.52      0.52        96

Elapsed time0.5560046209999996seconds
Iteration: 2
              precision    recall  f1-score   support

        fake       0.51      0.47      0.49        47
       legit       0.53      0.57      0.55        49

    accuracy          

In [47]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "simple_cnn")

Training the model...
LOSS 	  P  	  R  	  F  
Iteration: 0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

        fake       0.49      1.00      0.66        47
       legit       0.00      0.00      0.00        49

    accuracy                           0.49        96
   macro avg       0.24      0.50      0.33        96
weighted avg       0.24      0.49      0.32        96

Elapsed time8.156693473000018seconds
Iteration: 1
              precision    recall  f1-score   support

        fake       0.53      0.85      0.65        47
       legit       0.65      0.27      0.38        49

    accuracy                           0.55        96
   macro avg       0.59      0.56      0.51        96
weighted avg       0.59      0.55      0.51        96

Elapsed time5.864149893000018seconds
Iteration: 2
              precision    recall  f1-score   support

        fake       0.60      0.55      0.58        47
       legit       0.60      0.65      0.63        49

    accuracy                           0.60        96
   macro avg       0.60     

In [48]:
textcat_bow = spacy.load("/content/drive/MyDrive/NLP_Applications_1/DATAbow_fakenews")
tweets = textcat_bow(test_texts[10])
print("Text: "+ test_texts[10])
print("Gold Label:"+ test_cats[10])
print(" Predicted Label:") 
print(tweets.cats)
print("=======================================")

Text: Trump's travel ban still out of favor with court A federal appellate court on Thursday refused to reinstate Trump's travel ban. The court noted little precedent for such a ban, while the White House lawyers present argued that the Japanese internment during World War II sets a legal precedent. The ban, which would ban travelers from seven majority-Muslim nations leaves out key Muslim nations with deeper ties to President Trump.
Gold Label:fake
 Predicted Label:
{'fake': 0.4192053973674774, 'legit': 0.5807945728302002}


#3.TODO Try the different spacy language models to see the difference in performance.

In [None]:
# data path. trial data used as training too.
trial_file = "/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/semeval2016-task6-trialdata.utf-8.txt"
train_file = "/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/semeval2016-task6-trainingdata.utf-8.txt"
test_file = "/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/SemEval2016-Task6-subtaskA-testdata-gold.txt"

training_data, targets = load_data([trial_file, train_file])
training_data['Clean_tweet'] = training_data['Tweet'].apply(cleanup)

test_data, _ = load_data([test_file])
test_data['Clean_tweet'] = test_data['Tweet'].apply(cleanup)
display(training_data)

Unnamed: 0,ID,Target,Tweet,Stance,Clean_tweet
0,1,Hillary Clinton,"@tedcruz And, #HandOverTheServer she wiped cle...",AGAINST,"tedcruz And, HandOverTheServer she wiped clean..."
1,2,Hillary Clinton,Hillary is our best choice if we truly want to...,FAVOR,Hillary is our best choice if we truly want to...
2,3,Hillary Clinton,@TheView I think our country is ready for a fe...,AGAINST,TheView I think our country is ready for a fem...
3,4,Hillary Clinton,I just gave an unhealthy amount of my hard-ear...,AGAINST,I just gave an unhealthy amount of my hard-ear...
4,5,Hillary Clinton,@PortiaABoulger Thank you for adding me to you...,NONE,PortiaABoulger Thank you for adding me to your...
...,...,...,...,...,...
2809,2910,Legalization of Abortion,"There's a law protecting unborn eagles, but no...",AGAINST,"There's a law protecting unborn eagles, but no..."
2810,2911,Legalization of Abortion,I am 1 in 3... I have had an abortion #Abortio...,AGAINST,I am 1 in 3... I have had an abortion Abortion...
2811,2912,Legalization of Abortion,How dare you say my sexual preference is a cho...,AGAINST,How dare you say my sexual preference is a cho...
2812,2913,Legalization of Abortion,"Equal rights for those 'born that way', no rig...",AGAINST,"Equal rights for those 'born that way', no rig..."


In [None]:
for target in targets:
  training_data[training_data['Target'] == target][['Stance', 'Clean_tweet']].to_csv(f"/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/semeval2016-task6-train.{target}.tsv",
          sep="\t", index=False, quoting=csv.QUOTE_NONE, quotechar="", escapechar="")
  test_data[test_data['Target'] == target][['Stance', 'Clean_tweet']].to_csv(f"/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/SemEval2016-Task6-subtaskA-test.{target}.tsv",
          sep="\t", index=False, quoting=csv.QUOTE_NONE, quotechar="", escapechar="")

In [None]:
'''The function loads a CSV file into a pandas dataframe "training_data".
It then outputs the count of each unique value in the "Stance" column.
The "Clean_tweet" and "Stance" columns are extracted into lists "train_texts" and "train_cats".
A list of dictionaries "final_train_cats" with binary values is created to represent the stance categories.
The function returns the list of tuples "train_data", the "train_texts" and "train_cats"
'''

def load_data_spacy(fname):
  training_data = pd.read_csv(fname, sep='\t', encoding='utf-8')
  #train_data.dropna(axis = 0, how ='any',inplace=True)
  #train_data['Num_words_text'] = train_data['text'].apply(lambda x:len(str(x).split())) 
  #mask = train_data['Num_words_text'] >2
  #train_data = train_data[mask]
  print(training_data['Stance'].value_counts())
   
  train_texts = training_data['Clean_tweet'].tolist()
  train_cats = training_data['Stance'].tolist()
  final_train_cats=[]
  for cat in train_cats:
    cat_list = {}
    if cat == 'AGAINST':
      cat_list['AGAINST'] =  1
      cat_list['FAVOR'] =  0
      cat_list['NONE'] =  0
    elif cat == 'FAVOR':
      cat_list['AGAINST'] =  0
      cat_list['FAVOR'] =  1
      cat_list['NONE'] =  0
    else:
      cat_list['AGAINST'] =  0
      cat_list['FAVOR'] =  0
      cat_list['NONE'] =  1
    final_train_cats.append(cat_list)
    
  train_data = list(zip(train_texts, [{"cats": cats} for cats in final_train_cats]))
  return train_data, train_texts, train_cats

In [None]:
training_data, train_texts, train_cats = load_data_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/semeval2016-task6-train.Feminist Movement.tsv')
print(training_data[:10])
print(len(training_data))
test_data, test_texts, test_cats = load_data_spacy('/content/drive/MyDrive/NLP_Applications_1/DATA/2023-ILTAPP-20230203T201734Z-001/2023-ILTAPP/datasets/stance-semeval2016/SemEval2016-Task6-subtaskA-test.Feminist Movement.tsv')
print(len(test_data))

AGAINST    328
FAVOR      210
NONE       126
Name: Stance, dtype: int64
[('Always a delight to see chest-drumming alpha males hiss and scuttle backwards up the wall when a feminist enters the room. manly SemST', {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ("Sometimes I overheat and want to take off my shirt but can't because of social expectations of people with breasts. ;n; SemST", {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ('If feminists spent 1/2 as much time reading papers as they do tumblr they would be real people, not ignorant sexist bigots. SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ('Stupid Feminists, the civilization you take for granted was built with the labour, blood sweat and tears of men. SemST', {'cats': {'AGAINST': 1, 'FAVOR': 0, 'NONE': 0}}), ("YOU'RE A GIRL AND HAVE A SEX DRIVE!? YOU MUST BE A SLUT! feminist SemST", {'cats': {'AGAINST': 0, 'FAVOR': 1, 'NONE': 0}}), ("Suns out....  Dresses out...  StreetHarassment out...  This shouldn't be 

In [None]:
'''This function sorts a list of sublists in descending order based on the second element of each sublist.
 The "key" parameter of the "sorted" function is set to a lambda function that returns the second element of each sublist, and the
  "reverse" parameter is set to "True", 
so the resulting list will be sorted in descending order. '''

def Sort(sub_li):
  # reverse = True (Soresulting_list = list(first_list)rts in Descending  order) 
  # key is set to sort using second element of  
  # sublist lambda has been used 
  return(sorted(sub_li, key = lambda x: x[1],reverse=True))  

# run the predictions on each sentence in the evaluation  dataset, and return the metrics
'''The "evaluate" function evaluates the performance of a tokenizer and a text categorization model on a set of test text data.
 It tokenizes the test texts, processes them through the text categorization model to get prediction scores for each text, extracts the top-rated category for each text,
  and compares the predicted categories with the true categories. The function then prints a performance evaluation report in terms of precision, recall, and F1-score for each class. '''
def evaluate(tokenizer, textcat, test_texts, test_cats ):
  docs = (tokenizer(text) for text in test_texts)
  preds = []
  for i, doc in enumerate(textcat.pipe(docs)):
    #print(doc.cats.items())
    scores = Sort(doc.cats.items())
    #print(scores)
    catList=[]
    for score in scores:
      catList.append(score[0])
    preds.append(catList[0])
        
  labels = ['AGAINST', 'FAVOR']
 
  print(classification_report(test_cats, preds,labels=labels))

In [None]:
def train_spacy(  train_data, iterations,test_texts,test_cats, model_arch, dropout = 0.3, model=None, init_tok2vec=None):
    ''' Train a spacy model, which can be queried against with test data
   
    train_data : training data in the format of (sentence, {cats: ['AGAINST'|'FAVOR'|'NONE']})
    labels : a list of unique annotations
    iterations : number of training iterations
    dropout : dropout proportion for training
    display_freq : number of epochs between logging losses to console
    '''
    
    nlp = spacy.load("en_core_web_lg")
    

    # add the text classifier to the pipeline if it doesn't exist
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if "textcat" not in nlp.pipe_names:
        textcat = nlp.create_pipe(
            "textcat", config={"exclusive_classes": True, "architecture": model_arch}
        )
        nlp.add_pipe(textcat, last=True)
        
    # otherwise, get it, so we can add labels to it
    else:
        textcat = nlp.get_pipe("textcat")

    # add label to text classifier
    textcat.add_label("AGAINST")
    textcat.add_label("FAVOR")
    textcat.add_label("NONE")
   

    # get names of other pipes to disable them during training
    pipe_exceptions = ["textcat", "trf_wordpiecer", "trf_tok2vec"]
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
    with nlp.disable_pipes(*other_pipes):  # only train textcat
        optimizer = nlp.begin_training()
        if init_tok2vec is not None:
            with init_tok2vec.open("rb") as file_:
                textcat.model.tok2vec.from_bytes(file_.read())
        print("Training the model...")
        print("{:^5}\t{:^5}\t{:^5}\t{:^5}".format("LOSS", "P", "R", "F"))
        batch_sizes = compounding(16.0, 64.0, 1.5)
        for i in range(iterations):
            print('Iteration: '+str(i))
            #start_time = time.process_time()
            losses = {}
            # batch up the examples using spaCy's minibatch
            random.shuffle(train_data)
            batches = minibatch(train_data, size=batch_sizes)
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, drop=dropout, losses=losses)
            with textcat.model.use_params(optimizer.averages):
                # evaluate on the test data 
                evaluate(nlp.tokenizer, textcat, test_texts,test_cats)
            #print ('Elapsed time'+str(time.process_time() - start_time)+  "seconds")
        with nlp.use_params(optimizer.averages):
            model_name = model_arch + "_MD_feminism" # used the medium data.
            #model_name = model_arch + "_hiliary_clinton"
            #model_name = model_arch + "_legalization_of_abortion"
            #model_name = model_arch + "_Atheism"
            #model_name = model_arch + "_climate_change"
            #model_name = model_arch + "_feminism"
            filepath = "/content/drive/MyDrive/NLP_Applications_1/DATA" + model_name 
            nlp.to_disk(filepath)
    return nlp

In [None]:
nlp = train_spacy(training_data, 20, test_texts, test_cats, "bow")

OSError: ignored

In [None]:
textcat_bow = spacy.load("/content/drive/MyDrive/NLP_Applications_1/DATAbow_MD_feminism")
tweets = textcat_bow(test_texts[10])
print("Text: "+ test_texts[10])
print("Gold Label:"+ test_cats[10])
print(" Predicted Label:") 
print(tweets.cats)
print("=======================================")

Text: sometiimes you just feel like punching a feminist in the face SemST
Gold Label:AGAINST
 Predicted Label:
{'AGAINST': 0.42913225293159485, 'FAVOR': 0.34359779953956604, 'NONE': 0.2272699624300003}
