In [1]:
pip install textblob




[notice] A new release of pip available: 22.3.1 -> 23.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [26]:
from textblob import TextBlob
from textblob.taggers import PatternTagger
from textblob.sentiments import PatternAnalyzer
import json
import os

In [42]:
#classify the sentiment value into positive, neutral, and negative
analyzer = PatternAnalyzer() #POLARITY AND SUBJECTIVITY ANALYZER
tagger = PatternTagger() #PART OF SPEECH TAGGER


def pattern_analyzer_classify(sentence):
    score = analyzer.analyze(sentence)
    if score[0] > 0.1:
        return "pos"
    elif score[0] < 0.1:
        return "neg"
    else:
        return "neu"
    
#compare results of data using pattern analyzer compared to correct annotations
def pattern_analyzer_compare(data, annotations): #compares how many we correctly classify
    num_correct = 0
    total = 0
    sentence_list = []
    for i in data['body-paragraphs']:
        for j in i:
            sentence_list.append(j)
    for phrase in annotations["phrase-level-annotations"]:
        if(phrase["id"] == "title"):#line below is where we input our model
            if (pattern_analyzer_classify(data["title"]) == phrase["polarity"]):
                num_correct += 1
            total += 1
        else:
            sentence_id = int(phrase["id"][1:])#input model below here too
            if (pattern_analyzer_classify(sentence_list[sentence_id]) == phrase["polarity"]):
                num_correct += 1
            total += 1
    return total, num_correct

###TAGGER FUNCTIONS

def pattern_tag(sentence):
    blob = TextBlob(sentence, pos_tagger=tagger)
    return blob.pos_tags

def find_proper_names(pos_tags):
    tuple_list = []
    for pos_tuple in pos_tags:
        #find the NNP tagged words and return those tuples as a new list
        if pos_tuple[1] == 'NNP':
            tuple_list.append(pos_tuple)
    return tuple_list

def find_full_proper_names_list(pos_tags): #only accounts for first+last names
    name_list = []
    prev_tuple = ("___",'___')
    for pos_tuple in pos_tags:
        #find the NNP tagged words and return those tuples as a new list
        if pos_tuple[1] == 'NNP':
            if prev_tuple[1] == 'NNP':
                name_list.remove(prev_tuple[0])
                name_list.append(prev_tuple[0] + " " + pos_tuple[0])
            else:
                name_list.append(pos_tuple[0])
        prev_tuple = pos_tuple
    return name_list

#create another function of find_proper_names that combines full names
#basically check if two NNP words are next to each  other

In [48]:
###EXPLORATION START
pattern_analyzer1 = PatternAnalyzer()

test_sent1 = "I hate the things that Trump has done over the years."
test_sent2 = "In an Epic Battle of Tanks, Russia Was Routed, Repeating Earlier Mistakes"
test_sent3 = "Ukraine war live updates: Russian mercenary boss says ‘fierce resistance’ seen in Bakhmut; Kyiv says its fighters are under ‘insane pressure’"
test_sent4 = "What’s Worse for Donald Trump Than Getting Indicted?"
test_sent5 = "A fierce debate between Obama and Donald Trump ensued on tuesday."
#pattern analyzer returns a tuple of polarity, subjectivity
#and if you set it to true also a list of assessments made on 
#individual words

print("Pattern analyzer test1:")
print(pattern_analyzer1.analyze(test_sent1))
# test_analyzing2 = pattern_analyzer1.analyze(test_sent2,keep_assessments = True)
# print(test_analyzing2)
# print("Pattern test3: " + pattern_analyzer_classify(test_sent3))
# print(pattern_analyzer1.analyze(test_sent3))

print(pattern_analyzer1.analyze(test_sent1)[0])


#TAGGING EXPLORATION
tags1 = pattern_tag(test_sent1)
print(tags1)
print("\n")
print(find_proper_names(tags1))
print(find_full_proper_names_list(tags1))
print("\n")
tags4 = pattern_tag(test_sent4)
print(tags4)
print("\n")
print(find_proper_names(tags4))
print(find_full_proper_names_list(tags4))
print("\n")
tags5 = pattern_tag(test_sent5)
print(tags5)
print(find_full_proper_names_list(tags5))

Pattern analyzer test1:
Sentiment(polarity=-0.8, subjectivity=0.9)
-0.8
[('I', 'PRP'), ('hate', 'VBP'), ('the', 'DT'), ('things', 'NNS'), ('that', 'IN'), ('Trump', 'NNP'), ('has', 'VBZ'), ('done', 'VBN'), ('over', 'IN'), ('the', 'DT'), ('years', 'NNS')]


[('Trump', 'NNP')]
['Trump']


[('What', 'WP'), ('’', 'NN'), ('s', 'PRP'), ('Worse', 'JJR'), ('for', 'IN'), ('Donald', 'NNP'), ('Trump', 'NNP'), ('Than', 'IN'), ('Getting', 'VBG'), ('Indicted', 'VBN')]


[('Donald', 'NNP'), ('Trump', 'NNP')]
['Donald Trump']


[('A', 'DT'), ('fierce', 'JJ'), ('debate', 'NN'), ('between', 'IN'), ('Obama', 'NNP'), ('and', 'CC'), ('Donald', 'NNP'), ('Trump', 'NNP'), ('ensued', 'VBD'), ('on', 'IN'), ('tuesday', 'NN')]
['Obama', 'Donald Trump']


In [49]:
file_list = []

#open BASIL dataset and gather the articles w/o scores
for i in range(10):
    file_i = os.listdir("BASILdata/articles/201" + str(i))
    for file_name in file_i:
        file = open("BASILdata/articles/201" + str(i) + "/" + file_name, encoding="utf8")
        json_file = json.load(file)
        file_list.append(json_file)
        
annotation_file_list = []

#open BASIL dataset and gather annotations for articles
for i in range(10):
    file_i = os.listdir("BASILdata/annotations/201" + str(i))
    for file_name in file_i:
        file = open("BASILdata/annotations/201" + str(i) + "/" + file_name, encoding="utf8")
        json_file = json.load(file)
        annotation_file_list.append(json_file)

total_correct = 0
total = 0

#compare correct annotations to the results from our model
for i in range(len(file_list)):
    data = file_list[i]
    annotations = annotation_file_list[i]
    a, b = pattern_analyzer_compare(data, annotations)
    total += a
    total_correct += b
    
accuracy = total_correct/total
print("Pattern sentiment on BASIL correct: %.3f , BASIL total: %.3f" %(total_correct, total))
print("Pattern sentiment on BASIL ACCURACY: %.4f" % accuracy)

Pattern sentiment on BASIL correct: 1130.000 , BASIL total: 1726.000
Pattern sentiment on BASIL ACCURACY: 0.6547
