In [101]:
from flair.models import TextClassifier
from flair.data import Sentence
from segtok.segmenter import split_single
import pandas as pd
import re


# Load flair's pre-trained 'en-sentiment' text classifier once, at module level;
# predict() below reads this global instead of reloading the model per call.
classifier = TextClassifier.load('en-sentiment')

2020-02-05 01:20:09,846 loading file C:\Users\Ryloid\.flair\models\imdb-v0.4.pt


In [102]:
def clean(raw):
    """Strip anchor elements, a few HTML entities and simple tags from ``raw``.

    The substitutions are applied one after another, each on the output of
    the previous one, in the order listed below.

    Args:
        raw: The text to clean (a string).

    Returns:
        The cleaned string.
    """
    # (pattern, replacement) pairs, applied top to bottom.
    substitutions = (
        ("<[a][^>]*>(.+?)</[a]>", 'Link.'),  # a whole <a ...>...</a> element
        ('&gt;', ""),
        ('&#x27;', "'"),
        ('&quot;', '"'),
        ('&#x2F;', ' '),
        ('<p>', ' '),
        ('</i>', ''),
        ('&#62;', ''),
        ('<i>', ' '),
        ("\n", ''),
    )
    cleaned = raw
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned

def make_sentences(text):
    """Split ``text`` into sentences with segtok's ``split_single``.

    Returns:
        A list holding every item yielded by ``split_single(text)``.
    """
    return list(split_single(text))

def predict(sentence):
    """Predict a signed sentiment score for one sentence.

    Runs the module-level ``classifier`` on ``sentence`` and reads the first
    label it attaches.

    Args:
        sentence: The sentence text (a string).

    Returns:
        The label confidence rounded to 3 decimals, positive when the label
        value is 'POSITIVE' and negated otherwise. Returns 0 when the
        sentence is empty or whitespace-only, or when the classifier
        attached no label.
    """
    # Nothing to classify: empty and whitespace-only input both score 0.
    if not sentence.strip():
        return 0

    text = Sentence(sentence)
    classifier.predict(text)

    # Guard against an unlabelled sentence; indexing labels[0] would
    # otherwise raise IndexError and abort the whole DataFrame.apply run.
    if not text.labels:
        return 0

    label = text.labels[0].to_dict()
    confidence = label['confidence']
    signed = confidence if label['value'] == 'POSITIVE' else -confidence
    return round(signed, 3)

def get_scores(sentences):
    """Score every sentence of a text with ``predict``.

    Args:
        sentences: A sequence of sentence strings.

    Returns:
        A list with one ``predict`` result per sentence, in input order.
    """
    return [predict(sentences[idx]) for idx in range(len(sentences))]

def get_sum(scores):
    """Return the sum of ``scores`` rounded to 3 decimal places."""
    return round(sum(scores), 3)


In [103]:
# Read the comments, drop rows with missing values, and renumber the index from 0.
df = (
    pd.read_json('small.json')
    .dropna()
    .reset_index(drop=True)
)
df.shape

(96, 3)

In [104]:
# Clean each comment, split it into sentences, score every sentence,
# then total the per-sentence scores for the comment.
df['text'] = df['text'].apply(clean)
df['sentences'] = df['text'].apply(make_sentences)
df['scores'] = df['sentences'].apply(get_scores)
df['scores_sum'] = df['scores'].apply(get_sum)

In [105]:
# Preview the first rows, including the derived sentences / scores / scores_sum columns.
df.head()

Unnamed: 0,id,by,text,sentences,scores,scores_sum
0,8817348,BrainInAJar,it is possible to agree with the general tone ...,[it is possible to agree with the general tone...,[0.999],0.999
1,1909793,MisterWebz,Try Reddit. Wider audience.,"[Try Reddit., Wider audience.]","[0.581, 0.951]",1.532
2,1088996,ThinkWriteMute,I'd love to see an educational micro-kernel OS...,[I'd love to see an educational micro-kernel O...,[0.999],0.999
3,368085,tlrobinson,Perhaps the developer already knows JavaScript...,[Perhaps the developer already knows JavaScrip...,"[-0.686, -0.994]",-1.68
4,4224529,smacktoward,If someone presents himself to me as a profess...,[If someone presents himself to me as a profes...,[-0.675],-0.675


In [106]:
# Print every comment whose summed sentence score is strongly positive or
# strongly negative. Iterating the columns directly (rather than a
# hard-coded range(0, 96)) keeps the cell correct if the dataset size changes.
SCORE_THRESHOLD = 2  # absolute scores_sum a comment must exceed to be printed

for sample, scores_sum in zip(df['text'], df['scores_sum']):
    if scores_sum > SCORE_THRESHOLD:
        print("POSITIVE SCORE:", scores_sum)
        print(sample, "\n")
    elif scores_sum < -SCORE_THRESHOLD:
        print("NEGATIVE SCORE:", scores_sum)
        print(sample, "\n")

POSITIVE SCORE: 2.862
Very nice project. I'm late to the party but here's my upvote! :) 

POSITIVE SCORE: 6.825
It likely would work. Facebook heavily hinted at strong social integration with the OR. I likely see Facebook doing this or at least they gave me that impression in their stock holders call. It makes a lot of sense. I certainly would use it. The big "draw" for me. Would be watching movies with my friends, in a full theater settings, despite continents of separation. 

POSITIVE SCORE: 3.198
Nice commands. Also 'sips' for batch resizing images. If you aren't on OSX you can try the following for a simple 'say' alternative. It has some great robotic singing voices, too. Link. 

POSITIVE SCORE: 2.143
This is how big companies work.  They want even the smallest task (like, "viewing documentation for the product we just bought") to take days and require multiple levels of managerial approval.  Because I guess it's cheaper to have 10 employees doing the same job than it is to get sue