In [142]:
from news import NewsParser

import matplotlib.pyplot as plt
from pprint import pprint
from tqdm import tqdm
import spacy, os, random
import pandas as pd
from textblob import TextBlob
from textblob.classifiers import NaiveBayesClassifier

# Default figure size and resolution, applied globally to every matplotlib figure.
fsize = (16, 9)
dpi = 120
plt.rcParams["figure.figsize"] = fsize
plt.rcParams["figure.dpi"] = dpi


In [143]:
# Load the spaCy "en_core_web_sm" pipeline once at module level; breakdown_text()
# uses this shared `nlp` object to parse every sentence.
nlp = spacy.load("en_core_web_sm")

In [152]:
def breakdown_text(sentence):
    """
    Split a sentence into its aspect and the phrase describing it.

    The sentence is parsed with the module-level spaCy pipeline ``nlp``.
    The aspect is the text of a token that is both a nominal subject
    (``dep_ == 'nsubj'``) and a noun; the description is an adjective
    prefixed by any of its adverb children. When several tokens qualify,
    the last one in the sentence wins. Either value is ``''`` when no
    token qualifies.

    Returns a dict with the keys 'sentence', 'aspect', 'description',
    'sentence_sentiment' and 'desc_sentiment'; the two sentiment entries
    are the TextBlob ``.sentiment`` of the full sentence and of the
    description respectively.
    """
    aspect = ''
    description = ''
    for tok in nlp(sentence):
        if tok.pos_ == 'NOUN' and tok.dep_ == 'nsubj':
            aspect = tok.text
        if tok.pos_ == 'ADJ':
            # Adverb modifiers (e.g. "very") are kept in front of the adjective,
            # each followed by a single space.
            modifiers = ''.join(kid.text + ' ' for kid in tok.children if kid.pos_ == 'ADV')
            description = modifiers + tok.text

    result = {'sentence': sentence, 'aspect': aspect, 'description': description}
    result['sentence_sentiment'] = TextBlob(sentence).sentiment
    result['desc_sentiment'] = TextBlob(description).sentiment
    return result

In [153]:
def analyze_sentences(sentences):
    """
    Run breakdown_text over every sentence and collect the results.

    Returns a list with one breakdown dict per input sentence, in input order.
    """
    return [breakdown_text(text) for text in sentences]

In [154]:
# Hand-written example sentences used to try out analyze_sentences().
sentences = [
  'Sales in Finland decreased by 10.5 % in January , while sales outside Finland dropped by 17 ',
  'My time in Italy was very enjoyable',
  'I found the meal to be tasty',
  'The internet was slow.',
  'Our experience was suboptimal'
]


In [155]:
# Break down the example sentences; the returned list is displayed as the cell output.
analyze_sentences(sentences)

[{'sentence': 'Sales in Finland decreased by 10.5 % in January , while sales outside Finland dropped by 17 ',
  'aspect': 'sales',
  'description': '',
  'sentence_sentiment': Sentiment(polarity=-0.2, subjectivity=0.375),
  'desc_sentiment': Sentiment(polarity=0.0, subjectivity=0.0)},
 {'sentence': 'My time in Italy was very enjoyable',
  'aspect': 'time',
  'description': 'very enjoyable',
  'sentence_sentiment': Sentiment(polarity=0.65, subjectivity=0.78),
  'desc_sentiment': Sentiment(polarity=0.65, subjectivity=0.78)},
 {'sentence': 'I found the meal to be tasty',
  'aspect': 'meal',
  'description': 'tasty',
  'sentence_sentiment': Sentiment(polarity=0.0, subjectivity=0.0),
  'desc_sentiment': Sentiment(polarity=0.0, subjectivity=0.0)},
 {'sentence': 'The internet was slow.',
  'aspect': 'internet',
  'description': 'slow',
  'sentence_sentiment': Sentiment(polarity=-0.30000000000000004, subjectivity=0.39999999999999997),
  'desc_sentiment': Sentiment(polarity=-0.30000000000000004

In [148]:
# NewsParser is imported from the project-local `news` module.
newsparser = NewsParser()
# Fetch articles for the query "APPLE". Later cells read each item's 'title' and 'desc' keys.
# NOTE(review): the "Failed on ..." lines in the output presumably come from get_articles — confirm.
test_text_list = newsparser.get_articles("APPLE")

Failed on news.google.com/./articles/CBMifmh0dHBzOi8vd3d3LmJhcnJvbnMuY29tL2FydGljbGVzL2ZpcnN0LXF1YXJ0ZXItcGMtc2FsZXMtZ3Jldy1hdC1mYXN0ZXN0LXJhdGUtaW4tMjAteWVhcnMtaW4tYm9vbi1mb3ItYXBwbGUtaHAtZGVsbC01MTYxODMyODkyNNIBggFodHRwczovL3d3dy5iYXJyb25zLmNvbS9hbXAvYXJ0aWNsZXMvZmlyc3QtcXVhcnRlci1wYy1zYWxlcy1ncmV3LWF0LWZhc3Rlc3QtcmF0ZS1pbi0yMC15ZWFycy1pbi1ib29uLWZvci1hcHBsZS1ocC1kZWxsLTUxNjE4MzI4OTI0?hl=en-US&gl=US&ceid=US%3Aen
Failed on news.google.com/./articles/CBMiaWh0dHBzOi8vd3d3LmJhcnJvbnMuY29tL2FydGljbGVzL3dpbGwtdGVzbGEtYmUtdGhlLW5leHQtYXBwbGUtaGVyZXMtaG93LWludmVzdG9ycy1jYW4tcGxheS1pdC01MTYxODMyMjc3MNIBbWh0dHBzOi8vd3d3LmJhcnJvbnMuY29tL2FtcC9hcnRpY2xlcy93aWxsLXRlc2xhLWJlLXRoZS1uZXh0LWFwcGxlLWhlcmVzLWhvdy1pbnZlc3RvcnMtY2FuLXBsYXktaXQtNTE2MTgzMjI3NzA?hl=en-US&gl=US&ceid=US%3Aen
Failed on news.google.com/./articles/CBMigQFodHRwczovL2ZveDExb25saW5lLmNvbS9vbi1mb3gtMTEvZ29sZGVuLWFwcGxlLWF3YXJkcy9nb2xkZW4tYXBwbGUtcmVjaXBpZW50LWJyb29rZS1ob2ZmbWFuLWVuZ2FnZXMtc3R1ZGVudHMtdG8tbWFrZS1sZWFybmluZy1mdW7SAYUB

In [156]:
# Pull the headline and the description out of every fetched article.
titles = [t['title'] for t in test_text_list]
desc = [t['desc'] for t in test_text_list]
# Full article text is currently not extracted (left disabled):
#text = [t['text'] for t in test_text_list]
# Only the headlines are analyzed here; the result is displayed as the cell output.
analyze_sentences(titles)

[{'sentence': 'Why new Apple products might be harder to buy this year',
  'aspect': 'products',
  'description': 'harder',
  'sentence_sentiment': Sentiment(polarity=0.018181818181818174, subjectivity=0.22727272727272727),
  'desc_sentiment': Sentiment(polarity=-0.1, subjectivity=0.0)},
 {'sentence': "Everything You Need to Know About Apple's Find My Network Accessory Program",
  'aspect': '',
  'description': '',
  'sentence_sentiment': Sentiment(polarity=0.0, subjectivity=0.0),
  'desc_sentiment': Sentiment(polarity=0.0, subjectivity=0.0)},
 {'sentence': "Apple's Find My network now offers new third-party finding experiences",
  'aspect': 'network',
  'description': 'third',
  'sentence_sentiment': Sentiment(polarity=0.11818181818181818, subjectivity=0.22727272727272727),
  'desc_sentiment': Sentiment(polarity=0.0, subjectivity=0.0)},
 {'sentence': "Apple's Worldwide Developers Conference is back in its all-online format",
  'aspect': '',
  'description': 'online',
  'sentence_senti

In [151]:
def load_tagged_set(fname):
    """
    Load a tagged csv file and return its rows as a list of records.

    The file is read with pandas' default CSV settings and is expected
    to hold one labelled example per row:

    text | sentiment
    text | sentiment
    text | sentiment
    ...
    text | sentiment

    Returns a list of numpy records, one per data row, without the
    DataFrame index.
    """
    frame = pd.read_csv(fname)
    records = frame.to_records(index=False)
    return [row for row in records]

In [None]:
# Hold out the last N_TEST rows for evaluation and train on everything before them.
# The file is parsed once and sliced, instead of being read from disk twice.
N_TEST = 80
tagged = load_tagged_set('datasets/financial-headlines.csv')
train = tagged[:-N_TEST]
test = tagged[-N_TEST:]

# Fit a Naive Bayes classifier on the training rows and show its 10 most informative features.
cl = NaiveBayesClassifier(train)
cl.show_informative_features(10)
