In [None]:
# Import dependencies
import pandas as pd
import pymongo
from pycorenlp import StanfordCoreNLP

## Connect to database

In [None]:
# MongoDB connection string for the local server
conn = 'mongodb://localhost:27017'

# Open a client against that connection string
client = pymongo.MongoClient(conn)

# Select the "tweets" database (created if it is not already available)
db = client['tweets']

## Retrieve a tweet from database

In [None]:
# Keywords that mark a tweet as finance-related. They are matched as
# case-insensitive substrings of the tweet text.
FINANCE_KEYWORDS = [
    'market', 'SPX', 'stock', 'fed', 'econom', 'bull', 'bear', 'momentum',
    'volat', 'treasury', 'powell', 'policy', 'spending', 'tariff', 'trade',
    'SPY', 'tax', 'interest',
]

# `$regex` searches anywhere in the field, so a plain alternation is enough and
# the surrounding `.*` wrappers are unnecessary. The previous pattern ended two
# alternatives with a bare `.` ('.*volat.' and '.*interest.'), which demanded
# one more non-newline character after the keyword and therefore skipped tweets
# where the keyword was the last thing on a line.
query = {"text": {"$regex": '|'.join(FINANCE_KEYWORDS),
                  "$options": 'i'}}

tweets = db['capstone_finance'].find(query)

# Materialise the cursor and build a dataframe with one row per matching tweet
df = pd.DataFrame(list(tweets))

In [None]:
# Add an empty-string placeholder column for the per-sentence sentiment;
# it is overwritten once analyze_sentiment is applied to the tweets.
df = df.assign(sentiment_by_sentence='')

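## Sentiment analysis with CoreNLP

`analyze_sentiment` below sends each tweet to a running CoreNLP server through the `nlp` client. That client is created in the "How to connect to and run CoreNLP server" section further down, so start the server and run that cell before applying the function.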

In [None]:
def analyze_sentiment(tweet, client=None):
    """Return the CoreNLP sentiment label of every sentence in ``tweet``.

    Parameters
    ----------
    tweet : str
        Text to annotate.
    client : object, optional
        Any object exposing ``annotate(text, properties=...)``. When omitted,
        the notebook-level ``nlp`` client is used, so the cell that creates
        ``nlp`` must have been run beforehand.

    Returns
    -------
    list of str, or the string ``"nan"``
        One sentiment label per entry of ``result["sentences"]``, in order.
        ``"nan"`` is the failure sentinel that later cells filter on; it is
        returned whenever the annotation call or the result parsing raises.
    """
    try:
        if client is None:
            # Resolved at call time (and inside the try) so existing
            # one-argument calls behave as before, including when ``nlp``
            # has not been created yet.
            client = nlp

        result = client.annotate(tweet,
                                 properties={
                                     'annotators': 'sentiment, ner, pos',
                                     'outputFormat': 'json',
                                     'timeout': 1000,
                                 })

        # Indexing fails (and is reported below) if the client handed back
        # something other than the expected JSON-decoded mapping.
        return [sentence["sentiment"] for sentence in result["sentences"]]

    except Exception as exc:
        # Best effort: report the cause and mark the tweet as failed. Catching
        # Exception rather than using a bare except lets KeyboardInterrupt and
        # SystemExit stop a long-running apply().
        print("Error:", repr(exc))
        return "nan"
                       

In [None]:
# Run analyze_sentiment on every tweet's text and store the result per row
df['sentiment_by_sentence'] = df['text'].map(analyze_sentiment)

In [None]:
# Keep only the rows whose sentiment is not the "nan" failure marker

has_sentiment = df['sentiment_by_sentence'] != 'nan'
df_clean = df.loc[has_sentiment]
df_clean.head()

## Update MongoDB records with an array of sentence sentiments

In [None]:
# Write each tweet's sentence sentiments back to its existing document;
# upsert=False means no new documents are inserted for unknown tweet ids.
for tweet_id, sentence_sentiments in zip(df_clean['tweet_id'], df_clean['sentiment_by_sentence']):
    db['capstone_finance'].update_one(
        {'tweet_id': tweet_id},
        {'$set': {'sentiment_by_sentence': sentence_sentiments}},
        upsert=False,
    )

## How to connect to and run CoreNLP server

Source: https://towardsdatascience.com/natural-language-processing-using-stanfords-corenlp-d9e64c1e1024

To start the CoreNLP server:
- Open a command prompt
- Go to the CoreNLP directory
- Start the server by entering the command: `java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 15000`

In [None]:
# Example code: create the CoreNLP client and annotate a short review
text = "This movie was actually neither that funny, nor super witty. The movie was meh. I liked watching that movie. If I had a choice, I would not watch that movie again."

nlp = StanfordCoreNLP('http://localhost:9000')
result = nlp.annotate(
    text,
    properties=dict(
        annotators='sentiment, ner, pos',
        outputFormat='json',
        timeout=1000,
    ),
)

In [None]:
# Show the index and sentiment label of each annotated sentence
for sentence in result["sentences"]:
    print(sentence["index"], sentence["sentiment"])


## Stanford Demo code

In [None]:

from stanfordcorenlp import StanfordCoreNLP
import logging
import json

class StanfordNLP:
    """Thin convenience wrapper around a ``StanfordCoreNLP`` client.

    Every method forwards to the wrapped client stored in ``self.nlp``;
    ``annotate`` additionally sends the default properties in ``self.props``
    and decodes the JSON response.
    """

    def __init__(self, host='http://localhost', port=9000):
        """Create the wrapped client for ``host``/``port`` and set default properties."""
        self.nlp = StanfordCoreNLP(host, port=port,
                                   timeout=30000)  # , quiet=False, logging_level=logging.DEBUG)
        # Properties passed along with every annotate() call.
        self.props = {
            'annotators': 'tokenize,ssplit,pos,lemma,ner,parse,depparse,dcoref,relation',
            'pipelineLanguage': 'en',
            'outputFormat': 'json'
        }

    def word_tokenize(self, sentence):
        """Return the wrapped client's ``word_tokenize`` result for ``sentence``."""
        return self.nlp.word_tokenize(sentence)

    def pos(self, sentence):
        """Return the wrapped client's ``pos_tag`` result for ``sentence``."""
        return self.nlp.pos_tag(sentence)

    def ner(self, sentence):
        """Return the wrapped client's ``ner`` result for ``sentence``."""
        return self.nlp.ner(sentence)

    def parse(self, sentence):
        """Return the wrapped client's ``parse`` result for ``sentence``."""
        return self.nlp.parse(sentence)

    def dependency_parse(self, sentence):
        """Return the wrapped client's ``dependency_parse`` result for ``sentence``."""
        return self.nlp.dependency_parse(sentence)

    def annotate(self, sentence):
        """Annotate ``sentence`` with ``self.props`` and return the JSON-decoded response."""
        return json.loads(self.nlp.annotate(sentence, properties=self.props))

    @staticmethod
    def tokens_to_dict(_tokens):
        """Index token records by their integer ``index`` field.

        Parameters
        ----------
        _tokens : iterable of mapping
            Each item must provide ``'index'``, ``'word'``, ``'lemma'``,
            ``'pos'`` and ``'ner'``; any other keys are ignored.

        Returns
        -------
        collections.defaultdict
            Maps ``int(token['index'])`` to a dict holding that token's
            ``word``, ``lemma``, ``pos`` and ``ner``. Looking up an index that
            was never seen yields (and stores) an empty dict.
        """
        # The cell's import block never brings in defaultdict, so the original
        # method raised NameError; import it where it is used.
        from collections import defaultdict

        tokens = defaultdict(dict)
        for token in _tokens:
            tokens[int(token['index'])] = {
                'word': token['word'],
                'lemma': token['lemma'],
                'pos': token['pos'],
                'ner': token['ner']
            }
        return tokens

if __name__ == '__main__':
    # Run every wrapper method on one sample text and print the labelled result.
    sNLP = StanfordNLP()
    text = 'A blog post using Stanford CoreNLP Server. Visit www.khalidalnajjar.com for more details.'
    demos = (
        ("Annotate:", sNLP.annotate),
        ("POS:", sNLP.pos),
        ("Tokens:", sNLP.word_tokenize),
        ("NER:", sNLP.ner),
        ("Parse:", sNLP.parse),
        ("Dep Parse:", sNLP.dependency_parse),
    )
    for label, run in demos:
        print(label, run(text))

In [None]:
# Annotate the sample movie review with the existing `nlp` client
text = "This movie was actually neither that funny, nor super witty. The movie was meh. I liked watching that movie. If I had a choice, I would not watch that movie again."
result = nlp.annotate(
    text,
    properties=dict(
        annotators='sentiment, ner, pos',
        outputFormat='json',
        timeout=1000,
    ),
)

In [None]:
from pycorenlp import StanfordCoreNLP

# Re-create the pycorenlp client and request sentiment annotation only
nlp = StanfordCoreNLP('http://localhost:9000')
res = nlp.annotate(
    "I love you. I hate him. You are nice. He is dumb",
    properties=dict(
        annotators='sentiment',
        outputFormat='json',
        timeout=1000,
    ),
)

# One line per sentence: index, the sentence's words, sentiment value and label
for s in res["sentences"]:
    words = " ".join(t["word"] for t in s["tokens"])
    print("%d: '%s': %s %s" % (s["index"], words, s["sentimentValue"], s["sentiment"]))