# The objective is to assign a sentiment score to each sentence of each tweet. Java, Stanza and PyTorch must be installed in order to use the CoreNLP interface provided by Stanza; the setup can be cumbersome.

In [2]:
import stanza
import datetime as dt
import pandas as pd

# Path of the CoreNLP installation directory, relative to the current working
# directory; it is exported as CORENLP_HOME in a later cell.
corenlp_dir = './corenlp'

In [3]:
# Export the CoreNLP installation directory through the CORENLP_HOME
# environment variable so that it is visible to the rest of this process.
import os
os.environ.update({"CORENLP_HOME": corenlp_dir})

In [4]:
# Import client module
from stanza.server import CoreNLPClient

Only threads that are verified to be correct are processed: each thread must contain at least one customer tweet and one company reply (2 tweets in the thread), must be initiated by the customer, must have valid links to the next and previous tweets, and its tweets must be in chronological order with the most recent ones at the end. Note that the filter in the next cell keeps only threads whose `length` is greater than 2.

In [5]:
# --- Load the per-thread table -------------------------------------------
threads = pd.read_csv('threads.csv', dtype={'tweet_id': str})

# Empty placeholder columns; the sentiment loop fills them in row by row.
for placeholder_col in ("first_sentim_l", "last_sentim_l", "first_tweet_text", "last_tweet_text"):
    threads[placeholder_col] = ""

# Keep only rows whose `length` exceeds 2 and for which every verification
# flag is set.
# NOTE(review): this assumes the four flag columns are boolean and that
# `length` counts the tweets of a thread -- confirm against threads.csv.
mask = (
    (threads["length"] > 2)
    & threads["verify_alternance"]
    & threads["inbound_first"]
    & threads["verify_thread"]
    & threads["verify_time"]
)
# Boolean selection followed by set_index yields a new, independent frame, so
# later writes to `thread_ok` never touch `threads`.
thread_ok = threads.loc[mask].set_index('tweet_id')

# --- Load the tweet table, indexed by tweet id ----------------------------
# na_filter=False keeps empty fields as "" instead of converting them to NaN.
full_df = pd.read_csv(
    "emojiTranslatedCleanedNoUnderscore.csv",
    na_filter=False,
    parse_dates=['created_at'],
    dtype={'tweet_id': str, 'in_response_to_tweet_id': str, 'inbound': bool, 'response_tweet_id': str},
).set_index("tweet_id")

# --- Positional column indices used by the sentiment loop (via .iloc) -----
required_cols = ["inbound_l", "tweet_l", "first_sentim_l", "last_sentim_l", "first_tweet_text", "last_tweet_text"]
col_positions = thread_ok.columns.get_indexer(required_cols)
# get_indexer reports a missing label as -1, which .iloc would silently treat
# as "the last column"; fail loudly instead of reading/writing the wrong data.
missing_cols = [name for name, pos in zip(required_cols, col_positions) if pos == -1]
if missing_cols:
    raise KeyError(f"threads.csv is missing required column(s): {missing_cols}")
inbound_col, tweet_col, first_sentim_col, last_sentim_col, first_tweet_col, last_tweet_col = col_positions


This cell takes hours to run: it took about 8 hours on my Core i5 PC. Beware. For a demo, limit the loop to a small number of threads (e.g. 1001) instead of iterating over all `len(thread_ok)` threads.

In [6]:
# Upper bound on the number of threads to annotate. The full data set takes
# hours, so a small value is used for demos; set to None to process every
# valid thread.
DEMO_THREAD_LIMIT = 1001

print("Starting a server with the Python \"with\" statement...")
with CoreNLPClient(annotators=['sentiment'],
                   memory='6G', endpoint='http://localhost:9001', be_quiet=True, timeout=100000) as client:
    print("Processing this number of valid threads: ", len(thread_ok))
    # Never iterate past the end of thread_ok: a fixed range(1001) raises
    # IndexError when fewer threads passed the filter.
    if DEMO_THREAD_LIMIT is None:
        n_threads = len(thread_ok)
    else:
        n_threads = min(DEMO_THREAD_LIMIT, len(thread_ok))

    for row in range(n_threads):
        if row % 1000 == 0:
            print("Working on thread number: ", row, "time: ", dt.datetime.now().time())

        # Both cells are "|"-separated strings; entry k of one is paired with
        # entry k of the other.
        inbound_text, tweet_text = thread_ok.iloc[row, [inbound_col, tweet_col]]
        inbound_list = inbound_text.split("|")
        tweet_list = tweet_text.split("|")

        first_user_tweet = tweet_list[0]
        # Scan from the end for the last entry flagged "True" and take the
        # tweet id at the same position. Raises ValueError if no entry is
        # flagged "True".
        last_user_tweet = tweet_list[::-1][inbound_list[::-1].index("True")]

        first_tweet_text = full_df.loc[first_user_tweet, "text"]
        last_tweet_text = full_df.loc[last_user_tweet, "text"]

        first_doc = client.annotate(first_tweet_text)
        last_doc = client.annotate(last_tweet_text)

        # One sentiment label per sentence of each annotated tweet.
        first_sentiment = [sentence.sentiment for sentence in first_doc.sentence]
        last_sentiment = [sentence.sentiment for sentence in last_doc.sentence]

        # Store the "|"-joined labels and the raw tweet texts in this row.
        thread_ok.iloc[row, [first_sentim_col, last_sentim_col, first_tweet_col, last_tweet_col]] = [
            '|'.join(first_sentiment),
            '|'.join(last_sentiment),
            first_tweet_text,
            last_tweet_text,
        ]
print("\nThe server should be stopped upon exit from the \"with\" statement.")

2020-12-13 00:09:25 INFO: Writing properties to tmp file: corenlp_server-a4136d562b8542d5.props
2020-12-13 00:09:25 INFO: Starting server with command: java -Xmx6G -cp ./corenlp\* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 -timeout 100000 -threads 5 -maxCharLength 100000 -quiet True -serverProperties corenlp_server-a4136d562b8542d5.props -annotators sentiment -preload -outputFormat serialized


Starting a server with the Python "with" statement...
Processing this number of valid threads:  122609
Working on thread number:  0 time:  00:09:26.208590
Working on thread number:  1000 time:  00:13:03.632660

The server should be stopped upon exit from the "with" statement.


In [None]:
# Write the annotated thread table (indexed by tweet_id) to disk as CSV.
thread_ok.to_csv("thread_first_last_sentiment.csv")