In [72]:
import sys
from pathlib import Path
from typing import Dict

from pyspark.sql import functions as F
from pyspark.sql.functions import udf
from pyspark.sql.types import StructType, Row, StructField, DoubleType, ArrayType
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from alpha_media_signal.config import constants
from alpha_media_signal.services import file_services
from alpha_media_signal.services import spark_service
from alpha_media_signal.utils import sentiment

# Spark session shared by every cell below. The string is handed to the project's
# spark_service helper (presumably used as the application name - confirm there).
spark = spark_service.getOrCreate('assign_sentiment_and_stock')

# Sub-folder of constants.DATA_PATH that the tweet files are read from.
# Set this to 'twitter_test' to point the notebook at the alternative folder.
twitter_folder = 'twitter'

tweet_path = Path(constants.DATA_PATH) / twitter_folder

In [73]:
# Two sample tweet texts used to try out the scorer by hand before it is
# wrapped in a Spark UDF.
text_1 = """@Codieisfree @JRSP_1978 @ValeTudoBro I understand. What I don't like is captain cop and jive turkey Biden😂 and like… https://t.co/fnOwHpLqm1"""
text_2 = r'RT @iprathmeshs: We belong to Atif Aslam&#44; KK &#44; Himesh reshammiya &#44; Shreya Ghoshal&#44; A.R.Rahman  music era do not doubt our taste in music. P…'

# Score both samples, in order, with the project's sentiment helper.
sent_1, sent_2 = [
    sentiment.get_sentiment_intensity_score(sample)
    for sample in (text_1, text_2)
]


In [74]:
# Show both sample score dicts on one line, separated by a colon.
print(sent_1, sent_2, sep=':')

{'neg': 0.139, 'neu': 0.729, 'pos': 0.132, 'compound': 0.1999}:{'neg': 0.0, 'neu': 0.811, 'pos': 0.189, 'compound': 0.6534}


In [78]:
def get_sentiment(text: str):
    """Score one piece of text and return its negative/neutral/positive values.

    Delegates to ``sentiment.get_sentiment_intensity_score`` and repackages the
    'neg', 'neu' and 'pos' entries of its result under long key names. Any
    other entry of the score (e.g. 'compound') is not used.

    Args:
        text: The text to score. When this function runs as a Spark UDF, a
            null cell arrives here as ``None``.

    Returns:
        A dict with the keys "negative", "neutral" and "positive", or ``None``
        when ``text`` is ``None`` so that a null input yields a null result
        instead of being handed to the scorer.
    """
    # Null text has nothing to score; propagate the null rather than passing
    # None to the scorer (NOTE(review): confirm the scorer has no None handling
    # of its own that callers relied on).
    if text is None:
        return None

    scores = sentiment.get_sentiment_intensity_score(text)
    return {
        "negative": scores['neg'],
        "neutral": scores['neu'],
        "positive": scores['pos'],
    }

# Return type of the sentiment UDF: a struct of three doubles whose field names
# match the keys of the dict returned by get_sentiment.
schema = StructType([
    StructField("negative", DoubleType()),
    StructField("neutral", DoubleType()),
    StructField("positive", DoubleType()),
])

# The scoring cell calls `get_sentiment_udf`, so bind the UDF under that name.
# It was previously bound only to the misspelled `gt_sentiment_udf`, which made
# the scoring cell fail with a NameError on a fresh kernel.
get_sentiment_udf = udf(get_sentiment, schema)
# Old misspelled name kept as an alias so existing references still resolve.
gt_sentiment_udf = get_sentiment_udf

In [79]:
# Debug aid: print the interpreter's module search path (presumably to check
# that the project package directory is importable from this kernel).
print(sys.path)

['', '/tmp/spark-3940d260-321f-47cd-b8bc-859144defeb4/userFiles-76c47208-30ae-42d9-af1c-9ec5170b9957', '/home/jovyan/work/alpha_media_signal/notebooks/twitter', '/usr/local/spark/python', '/usr/local/spark/python/lib/py4j-0.10.7-src.zip', '/opt/conda/lib/python37.zip', '/opt/conda/lib/python3.7', '/opt/conda/lib/python3.7/lib-dynload', '/opt/conda/lib/python3.7/site-packages', '/opt/conda/lib/python3.7/site-packages/IPython/extensions', '/home/jovyan/.ipython', '/home/jovyan/work']


In [82]:
# Folder holding the flattened tweet CSV part-files for one run.
output_folder_path = Path(tweet_path, "flattened_drop", "tweets_flat_2020-08-22_18-04-19-516.66")

print(str(output_folder_path))
print(output_folder_path.exists())

csv_list = list(file_services.list_files(output_folder_path, ends_with=".csv"))

# Fail with a clear message when there is nothing to read; otherwise the cell
# would only break further down with a NameError on `df_scored`.
if not csv_list:
    raise FileNotFoundError(f"No .csv files found under {output_folder_path}")

# Only the first CSV is sampled here; the remaining files are not read.
csv_path = csv_list[0]
print(csv_path)

df_init = spark.read.csv(str(csv_path), header=True)

# Small sample so the Python UDF runs quickly while experimenting.
df_limit = df_init.limit(10)

# `gt_sentiment_udf` is the name the UDF is bound to where it is defined;
# the previous `get_sentiment_udf` spelling here did not match that binding.
df_scored = df_limit.withColumn('sentiment', gt_sentiment_udf(F.col('text')))

# Flatten the struct column into three top-level columns and drop the struct.
# The result is kept in a variable so later cells can use it, and is also the
# cell's last expression so it is still displayed.
df_sentiment = (
    df_scored
    .withColumn('sentiment_negative', F.col('sentiment.negative'))
    .withColumn('sentiment_neutral', F.col('sentiment.neutral'))
    .withColumn('sentiment_positive', F.col('sentiment.positive'))
    .drop('sentiment')
)
df_sentiment

/home/jovyan/work/data/twitter/flattened_drop/tweets_flat_2020-08-22_18-04-19-516.66
True
/home/jovyan/work/data/twitter/flattened_drop/tweets_flat_2020-08-22_18-04-19-516.66/part-00000-4053aecc-de57-4a13-aae9-9850674c071c-c000.csv


Unnamed: 0,created_at,id,text,truncated,source,in_reply_to_status_id,in_reply_to_user_id,in_reply_to_screen_name,contributors,is_quote_status,...,user_default_profile,user_default_profile_image,user_following,user_follow_request_sent,user_notifications,user_translator_type,sentiment,negative,neutral,positive
0,Mon Aug 17 19:51:43 +0000 2020,1295448240796639235,@Codieisfree @JRSP_1978 @ValeTudoBro I underst...,True,"<a href=""https://mobile.twitter.com"" rel=""nofo...",1.2954476046757435e+18,9.958159112348097e+17,Codieisfree,,False,...,True,False,,,,none,"(0.139, 0.729, 0.132)",0.139,0.729,0.132
1,Mon Aug 10 00:41:47 +0000 2020,1292622134741630976,RT @iprathmeshs: We belong to Atif Aslam&#44; ...,False,"<a href=""http://twitter.com/download/android"" ...",,,,,False,...,False,False,,,,none,"(0.0, 0.811, 0.189)",0.0,0.811,0.189
2,Sat Aug 15 23:29:50 +0000 2020,1294778353896038400,RT @yunowantsmilk: no bubble updates&#44; no i...,False,"<a href=""http://twitter.com/download/android"" ...",,,,,False,...,True,False,,,,none,"(0.306, 0.694, 0.0)",0.306,0.694,0.0
3,Thu Aug 13 20:52:30 +0000 2020,1294013984610426882,RT @MayaJama: The better life gets the more pr...,False,"<a href=""http://twitter.com/download/android"" ...",,,,,False,...,True,False,,,,none,"(0.175, 0.598, 0.227)",0.175,0.598,0.227
4,Mon Aug 17 19:11:00 +0000 2020,1295437992656605184,I'm TRYING to design a tabletop RPG. And for t...,True,"<a href=""http://twitter.com/download/android"" ...",,,,,False,...,True,False,,,,none,"(0.0, 0.757, 0.243)",0.0,0.757,0.243
5,Fri Aug 14 22:47:52 +0000 2020,1294405406199029766,No longer in stock at OCS: Ghost Train Haze Re...,False,"<a href=""https://mjslist.ca/"" rel=""nofollow"">o...",,,,,False,...,False,False,,,,none,"(0.237, 0.632, 0.132)",0.237,0.632,0.132
6,Sat Aug 15 09:16:07 +0000 2020,1294563509699780608,https://t.co/mDlvgnAvBRSmileDirectClub Reports...,True,"<a href=""https://news.freeptomaineradio.com"" r...",,,,,False,...,True,False,,,,none,"(0.0, 1.0, 0.0)",0.0,1.0,0.0
7,Sat Aug 15 08:53:05 +0000 2020,1294557715713404928,Jon K. Hayashida Sells 41&#44;602 Shares of ST...,False,"<a href=""https://zapier.com/"" rel=""nofollow"">Z...",,,,,False,...,True,False,,,,none,"(0.0, 0.833, 0.167)",0.0,0.833,0.167
8,Sat Aug 15 16:29:39 +0000 2020,1294672614057705473,UMB Financial $UMBF Lifted to “Hold” at Bidask...,False,"<a href=""https://zapier.com/"" rel=""nofollow"">Z...",,,,,False,...,False,False,,,,none,"(0.0, 1.0, 0.0)",0.0,1.0,0.0
9,Mon Aug 10 20:16:12 +0000 2020,1292917687845367814,I'd like a little shot of @moderna_tx &#44; mR...,False,"<a href=""https://mobile.twitter.com"" rel=""nofo...",,,,,False,...,False,False,,,,none,"(0.0, 0.839, 0.161)",0.0,0.839,0.161


In [None]:
# Associate

def 
