## New analysis

Analyzes text, HTML, or a public webpage for the following features:
        - Categories
        - Concepts
        - Emotion
        - Entities
        - Keywords
        - Metadata
        - Relations
        - Semantic roles
        - Sentiment.

In [7]:
import codecs
import json
import glob
import numpy as np
import pandas as pd
from progress import ProgressTracker
import pprint as pp

In [2]:
with open(".IBM.key") as fp:
    key = json.load(fp)

In [6]:
from __future__ import print_function
import json
from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions, EmotionOptions
from watson_developer_cloud import natural_language_understanding_v1
service = NaturalLanguageUnderstandingV1(
    version='2018-03-16',
    ## url is optional, and defaults to the URL below. Use the correct URL for your region.
    # url='https://gateway.watsonplatform.net/natural-language-understanding/api',
    iam_apikey=key["apikey"],
)

In [8]:
KeywordsOptions?

In [None]:
# Test
response = service.analyze(
    text='Bruce Banner is the Hulk and Bruce Wayne is BATMAN! '
    'Superman fears not Banner, but Wayne.',
    features=Features(entities=EntitiesOptions(),
                      keywords=KeywordsOptions(emotion=True))).get_result()

print(json.dumps(response, indent=2))

## Picks up from extractTweets

In [9]:
tweetsDF = pd.read_csv("data/tweets.csv")

In [10]:
metaDatMin = pd.read_csv("data/metaDatMin.csv")

In [11]:
tweet1 = list(tweetsDF.loc[1:3,"full_text"])
tweet1

['#TrumpShutdown not just about federal workers, it’s about all of us.  If we fly, safety &amp; delays at airports; if we file tax return, no one to process &amp; delays in refund check; if a Senior or veteran applying for assistance,  no one to process &amp; delay in checks.  #EndShutdown https://t.co/g16aYNihPU',
 'And bring the agents responsible for the two children who died in your custody with you!  Still lots of questions. #JusticeforFelipe #JusticeForJakelin https://t.co/y6hGXAU1ZJ',
 'For Texans, the president’s demand for a wall will trample private property rights and infringe on a church. How can conservatives defend this? #TrumpShutdown https://t.co/3M9gsQEQxE']

In [13]:
tweet11 = "\r\n\t".join(tweet1)

In [14]:
tweetsDF.groupby("twitter_account")

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x1103b1c88>

In [15]:
RepSylviaTweetsConcat = "--".join(tweetsDF.loc[tweetsDF.loc[:,"twitter_account"]=="RepSylviaGarcia","full_text"])

In [16]:
response = service.analyze(
    text=RepSylviaTweetsConcat,
    features=Features(entities=EntitiesOptions(emotion=True, sentiment=True),
                      keywords=KeywordsOptions(emotion=True, sentiment=True))).get_result()

In [17]:
response

{'usage': {'text_units': 1, 'text_characters': 2459, 'features': 2},
 'language': 'en',
 'keywords': [{'text': 'JusticeForJazmine https://t.co/T5CXIpmUVF--#nobabyjails',
   'sentiment': {'score': -0.896636, 'label': 'negative'},
   'relevance': 0.629309,
   'emotion': {'sadness': 0.692436,
    'joy': 0.034563,
    'fear': 0.029867,
    'disgust': 0.153688,
    'anger': 0.280267},
   'count': 1},
  {'text': 'Sylvia Garcia',
   'sentiment': {'score': -0.490122, 'label': 'negative'},
   'relevance': 0.624276,
   'emotion': {'sadness': 0.120218,
    'joy': 0.507051,
    'fear': 0.06623,
    'disgust': 0.15095,
    'anger': 0.108609},
   'count': 2},
  {'text': 'TrumpShutdown https://t.co/3M9gsQEQxE--RT',
   'sentiment': {'score': -0.659505, 'label': 'negative'},
   'relevance': 0.610097,
   'emotion': {'sadness': 0.151475,
    'joy': 0.251257,
    'fear': 0.479617,
    'disgust': 0.240376,
    'anger': 0.01938},
   'count': 1},
  {'text': 'former state senator',
   'sentiment': {'score': -

In [18]:
len(response["keywords"])

50

In [19]:
for i in range(50):
    print(response["keywords"][i]["text"])

JusticeForJazmine https://t.co/T5CXIpmUVF--#nobabyjails
Sylvia Garcia
TrumpShutdown https://t.co/3M9gsQEQxE--RT
former state senator
✊🏼✊🏽✊🏾
House Democrats
Southwest Key Programs
JusticeForJakelin https://t.co/y6hGXAU1ZJ--For Texans
compromise funding bills
refund check
D-Texas
tax return
private property rights
Congress https://t.co/htgkif7rXr https://t.co/nUQjs2Oq2t--We work
EndShutdown https://t.co/g16aYNihPU--And
video of staff members
Nutrition assistance
people of the 29th District
House
Justice Department
president’s demand
first female Speaker
se puede
🏿Swearing
Dems’ commitment
amp
delays
tax refund
assistance
city controller
last month
Still lots of questions
Latina
children
veteran
pressure
safety
airports
TX29 children
man
wall
first Latinas
Houston
delay
agents
justice
checks
bills
days
Texas congresswoman


In [20]:
sum(keyword["emotion"]["sadness"] for keyword in response["keywords"])

16.597171000000003

In [26]:
groupedTweets = tweetsDF.groupby('twitter_account')['full_text'].apply(lambda x: "%s" % ' --\n '.join(x)).reset_index()

In [27]:
groupedTweets

Unnamed: 0,twitter_account,full_text
0,BettyMcCollum04,There’s still time to #GetCovered! @MNsure’s l...
1,BradSherman,Proud to be sworn in by @SpeakerPelosi to begi...
2,Call_Me_Dutch,"RT @RepAnthonyBrown: I represent over 50,000 f..."
3,CathyMcMorris,HAPPENING SOON: I’ll speak with @mschlapp on @...
4,ChrisMurphyCT,Cassie Bernall was 17. Her parents used to cal...
5,DesJarlaisTN04,Economy in last two years has rebounded from O...
6,EleanorNorton,Thank you to the following Members for introdu...
7,GraceNapolitano,RT @RepThompson: The American people have been...
8,GrahamBlog,.@tim_cook Our nation is at war &amp; this Iph...
9,JanSchakowsky,RT @CNN: JUST IN: House Speaker Nancy Pelosi a...


## Merge with metadata

In [24]:
metaDatMin.sort_values(by="twitter_account")

Unnamed: 0.1,Unnamed: 0,api_uri,at_large,contact_form,crp_id,cspan_id,date_of_birth,district,dw_nominate,facebook_account,...,state_rank,suffix,title,total_present,total_votes,twitter_account,url,votes_with_party_pct,votesmart_id,youtube_account
36,817,https://api.propublica.org/congress/v1/members...,False,,N00012942,86670.0,1954-07-12 00:00:00,4,,repbettymccollum,...,,,Representative,0.0,12.0,BettyMcCollum04,https://mccollum.house.gov,83.33,3812.0,
90,379,https://api.propublica.org/congress/v1/members...,False,,N00006897,45124.0,1954-10-24 00:00:00,30,-0.343,63158229861,...,,,Representative,0.0,976.0,BradSherman,https://sherman.house.gov,92.00,142.0,shermanca27
81,355,https://api.propublica.org/congress/v1/members...,False,,N00025482,49155.0,1946-01-31 00:00:00,2,-0.298,1.84757E+14,...,,,Representative,0.0,976.0,Call_Me_Dutch,https://ruppersberger.house.gov,91.35,36130.0,ruppersberger
40,822,https://api.propublica.org/congress/v1/members...,False,,N00026314,1013063.0,1969-05-22 00:00:00,5,,mcmorrisrodgers,...,,,Representative,0.0,12.0,CathyMcMorris,https://mcmorris.house.gov,83.33,3217.0,mcmorrisrodgers
76,1059,https://api.propublica.org/congress/v1/members...,,,N00027566,1021270.0,1973-08-03 00:00:00,,,chrismurphyct,...,junior,,"Senator, 1st Class",,,ChrisMurphyCT,https://www.murphy.senate.gov,,17189.0,senchrismurphy
97,658,https://api.propublica.org/congress/v1/members...,False,,N00030957,623517.0,1964-02-21 00:00:00,4,,ScottDesJarlaisTN04,...,,,Representative,0.0,12.0,DesJarlaisTN04,https://desjarlais.house.gov,83.33,123473.0,ScottDesJarlaisTN04
39,299,https://api.propublica.org/congress/v1/members...,True,https://norton.house.gov/contact/email,N00001692,882.0,1937-06-13 00:00:00,At-Large,,CongresswomanNorton,...,,,Delegate,,0.0,EleanorNorton,https://norton.house.gov,,775.0,EleanorHNorton
31,839,https://api.propublica.org/congress/v1/members...,False,,N00006789,57873.0,1936-12-04 00:00:00,32,,RepGraceNapolitano,...,,,Representative,0.0,12.0,GraceNapolitano,https://napolitano.house.gov,83.33,8393.0,RepGraceNapolitano
78,486,https://api.propublica.org/congress/v1/members...,,https://www.lgraham.senate.gov/public/index.cf...,N00009975,36782.0,1955-07-09 00:00:00,,0.409,USSenatorLindseyGraham,...,senior,,"Senator, 2nd Class",0.0,444.0,GrahamBlog,https://www.lgraham.senate.gov/public,97.15,21992.0,USSenLindseyGraham
47,905,https://api.propublica.org/congress/v1/members...,False,,N00004724,57874.0,1944-05-26 00:00:00,9,,janschakowsky,...,,,Representative,0.0,12.0,JanSchakowsky,https://schakowsky.house.gov,83.33,6387.0,repschakowsky


In [23]:
metaDatMin.loc[:5, "twitter_account"]

0    RepSylviaGarcia
1          RepSinema
2    SenatorCantwell
3        MazieHirono
4          RepSinema
5         repmialove
Name: twitter_account, dtype: object

In [62]:
"""SELECT t1.*, t2.full_text from metadat as t1 join groupedTweets as t2 on t1.twitter_account == t2.twitter_account"""

'SELECT t1.*, t2.full_text from metadat as t1 join groupedTweets as t2 on t1.twitter_account == t2.twitter_account'

In [91]:
metaDatMin.join(groupedTweets, on="twitter_account", how="inner")

ValueError: You are trying to merge on object and int64 columns. If you wish to proceed you should use pd.concat

In [82]:
metaDatMin.dtypes["twitter_account"]

dtype('O')

In [89]:
groupedTweets.dtypes["twitter_account"]

dtype('O')

## Not helpful

In [87]:
# groupedTweets.twitter_account = groupedTweets.twitter_account.str.encode('utf-8')

In [88]:
# metaDatMin.twitter_account = metaDatMin.twitter_account.str.encode('utf-8')

In [92]:
tracker = ProgressTracker(len(groupedTweets))

In [94]:
responses = []
for i, tweetSet in enumerate(groupedTweets.loc[:,"full_text"]):
    response = service.analyze(
        text=tweetSet,
        features=Features(entities=EntitiesOptions(emotion=True, sentiment=True),
                          keywords=KeywordsOptions(emotion=True, sentiment=True))).get_result()
    responses.append(response)
    tracker.update(i)

5 percent done
10 percent done
15 percent done
20 percent done
25 percent done
30 percent done
35 percent done
40 percent done
45 percent done
50 percent done
55 percent done
60 percent done
65 percent done
70 percent done
75 percent done
80 percent done
85 percent done
90 percent done
95 percent done


In [95]:
len(responses)

86

In [29]:
import pickle

In [96]:
pickle.dump(responses, open('data/IBMResponses.pkl', 'wb'))

In [30]:
responses1 = pickle.load(open('data/IBMResponses.pkl', 'rb'))

In [32]:
responses = responses1

In [103]:
groupedTweets.loc[0,"full_text"]

'There’s still time to #GetCovered! @MNsure’s last day of #OpenEnrollment is just 5 days away (1/13). Meet with an assister today &amp; get free help: https://t.co/Ja2wX1J3tq https://t.co/TDExOMi34v --\n I am saddened by the passing of my friend and former MN House colleague Tom Rukavina. While we didn’t always agree, it was an honor to fight alongside him on behalf of hardworking Minnesotans. His passion for the people he represented and sense of humor will be missed. --\n Need help with a federal agency? My staff will be in Stillwater tomorrow -- Tuesday, Jan. 8 -- to assist. Learn more &amp; make an appointment → https://t.co/DlyyZnGneQ https://t.co/C7Gy7l5pX8 --\n Minnesota is in good hands with @julieblaha as State Auditor. Congratulations on your inauguration! --\n Looking forward to continued progress for Minnesota voters under Steve Simon’s leadership as he’s sworn in for a second term as MN’s Secretary of State. Congratulations, @MNSteveSimon! --\n I know from our many years o

In [104]:
pp.pprint(responses[0]["keywords"])

[{'count': 1,
  'emotion': {'anger': 0.069223,
              'disgust': 0.086477,
              'fear': 0.011827,
              'joy': 0.613442,
              'sadness': 0.085489},
  'relevance': 0.555145,
  'sentiment': {'label': 'positive', 'score': 0.94042},
  'text': 'years of dedicated public service'},
 {'count': 7,
  'emotion': {'anger': 0.153825,
              'disgust': 0.440669,
              'fear': 0.038792,
              'joy': 0.30408,
              'sadness': 0.192395},
  'relevance': 0.554577,
  'sentiment': {'label': 'negative', 'mixed': '1', 'score': -0.778695},
  'text': 'American people'},
 {'count': 2,
  'emotion': {'anger': 0.085175,
              'disgust': 0.023002,
              'fear': 0.126905,
              'joy': 0.036896,
              'sadness': 0.220109},
  'relevance': 0.552202,
  'sentiment': {'label': 'negative', 'score': -0.626272},
  'text': 'federal agency'},
 {'count': 1,
  'emotion': {'anger': 0.046492,
              'disgust': 0.061873,
        

In [33]:
def getEmotion(keyword, emotion):
    try:
        return keyword["emotion"][emotion]
    except KeyError:
        return 0

In [34]:
emotionTallys = {}
emotions = ("anger", "disgust", "fear", "joy", "sadness")
for emotion in emotions:
    emotionTallys[emotion] = [sum(getEmotion(keyword, emotion) for keyword in response["keywords"]) for response in responses]

In [35]:
emotionTallys["disgust"][:5]

[12.180891999999995,
 7.840825000000002,
 8.941889,
 5.460022999999999,
 10.769493999999998]

In [36]:
emotionTallys

{'anger': [7.481342999999999,
  7.076859000000001,
  6.359759000000001,
  3.143239,
  10.001310999999998,
  6.962200000000001,
  9.820251,
  5.900742000000001,
  10.431705000000003,
  8.619988999999997,
  3.3756369999999998,
  3.783326,
  10.764426,
  8.702764000000004,
  3.737167,
  3.8055659999999993,
  5.401173,
  6.950645,
  3.5849910000000014,
  4.963512,
  6.124818000000001,
  4.808764999999999,
  3.8403420000000006,
  5.425565999999999,
  5.894471000000001,
  6.862951999999998,
  7.906724000000001,
  6.474148999999998,
  3.8005250000000004,
  6.690532999999999,
  4.749309,
  8.098965000000002,
  3.907908,
  6.521464,
  8.265009,
  6.743813999999999,
  6.187695000000002,
  6.275063999999999,
  6.5739740000000015,
  4.9134899999999995,
  7.023959000000002,
  7.161707000000001,
  6.820056999999999,
  5.367714,
  3.9017590000000006,
  5.474398000000001,
  5.7823990000000025,
  6.728176000000001,
  7.370973,
  6.746156000000001,
  4.290567999999999,
  2.6289379999999998,
  3.48592500

In [37]:
emotionsDF = pd.DataFrame(emotionTallys)

In [38]:
len(emotionsDF)

86

In [39]:
len(groupedTweets)

86

In [40]:
len(tweetsDF)

8209

In [41]:
len(metaDatMin)

100

In [42]:
groupedTweetsWithAnalysisMin = groupedTweets.join(emotionsDF)

In [43]:
groupedTweetsWithAnalysisMin.to_csv("data/groupedTweetsWithEmotions.csv", index=False)

In [44]:
pd.read_csv("data/groupedTweetsWithEmotions.csv")

Unnamed: 0,twitter_account,full_text,anger,disgust,fear,joy,sadness
0,BettyMcCollum04,There’s still time to #GetCovered! @MNsure’s l...,7.481343,12.180892,4.780840,15.476950,11.547214
1,BradSherman,Proud to be sworn in by @SpeakerPelosi to begi...,7.076859,7.840825,5.106363,13.308787,12.367955
2,Call_Me_Dutch,"RT @RepAnthonyBrown: I represent over 50,000 f...",6.359759,8.941889,3.348195,18.177092,9.000912
3,CathyMcMorris,HAPPENING SOON: I’ll speak with @mschlapp on @...,3.143239,5.460023,4.641078,24.160086,12.697618
4,ChrisMurphyCT,Cassie Bernall was 17. Her parents used to cal...,10.001311,10.769494,5.147196,10.932396,14.585866
5,DesJarlaisTN04,Economy in last two years has rebounded from O...,6.962200,10.891910,7.197007,14.038458,15.626289
6,EleanorNorton,Thank you to the following Members for introdu...,9.820251,7.497927,3.587520,16.637091,10.993626
7,GraceNapolitano,RT @RepThompson: The American people have been...,5.900742,8.136046,6.161445,18.672098,13.573010
8,GrahamBlog,.@tim_cook Our nation is at war &amp; this Iph...,10.431705,10.514689,11.200290,8.528350,10.665003
9,JanSchakowsky,RT @CNN: JUST IN: House Speaker Nancy Pelosi a...,8.619989,11.480550,4.482566,13.015394,14.592579


## Scrap

In [2]:
with open("dataDir.txt", 'r') as f:
    dir = f.read().strip()
dir

'/Volumes/GoogleDrive/My Drive/Yale/4/spring/Research/PolySpeech'

In [3]:
suffix = ".json"
# prefix = dir + "/WIL Twitter Data With Vader/"
prefix = dir + "/WIL Twitter Data/"

In [4]:
getName = slice(*(len(prefix),-len(suffix)))

In [9]:
json_paths = glob.glob(prefix + "*" + suffix)
print(len(json_paths))
json_paths[0]
with codecs.open(json_paths[0], 'r', encoding='utf8') as rf: 
    read_tweets = json.load(rf)
    print(len(read_tweets))

2065
3156


In [62]:
def tagIBM(analyzer, start=0, limit=None):
    if limit == None:
        limit = itertools.count()
    else:
        limit = range(start, limit)
    for (i,json_path),_ in zip(enumerate(glob.glob(prefix + "*" + suffix)), limit):
        name = json_path[getName]
        with codecs.open(json_path, 'r', encoding='utf8') as rf: 
            read_tweets = json.load(rf)
            read_tweets = [json.loads(t) for t in read_tweets]
            tweets = [read_tweets[i]['full_text'] for i,_ in enumerate(read_tweets)]
            print(tweets[0])
            print(i,name, "Tweet count:", len(tweets))
            for (j, tweet),_ in zip(enumerate(tweets), limit):
                print(tweet)
                response = service.analyze(text=tweet,
                                           features=Features(emotion=natural_language_understanding_v1.EmotionOptions(),
                                                              keywords=KeywordsOptions())).get_result()
                read_tweets[j]['IBM'] = response
#         with open(outPrefix + name + suffix, 'w') as outfile:
#             json.dump(read_tweets, outfile)
#         print([read_tweets[i]['IBM'] for i,_ in enumerate(read_tweets)])
    return read_tweets
tweets = tagIBM(service, limit=1)

RT @briankoppelman: 1)Some thoughts on the value of reading fiction: When I was in my 20s/early 30s, I read 3 or 4 books a week. Mostly fic…
0 heykerikae Tweet count: 3156
RT @briankoppelman: 1)Some thoughts on the value of reading fiction: When I was in my 20s/early 30s, I read 3 or 4 books a week. Mostly fic…


In [63]:
tweets[0]

{'created_at': 'Wed Dec 26 00:10:32 +0000 2018',
 'full_text': 'RT @briankoppelman: 1)Some thoughts on the value of reading fiction: When I was in my 20s/early 30s, I read 3 or 4 books a week. Mostly fic…',
 'hashtags': [],
 'id': 1077718265567371264,
 'id_str': '1077718265567371264',
 'lang': 'en',
 'retweet_count': 3107,
 'retweeted_status': {'created_at': 'Mon Dec 24 22:24:40 +0000 2018',
  'favorite_count': 11094,
  'full_text': '1)Some thoughts on the value of reading fiction: When I was in my 20s/early 30s, I read 3 or 4 books a week. Mostly fiction. This sounds like a lot to non-writers. But it’s not. It never felt like work. Or if it did, I was on to the next book.',
  'hashtags': [],
  'id': 1077329236149571584,
  'id_str': '1077329236149571584',
  'lang': 'en',
  'place': {'attributes': {},
   'bounding_box': {'coordinates': [[[-80.147162, 25.7613083],
      [-80.1187628, 25.7613083],
      [-80.1187628, 25.872623],
      [-80.147162, 25.872623]]],
    'type': 'Polygon'},
   

In [30]:
natural_language_understanding_v1.EmotionOptions

watson_developer_cloud.natural_language_understanding_v1.EmotionOptions

In [35]:
response = service.analyze(
    text=tweets,
    features=Features(emotion=natural_language_understanding_v1.EmotionOptions(),
                      keywords=KeywordsOptions())).get_result()

In [36]:
response

{'usage': {'text_units': 1, 'text_characters': 140, 'features': 2},
 'language': 'en',
 'keywords': [{'text': 'value of reading fiction',
   'relevance': 0.948469,
   'count': 1},
  {'text': '20s', 'relevance': 0.82297, 'count': 1},
  {'text': 'RT', 'relevance': 0.817649, 'count': 1},
  {'text': 'thoughts', 'relevance': 0.657903, 'count': 1},
  {'text': 'books', 'relevance': 0.565563, 'count': 1},
  {'text': 'week', 'relevance': 0.554172, 'count': 1},
  {'text': '30s', 'relevance': 0.493918, 'count': 1},
  {'text': 'briankoppelman', 'relevance': 0.327459, 'count': 1}],
 'emotion': {'document': {'emotion': {'sadness': 0.41707,
    'joy': 0.415651,
    'fear': 0.131175,
    'disgust': 0.073958,
    'anger': 0.019712}}}}

## OG analysis

In [1]:
from __future__ import print_function
import json
from os.path import join, dirname
from watson_developer_cloud import ToneAnalyzerV3

tone_analyzer = ToneAnalyzerV3(
    username='e382ff56-867f-4857-a0be-5d3da0968b61',
    password='yfRH7C1jLu4A',
    version='2017-09-26')

In [2]:
from collections import Counter
import pandas as pd
import numpy as np

In [17]:
print(json.dumps(tone_analyzer.tone(tone_input='I am very happy. It is a good day.',
                                    content_type="text/plain", sentences=False), indent=2))

{
  "document_tone": {
    "tones": [
      {
        "score": 0.961678,
        "tone_id": "joy",
        "tone_name": "Joy"
      },
      {
        "score": 0.983213,
        "tone_id": "confident",
        "tone_name": "Confident"
      }
    ]
  }
}


In [4]:
# df = pd.read_csv('data/week1-mar25-min.csv')

In [9]:
df = pd.read_csv('pol_tweets_50.csv')
#df = pd.read_csv('men200.csv')

In [1]:
#df

In [14]:
len(df) / 50

216.0

In [15]:
111 * 2

222

In [20]:
step = 10
it = len(df) / step
pref_len = 20
l=0
fails = 1
for i in range(int(it)):
    idx = i*step
    if i % 10 == 0:
        print("progress: i:", i, idx*100/len(df), "%")
    tweet_batch = df.loc[idx:idx+step, 'tweet_text']
    #tweet_batch = [tweet.replace('.', '--').replace('!', '--') for tweet in tweet_batch]
    raw = tone_analyzer.tone(tone_input='. '.join(tweet_batch), content_type='text/plain', sentences=True)
    for j, sent_dict in enumerate(raw['sentences_tone']):
        # find the csv sentence starting with this, if any.
        # first 20 letters serve as key
        key = sent_dict['text'][:pref_len]
        # Linear search from index to idx + step for where to put the results
        for k in range(step):
            if df.loc[idx + k, 'tweet_text'][:pref_len] == key:
                if idx + k <= l:
                    print('Rewriting previous')
                l = idx+k
                df.loc[l, 'IBM_text'] = sent_dict['text']
                for tone in sent_dict['tones']:
                    df.loc[l, tone['tone_name']] = tone['score']
                break
            if k == step - 1:
                # Failure
                if fails % 10 == 0:
                    fails += 1
                    print('Failed 10 matches')
print(fails)

progress: i: 0 0.0 %
Rewriting previous
Rewriting previous
Rewriting previous
Rewriting previous
progress: i: 10 0.9259259259259259 %
progress: i: 20 1.8518518518518519 %


KeyboardInterrupt: 

In [22]:
df[-10:]

Unnamed: 0,first_name,last_name,gender,age,type,senate_class,party,twitter,tweet_id,tweet_text,tweet_retweet_count,tweet_favorite_count,IBM_text,Sadness,Joy,Tentative,Analytical,Confident,Fear,Anger
10790,Kurt,Schrader,M,66,rep,,Democrat,RepSchrader,955165484068036608,My town hall in Polk County originally schedul...,8,5,My town hall in Polk County originally schedul...,,,,,,,
10791,Kurt,Schrader,M,66,rep,,Democrat,RepSchrader,954464269805989888,My town halls in Tillamook and Lincoln Countie...,4,8,My town halls in Tillamook and Lincoln Countie...,,0.507633,,,,,
10792,Kurt,Schrader,M,66,rep,,Democrat,RepSchrader,954170091993788416,I wouldn't expect a student government to oper...,9,25,I wouldn't expect a student government to oper...,,,,0.703409,,,
10793,Kurt,Schrader,M,66,rep,,Democrat,RepSchrader,952307923731935232,Four years ago I met three Marine Corps combat...,13,36,Four years ago I met three Marine Corps combat...,,0.515799,,,,,
10794,Kurt,Schrader,M,66,rep,,Democrat,RepSchrader,952266732898418688,Thanks to all who came out to see Senator Merk...,4,17,Thanks to all who came out to see Senator Merk...,,0.554443,,0.579367,,,
10795,Kurt,Schrader,M,66,rep,,Democrat,RepSchrader,951935153919717376,Tune in to News @1190KEX at 1:50 while I speak...,2,5,Tune in to News @1190KEX at 1:50 while I speak...,,,,,,,
10796,Kurt,Schrader,M,66,rep,,Democrat,RepSchrader,951931235131101184,Thanks to the Canby Rotary for having me today...,2,4,,,,,,,,
10797,Kurt,Schrader,M,66,rep,,Democrat,RepSchrader,951912635112525824,Happy to see the great work being done by @Car...,5,6,Happy to see the great work being done by @Car...,,0.886998,,,,,
10798,Kurt,Schrader,M,66,rep,,Democrat,RepSchrader,941714486792142848,Open enrollment ends at MIDNIGHT tonight! Make...,4,2,Open enrollment ends TOMORROW!,,,,,,,
10799,Kurt,Schrader,M,66,rep,,Democrat,RepSchrader,941359634971062275,Open enrollment ends TOMORROW! Visit https://t...,5,4,,,,,,,,


In [24]:
df.to_csv('more-data/all_w_IBM_50.csv')

In [2]:
#pd.read_csv('more-data/all_w_IBM_50.csv')

In [130]:
for i, tweet in enumerate(df.loc[:1000, 'tweet_text']):
    
    raw = tone_analyzer.tone(tone_input=tweet, content_type='text/plain', sentences=False)
    print(raw)
    for tone in raw['document_tone']['tones']:
        print(tone['tone_name'], tone['score'])
        df.loc[i, tone['tone_name']] = tone['score']

{'document_tone': {'tones': []}}
{'document_tone': {'tones': []}}
{'document_tone': {'tones': []}}
{'document_tone': {'tones': [{'score': 0.915262, 'tone_id': 'joy', 'tone_name': 'Joy'}]}}
Joy 0.915262
{'document_tone': {'tones': []}}
{'document_tone': {'tones': []}}
{'document_tone': {'tones': [{'score': 0.783873, 'tone_id': 'joy', 'tone_name': 'Joy'}]}}
Joy 0.783873
{'document_tone': {'tones': [{'score': 0.828183, 'tone_id': 'joy', 'tone_name': 'Joy'}]}}
Joy 0.828183
{'document_tone': {'tones': [{'score': 0.632709, 'tone_id': 'sadness', 'tone_name': 'Sadness'}, {'score': 0.596122, 'tone_id': 'analytical', 'tone_name': 'Analytical'}, {'score': 0.932568, 'tone_id': 'confident', 'tone_name': 'Confident'}]}}
Sadness 0.632709
Analytical 0.596122
Confident 0.932568
{'document_tone': {'tones': [{'score': 0.536692, 'tone_id': 'fear', 'tone_name': 'Fear'}]}}
Fear 0.536692
{'document_tone': {'tones': [{'score': 0.828638, 'tone_id': 'analytical', 'tone_name': 'Analytical'}]}}
Analytical 0.82863

In [3]:
#df

In [133]:
genders = ['F', 'M', 'M', 'M', 'F', 'F', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'F', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'F', 'M', 'F', 'M', 'F', 'F', 'M', 'F', 'M', 'M', 'F', 'M', 'M', 'F', 'F', 'M', 'M', 'M', 'M', 'M', 'F', 'M', 'F', 'F', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M']
names = ["lisamurkowski",        
"SenJohnMcCain",
"JeffFlake",
"JohnBoozman",
"SenFeinstein",
"SenatorBoxer",
"MarkUdall",
"SenBennetCO",
"ChrisMurphyCT",
"SenBlumenthal",
"SenatorCarper",
"ChrisCoons",
"marcorubio",
"SaxbyChambliss",
"brianschatz",
"maziehirono",
"MikeCrapo",
"SenatorRisch",
"SenatorDurbin",
"SenDonnelly",
"SenDanCoats",
"ChuckGrassley",
"SenatorHarkin",
"SenPatRoberts",
"JerryMoran",
"SenRandPaul",
"SenLandrieu",
"DavidVitter",
"SenatorBarb",
"MarkeyMemo",
"stabenow",
"amyklobuchar",
"SenatorWicker",
"clairecmc",
"RoyBlunt",
"jontester",
"SenatorFischer",
"SenatorReid",
"SenDeanHeller",
"SenatorShaheen",
"kellyayotte",
"CoryBooker",
"SenatorMenendez",
"MartinHeinrich",
"SenatorTomUdall",
"SenSchumer",
"SenGillibrand",
"SenatorBurr",
"SenatorHagan",
"SenatorHeitkamp",
"SenJohnHoeven",
"SenSherrodBrown",
"robportman",
"jiminhofe",
"RonWyden",
"SenToomey",
"SenJackReed",
"SenWhitehouse",
"GrahamBlog",
"SenatorTimScott",
"SenJohnThune",
"SenMikeLee",
"SenatorLeahy",
"timkaine",
"SenRockefeller",
"Sen_JoeManchin",
"SenRonJohnson",
"SenatorEnzi",
"SenJohnBarrasso"]

In [134]:
len(names)

69

In [135]:
len(genders)

69

In [136]:
d = {}
for i, name in enumerate(names):
    d[name] = genders[i]

In [152]:
d['KellyAyotte'] = 'F'

In [4]:
# for i, name in enumerate(df.loc[:, 'politician_username']):
#     try:
#         df.loc[i, 'gender'] = d[name]
#     except KeyError as e:
#         df.loc[i, 'gender'] = 'M'
#         print(e, i, name)

In [146]:
for i, name in enumerate(df.loc[37700:38775, 'politician_username']):
    if name == 'KellyAyotte':
        df.loc[i, 'gender'] = 'F'

In [149]:
df.loc[37800]

politician_name                                              Kelly Ayotte
politician_username                                           KellyAyotte
tweet_text              Getting ready to rappell off Brady Sullivan wi...
tweet_retweet_count                                                     5
tweet_favorite_count                                                   68
Joy                                                                   NaN
Sadness                                                               NaN
Analytical                                                            NaN
Confident                                                             NaN
Fear                                                                  NaN
gender                                                                  M
Name: 37800, dtype: object

In [137]:
d

{'ChrisCoons': 'M',
 'ChrisMurphyCT': 'M',
 'ChuckGrassley': 'M',
 'CoryBooker': 'M',
 'DavidVitter': 'M',
 'GrahamBlog': 'M',
 'JeffFlake': 'M',
 'JerryMoran': 'M',
 'JohnBoozman': 'M',
 'MarkUdall': 'M',
 'MarkeyMemo': 'M',
 'MartinHeinrich': 'M',
 'MikeCrapo': 'M',
 'RonWyden': 'M',
 'RoyBlunt': 'M',
 'SaxbyChambliss': 'M',
 'SenBennetCO': 'M',
 'SenBlumenthal': 'M',
 'SenDanCoats': 'M',
 'SenDeanHeller': 'M',
 'SenDonnelly': 'M',
 'SenFeinstein': 'F',
 'SenGillibrand': 'F',
 'SenJackReed': 'M',
 'SenJohnBarrasso': 'M',
 'SenJohnHoeven': 'M',
 'SenJohnMcCain': 'M',
 'SenJohnThune': 'M',
 'SenLandrieu': 'F',
 'SenMikeLee': 'M',
 'SenPatRoberts': 'M',
 'SenRandPaul': 'M',
 'SenRockefeller': 'M',
 'SenRonJohnson': 'M',
 'SenSchumer': 'M',
 'SenSherrodBrown': 'M',
 'SenToomey': 'M',
 'SenWhitehouse': 'M',
 'Sen_JoeManchin': 'M',
 'SenatorBarb': 'F',
 'SenatorBoxer': 'F',
 'SenatorBurr': 'M',
 'SenatorCarper': 'M',
 'SenatorDurbin': 'M',
 'SenatorEnzi': 'M',
 'SenatorFischer': 'F',
 'Sen

In [190]:
tweet_batch = df.loc[:500, 'tweet_text']

In [191]:
tweet_batch = [tweet.replace('.', '--').replace('!', '--') for tweet in tweet_batch]

In [192]:
raw = tone_analyzer.tone(tone_input='. '.join(tweet_batch), content_type='text/plain', sentences=True)

In [193]:
tweet_batch[5]

'The FY18 government funding before the Senate has big wins for Alaska-- As chairman of the Interior Appropriations S------ https://t--co/1eFjj2wdNx'

In [5]:
#raw

In [185]:
tweet_batch[100]

'Speaking now to the Alaska State Legislature-- Watch live here: https://t--co/I17YMhkXGV'

In [164]:
tweet_batch[7]

'Today kids in AK joined thousands of young people across the U.S. for "Kick Butts Day." Proud to see so many youth... https://t.co/XfnQ33Mg36'

In [163]:
tweet_batch[8]

'We must protect our most vulnerable victims &amp; bring justice to all who are impacted by these crimes. This evening,... https://t.co/W4tqiuA8FL'

In [59]:
with open('data/min2.txt') as f:
    content = f.readlines()

In [6]:
#print(json.dumps(tone_analyzer.tone(tone_input=''.join(content[:100]), content_type='text/plain'), indent=2))

In [65]:
raw = tone_analyzer.tone(tone_input=''.join(content[:100]), content_type='text/plain')

In [7]:
#raw['sentences_tone']

In [72]:
c = Counter()
for sent_dict in raw['sentences_tone']:
    for tone in sent_dict['tones']:
        c[tone['tone_name']] += 1
#    if sent_dict['tones'] != []:
#        print(sent_dict['tones']
print(c)

Counter({'Joy': 27, 'Analytical': 15, 'Confident': 6, 'Sadness': 5, 'Tentative': 3, 'Fear': 1, 'Anger': 1})


In [2]:
utterances = [{'text': 'I am very happy.', 'user': 'glenn'},
              {'text': 'It is a good day.', 'user': 'glenn'}]
print(json.dumps(tone_analyzer.tone_chat(utterances), indent=2))

{
  "utterances_tone": [
    {
      "utterance_id": 0,
      "utterance_text": "I am very happy.",
      "tones": [
        {
          "score": 0.912181,
          "tone_id": "excited",
          "tone_name": "Excited"
        }
      ]
    },
    {
      "utterance_id": 1,
      "utterance_text": "It is a good day.",
      "tones": [
        {
          "score": 0.67376,
          "tone_id": "excited",
          "tone_name": "Excited"
        },
        {
          "score": 0.722718,
          "tone_id": "satisfied",
          "tone_name": "Satisfied"
        }
      ]
    }
  ]
}


In [3]:
arr = tone_analyzer.tone_chat(utterances)

In [4]:
arr

{'utterances_tone': [{'tones': [{'score': 0.912181,
     'tone_id': 'excited',
     'tone_name': 'Excited'}],
   'utterance_id': 0,
   'utterance_text': 'I am very happy.'},
  {'tones': [{'score': 0.67376, 'tone_id': 'excited', 'tone_name': 'Excited'},
    {'score': 0.722718, 'tone_id': 'satisfied', 'tone_name': 'Satisfied'}],
   'utterance_id': 1,
   'utterance_text': 'It is a good day.'}]}

In [50]:
print(json.dumps(tone_analyzer.tone(tone_input='I am very happy. It is a good day.',
                                    content_type="text/plain", sentences=False), indent=2))

{
  "document_tone": {
    "tones": [
      {
        "score": 0.961678,
        "tone_id": "joy",
        "tone_name": "Joy"
      },
      {
        "score": 0.983213,
        "tone_id": "confident",
        "tone_name": "Confident"
      }
    ]
  }
}


In [52]:
utterances

[{'text': 'I am very happy.', 'user': 'glenn'},
 {'text': 'It is a good day.', 'user': 'glenn'}]

In [53]:
print(json.dumps(tone_analyzer.tone(tone_input=utterances,
                                    content_type="text/plain"), indent=2))

{
  "document_tone": {
    "tones": []
  }
}


In [6]:
tone_analyzer.tone?

In [12]:
arr = []
for line in content:
    arr.append(tone_analyzer.tone(tone_input=line, content_type='text/plain', sentences=False))

In [13]:
arr

[{'document_tone': {'tones': []}},
 {'document_tone': {'tones': []}},
 {'document_tone': {'tones': []}},
 {'document_tone': {'tones': [{'score': 0.915262,
     'tone_id': 'joy',
     'tone_name': 'Joy'}]}},
 {'document_tone': {'tones': []}},
 {'document_tone': {'tones': []}},
 {'document_tone': {'tones': [{'score': 0.783873,
     'tone_id': 'joy',
     'tone_name': 'Joy'}]}},
 {'document_tone': {'tones': [{'score': 0.828183,
     'tone_id': 'joy',
     'tone_name': 'Joy'}]}},
 {'document_tone': {'tones': [{'score': 0.632709,
     'tone_id': 'sadness',
     'tone_name': 'Sadness'},
    {'score': 0.932568, 'tone_id': 'confident', 'tone_name': 'Confident'},
    {'score': 0.596122, 'tone_id': 'analytical', 'tone_name': 'Analytical'}]}},
 {'document_tone': {'tones': [{'score': 0.536692,
     'tone_id': 'fear',
     'tone_name': 'Fear'}]}},
 {'document_tone': {'tones': [{'score': 0.828638,
     'tone_id': 'analytical',
     'tone_name': 'Analytical'}]}},
 {'document_tone': {'tones': [{'score

In [39]:
arr = []
for i in range(100):
    arr.append(tone_analyzer.tone(tone_input=content[i], content_type='text/plain'))
arr

KeyboardInterrupt: 

In [38]:
with open('IBM-resources/tone-example.json') as tone_json:
    # print(tone_json.readlines())
    tone = tone_analyzer.tone(tone_input=json.load(tone_json)['text'],
                              content_type='text/plain')
tone

{'document_tone': {'tones': [{'score': 1.0,
    'tone_id': 'joy',
    'tone_name': 'Joy'}]}}

In [49]:
utterances = [{'text': 'I am very happy.', 'user': 'glenn'},
              {'text': 'It is a good day.', 'user': 'glenn'}]
print(json.dumps(tone_analyzer.tone(tone_input=utterances), indent=2))


TypeError: tone() missing 1 required positional argument: 'content_type'

In [41]:
tone_analyzer.tone_chat?

In [58]:
tone_analyzer.tone?

In [46]:
tone_analyzer.ToneInput

AttributeError: 'ToneAnalyzerV3' object has no attribute 'ToneInput'

In [47]:
ToneAnalyzerV3.ToneInput

AttributeError: type object 'ToneAnalyzerV3' has no attribute 'ToneInput'

In [56]:
with open('IBM-resources/tone-example.json') as tone_json:
    tone = tone_analyzer.tone(json.load(tone_json)['text'], "text/plain", sentences=True)

In [57]:
tone

{'document_tone': {'tones': [{'score': 1.0,
    'tone_id': 'joy',
    'tone_name': 'Joy'}]}}