In [1]:
# import libraries
import pandas as pd
import numpy as np

In [2]:
# read the csv into a dataframe
tweets = pd.read_csv("tot_tweets.csv")
tweets = tweets.drop(columns=['id'])

In [3]:
tweets.head()

Unnamed: 0,location,text
0,"Tennessee, USA","b""RT @AliAdair22: \xf0\x9f\x90\xa6Next, Beto O..."
1,,"b""RT @AliAdair22: \xf0\x9f\x90\xa6Next, Beto O..."
2,,"b""Ted Cruz, Beto O'Rourke try to rally Latino ..."
3,America,b'RT @RonNehring: Third poll now showing Cruz ...
4,West Texas,"b'Beto O\xe2\x80\x99Rourke, the Democratic con..."


In [4]:
# Let's write a function to take care of the names
def replace_names(text):
    
    text = text.lower()
    
    beto_words = ['@betoorourke' , "beto o'rourke", "o'rourke", 'rourke']
    cruz_words = ['@tedcruz', 'ted cruz', 'ted']
    
    for w in beto_words:
        try:
            text = text.replace(w, 'beto')
        except:
            pass
    for w in cruz_words:
        try:
            text = text.replace(w, 'cruz')
        except:
            pass
    
    return text

In [5]:
# apply replace_names() on each tweet
text_column = []
for t in tweets.itertuples():
    text = t.text
    new_text = replace_names(text)
    text_column.append(new_text)

In [6]:
# replace old text with new text
tweets.text = text_column

In [7]:
for i in tweets.itertuples():
    text = i.text
    print(text)
    break

b"rt @aliadair22: \xf0\x9f\x90\xa6next, beto beto, running for u.s. senate in #texas. please follow, tweet, contribute, volunteer, anything\xe2\x80\xa6"


In [8]:
# word frequency
# grab word frequency using nltk library
import nltk
# stop words
from nltk.corpus import stopwords

In [9]:
# put all tweets together
text=tweets["text"].values.tolist()
text=" ".join(text)
text = text.lower()

In [10]:
# tokenize the text 
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
tokens = tokenizer.tokenize(text)

In [11]:
# let's do a word count
from collections import Counter

# counter object
words = Counter()

# update counter with new words
words.update(tokens)

In [12]:
# first order check
words.most_common()[:10]

[('xe2', 6577),
 ('x80', 6452),
 ('b', 5743),
 ('rt', 4791),
 ('the', 4411),
 ('xa6', 3838),
 ('texas', 3134),
 ('in', 3112),
 ('beto', 3107),
 ('cruz', 3104)]

In [13]:
# store the stop words
stopwords = stopwords.words('english')[:]

In [14]:
# lets do this again: remove stop words, single letter variable names
filter_words = Counter(x for x in tokens if x not in stopwords and x.isdigit() == False and len(x) != 1 and x.isalpha() == True)

In [15]:
filter_words.most_common()[:1000]

[('rt', 4791),
 ('texas', 3134),
 ('beto', 3107),
 ('cruz', 3104),
 ('senate', 3003),
 ('https', 2088),
 ('co', 2007),
 ('race', 1956),
 ('vote', 877),
 ('single', 785),
 ('msnbc', 735),
 ('word', 722),
 ('win', 703),
 ('every', 662),
 ('care', 656),
 ('repeal', 652),
 ('act', 652),
 ('pre', 652),
 ('insiscruz', 651),
 ('affordable', 651),
 ('including', 651),
 ('protections', 651),
 ('existing', 650),
 ('condi', 650),
 ('debate', 502),
 ('nplease', 491),
 ('believe', 487),
 ('retweet', 483),
 ('sample', 483),
 ('bigger', 479),
 ('size', 478),
 ('guy', 401),
 ('leads', 352),
 ('show', 345),
 ('skateboard', 343),
 ('still', 337),
 ('side', 336),
 ('election', 334),
 ('polling', 331),
 ('skates', 327),
 ('across', 327),
 ('stage', 327),
 ('hoping', 327),
 ('dont', 327),
 ('history', 319),
 ('texassenate', 301),
 ('million', 289),
 ('want', 256),
 ('looking', 241),
 ('cbsnews', 238),
 ('ahead', 237),
 ('problem', 237),
 ('integrity', 235),
 ('serious', 232),
 ('becaus', 232),
 ('campaign'

In [16]:
pd.set_option('display.max_colwidth', -1)

In [17]:
# lets search for issues
tweets[tweets['text'].str.contains("proud")]

Unnamed: 0,location,text
447,"Edinburg, TX","b'@prouddemocrat1 @stephenking in house races, yes, because people are voting for a rep for their district. senators\xe2\x80\xa6 https://t.co/a8yclc3bhh'"
2772,"DALLAS, TX",b'betoobeto you proudly have my #support. please make sure you do your part and vote in the midterm elections... n\xe2\x80\xa6 https://t.co/adnb5uo6rl'
2773,McHenry/Kane Counties - IL,b'rt @rgv3307: we are proud to support &amp; endorse \xe2\x81\xa6cruz\xe2\x81\xa9 for #texassenate #toughastexas #bordersecurity #lawenforcement advocate https://t\xe2\x80\xa6'
2785,"Kane County, IL",b'rt @rgv3307: we are proud to support &amp; endorse \xe2\x81\xa6cruz\xe2\x81\xa9 for #texassenate #toughastexas #bordersecurity #lawenforcement advocate https://t\xe2\x80\xa6'
2798,"Kane County, IL",b'rt @rgv3307: we are proud to support &amp; endorse \xe2\x81\xa6cruz\xe2\x81\xa9 for #texassenate #toughastexas #bordersecurity #lawenforcement advocate https://t\xe2\x80\xa6'
2815,United States,b'rt @rgv3307: we are proud to support &amp; endorse \xe2\x81\xa6cruz\xe2\x81\xa9 for #texassenate #toughastexas #bordersecurity #lawenforcement advocate https://t\xe2\x80\xa6'
2824,,b'rt @rgv3307: we are proud to support &amp; endorse \xe2\x81\xa6cruz\xe2\x81\xa9 for #texassenate #toughastexas #bordersecurity #lawenforcement advocate https://t\xe2\x80\xa6'
2843,"Tennessee, USA",b'rt @rgv3307: we are proud to support &amp; endorse \xe2\x81\xa6cruz\xe2\x81\xa9 for #texassenate #toughastexas #bordersecurity #lawenforcement advocate https://t\xe2\x80\xa6'
2844,"Texas, USA",b'rt @rgv3307: we are proud to support &amp; endorse \xe2\x81\xa6cruz\xe2\x81\xa9 for #texassenate #toughastexas #bordersecurity #lawenforcement advocate https://t\xe2\x80\xa6'
2845,NW Montana,b'rt @rgv3307: we are proud to support &amp; endorse \xe2\x81\xa6cruz\xe2\x81\xa9 for #texassenate #toughastexas #bordersecurity #lawenforcement advocate https://t\xe2\x80\xa6'


In [18]:
# Let's write a function to take care of the ISSUES
def replace_issues(text):
    
    text = text.lower()
    
    healthcare = ['affordable care act' , "healthcare", "aca"]
    taxes = ['tax', 'taxes']
    climatechange = ['climate', 'climatechange']
    bordercontrol = ['border', 'immigration', 'bordersecurity', 'borders']
    character = ['lyin', 'leader']
    
    for w in healthcare:
        try:
            text = text.replace(w, ' healthcare ')
        except:
            pass
    for w in taxes:
        try:
            text = text.replace(w, ' taxes ')
        except:
            pass
    for w in climatechange:
        try:
            text = text.replace(w, ' climatechange ')
        except:
            pass
    for w in bordercontrol:
        try:
            text = text.replace(w, ' bordercontrol ')
        except:
            pass

    for w in character:
        try:
            text = text.replace(w, ' character ')
        except:
            pass
    
    return text

In [19]:
# apply replace_issues() on each tweet
text_column = []
for t in tweets.itertuples():
    text = t.text
    new_text = replace_issues(text)
    text_column.append(new_text)

In [20]:
# replace old text with new text
tweets.text = text_column

In [21]:
# do another word count
# put all tweets together
text=tweets["text"].values.tolist()
text=" ".join(text)
text = text.lower()

# tokenize the text 
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
tokens = tokenizer.tokenize(text)

# counter object
words = Counter()

# update counter with new words
words.update(tokens)

In [22]:
# issues
issues = ['healthcare', 'taxes', 'climatechange', 'bordercontrol', 'character']

In [23]:
# lets do this again: remove stop words, single letter variable names
filter_words = Counter(x for x in tokens if x not in stopwords and x.isdigit() == False and len(x) != 1 and x.isalpha() == True and x in issues)

In [24]:
filter_words.most_common()[:]

[('healthcare', 730),
 ('character', 289),
 ('taxes', 196),
 ('bordercontrol', 64),
 ('climatechange', 36)]

In [25]:
beto_count = 0
cruz_count = 0
n_tweets = 0
for t in tweets.itertuples():
    if 'beto' in t.text and 'cruz' in t.text:
        beto_count += 1
        cruz_count += 1
        n_tweets += 1
    elif 'beto' in t.text:
        beto_count += 1
        n_tweets += 1
    elif 'cruz' in t.text:
        cruz_count += 1        
        n_tweets += 1

In [26]:
healthcare_count = 0
character_count = 0
taxes_count = 0
bordercontrol_count = 0
for t in tweets.itertuples():
    if 'healthcare' in t.text:
        healthcare_count += 1
    elif 'character' in t.text:
        character_count += 1
    elif 'taxes' in t.text:
        taxes_count += 1
    elif 'bordercontrol' in t.text:
        bordercontrol_count += 1

In [66]:
# beto and issues
bh_count = 0
bc_count = 0
bt_count = 0
bb_count = 0

# cruz and issues
ch_count = 0
cc_count = 0
ct_count = 0
cb_count = 0

for t in tweets.itertuples():
    # beto
    if 'beto' in t.text and 'healthcare' in t.text:
        bh_count += 1
    if 'beto' in t.text and 'character' in t.text:
        bc_count += 1
    if 'beto' in t.text and 'taxes' in t.text:
        bt_count += 1
    if 'beto' in t.text and 'bordercontrol' in t.text:
        bb_count += 1
    
    # cruz
    if 'cruz' in t.text and 'healthcare' in t.text:
        ch_count += 1
    if 'cruz' in t.text and 'character' in t.text:
        cc_count += 1
    if 'cruz' in t.text and 'taxes' in t.text:
        ct_count += 1
    if 'cruz' in t.text and 'bordercontrol' in t.text:
        cb_count += 1

In [449]:
candidate_counts = [beto_count, cruz_count]
issue_counts = [healthcare_count, character_count, taxes_count, bordercontrol_count]
combo_counts = [bh_count, bc_count, bt_count, bb_count, ch_count, cc_count, ct_count, cb_count]
print(combo_counts)
lifts = [] # order: [0] beto vs healthcare, [1] beto vs character, [2] beto vs taxes, [3] beto vs bordercontrol
           #        [4] cruz vs healthcare, [5] cruz vs character, [6] cruz vs taxes, [7] cruz vs bordercontrol
index = 0
for cand_count in candidate_counts:
    for issue_count in issue_counts:
        lifts.append( (len(tweets) * combo_counts[index]) / (cand_count * issue_count) )
        index +=1 # the index should be in the second iteration


[71, 270, 83, 14, 657, 87, 82, 38]


In [29]:
lifts = np.reshape(lifts, (2, 4)).T # reshape for similarities matrix
print(lifts)

[[0.17185941 1.54651302]
 [1.67400496 0.52454828]
 [1.27531682 1.22525669]
 [0.41230122 1.08828694]]


In [30]:
similarities = pd.DataFrame(lifts, columns=['beto', 'cruz'], index=['healthcare', 'character', 'taxes', 'bordercontrol'])

In [31]:
similarities

Unnamed: 0,beto,cruz
healthcare,0.171859,1.546513
character,1.674005,0.524548
taxes,1.275317,1.225257
bordercontrol,0.412301,1.088287


In [159]:
import pickle
import pandas as pd
import numpy as np

In [160]:
df=pd.read_csv('AFINN-111.csv',header=None,sep='\t',names=["word",'score',])

In [161]:
inversedic = pd.read_csv("inversdic.csv",header=None)
inversedic

Unnamed: 0,0
0,not
1,isn't
2,aren't
3,didn't
4,don't
5,wasn't
6,weren't
7,none
8,neither


In [236]:
tweets_test=tweets.iloc[0:6000,1:]

In [237]:
def judgeodd(num):
    if (num/2)*2 == num:
        return 'even'
    else:
        return 'odd'

In [238]:
count_list=[]
n=0
for t in tweets_test.itertuples():
    sum_count = 0
    for m in df.itertuples():
        if m.word in t.text.split(' '):
            c = 0
            a =t.text.split(' ').index(m.word)
            for w in t.text.split(' ')[:a]:
                if w in inversedic.itertuples():
                    c+=1
            if judgeodd(c) == 'odd':
                sum_count=sum_count-m.score

            else:
                sum_count=sum_count+m.score

    count_list.append(sum_count)

In [239]:
n=0
m=0
for i in count_list:
    if i >0:
        n+=1
    elif i <0:
        m+=1       
print(n,m)
    

2004 469


In [240]:
tweets_test['sentiment']=count_list

In [241]:
tweets_test

Unnamed: 0,text,sentiment
0,"b""rt @aliadair22: \xf0\x9f\x90\xa6next, beto beto, running for u.s. senate in #texas. please follow, tweet, contribute, volunteer, anything\xe2\x80\xa6""",1
1,"b""rt @aliadair22: \xf0\x9f\x90\xa6next, beto beto, running for u.s. senate in #texas. please follow, tweet, contribute, volunteer, anything\xe2\x80\xa6""",1
2,"b""cruz, beto try to rally latino voters in texas senate race https://t.co/ehvo0zz7yh #foxnews @mriverafoxnews""",0
3,b'rt @ronnehring: third poll now showing cruz with 8 or 9 point solid lead over beto o\xe2\x80\x99beto (d-hollywood). follows quinnipiac and ny times\xe2\x80\xa6',2
4,"b'beto o\xe2\x80\x99beto, the democratic congressman from el paso, has made the texas race for us senate one of the most widel\xe2\x80\xa6 https://t.co/f9i3qqej6u'",0
5,b'rt @wfaa: does texas\xe2\x80\x99 senate race belong to texans? https://t.co/ccbbwh5pmt https://t.co/gxnrzdjhid',0
6,"b""rt @lizlogan76: beto could win this, because of healthcare alone. this is the winning issue, get the word out, vote for beto if you valu\xe2\x80\xa6""",8
7,"b""rt @foxnews: cruz, beto try to rally latino voters in the texas senate race. https://t.co/ynxbe6uvpb""",0
8,"b""rt @patriot_musket: lots of really good news for house and senate republicans in this weekend's and today's polls. \n\nand lol that cook stil\xe2\x80\xa6""",6
9,"b""rt @amhotflash: beto's campaign has raised more money in a single quarter, than any senate race in the history of this country. #beto is ma\xe2\x80\xa6""",0


In [425]:
df_p1=tweets_test.loc[tweets_test['sentiment'] > 0]
df_z=tweets_test.loc[tweets_test['sentiment'] == 0]
df_n=tweets_test.loc[tweets_test['sentiment'] < 0]
df_p=df_p1.iloc[1:400,:]

In [426]:
import nltk
from nltk.collocations import  BigramCollocationFinder
from nltk.metrics import  BigramAssocMeasures

In [427]:
def bag_of_words(words):
    return dict([(word,True) for word in words])



In [428]:
negFeatures=[]
posFeatures=[]
for i in df_n.itertuples():
    negWords = [ bag_of_words(i.text.split(' ')),'neg']
    negFeatures.append(negWords)
print(negFeatures[0])
for i in df_p.itertuples():
    posWords = [ bag_of_words(i.text.split(' ')),'pos']
    posFeatures.append(posWords)
print(posFeatures[0])

[{"b'if": True, 'you\\xe2\\x80\\x99re': True, 'latina/latino': True, 'and': True, 'vote': True, 'for': True, 'terrible': True, 'cruz,': True, 'you': True, 'should': True, 'no': True, 'longer': True, 'claim': True, 'a': True, 'heritage.': True, '\\n\\ncruz,\\xe2\\x80\\xa6': True, "https://t.co/jzijdvjkz1'": True}, 'neg']
[{'b"rt': True, '@aliadair22:': True, '\\xf0\\x9f\\x90\\xa6next,': True, 'beto': True, 'beto,': True, 'running': True, 'for': True, 'u.s.': True, 'senate': True, 'in': True, '#texas.': True, 'please': True, 'follow,': True, 'tweet,': True, 'contribute,': True, 'volunteer,': True, 'anything\\xe2\\x80\\xa6"': True}, 'pos']


In [429]:
from nltk.classify.scikitlearn import  SklearnClassifier
from sklearn.linear_model import  LogisticRegression
#print('LogisticRegression`s accuracy is  %f' %score(LogisticRegression()))

In [430]:
from random import shuffle
  
shuffle(posFeatures) #把文本的排列随机化
  
shuffle(negFeatures) #把文本的排列随机化
  
train =  posFeatures[30:]+negFeatures[30:]#训练集(80%)
  
test = posFeatures[:30]+negFeatures[:30]#预测集(验证集)(20%)
  
data,tag = zip(*test)#分离测试集合的数据和标签，便于验证和测试

print(data[1])
print(tag[1])
print(len(train))

{"b'rt": True, '@crimsonfaith88:': True, '\\xf0\\x9f\\x9b\\x91beto': True, 'skates': True, 'across': True, 'the': True, 'stage': True, 'on': True, 'his': True, 'skateboard.': True, 'hoping': True, 'to': True, 'show': True, '\\xe2\\x80\\x9ccool': True, 'guy\\xe2\\x80\\x9d': True, 'side.': True, '': True, 'cruz': True, 'still': True, 'leads': True, 'in': True, "polling.dont\\xe2\\x80\\xa6'": True}
pos
808


In [431]:
from nltk.classify.scikitlearn import  SklearnClassifier
from sklearn.linear_model import  LogisticRegression
from sklearn.metrics import  accuracy_score


In [432]:

  
classifier = SklearnClassifier(LogisticRegression()) #在nltk中使用scikit-learn的接口
classifier.train(train) #训练分类器
  
pred = classifier.classify_many(data) #对测试集的数据进行分类，给出预测的标签


In [433]:
right=0
i=0
while i <len(pred):
    if pred[i]==tag[i]:
        right+=1
    i+=1
print(right/len(pred))

0.9833333333333333


In [434]:
prepareFeatures=[]
for i in df_z.itertuples():
    prepare_Words = bag_of_words(i.text.split(' '))
    prepareFeatures.append(prepare_Words)
print (prepareFeatures[0])

{'b"cruz,': True, 'beto': True, 'try': True, 'to': True, 'rally': True, 'latino': True, 'voters': True, 'in': True, 'texas': True, 'senate': True, 'race': True, 'https://t.co/ehvo0zz7yh': True, '#foxnews': True, '@mriverafoxnews"': True}


In [435]:
pred_zero=classifier.classify_many(prepareFeatures) 

In [436]:
n=0 
for i in pred_zero:
    if i =='pos':
        n+=1
print(n)

152


In [437]:
print(len(pred_zero))

3268


In [438]:
df_z['classifier']=pred_zero
df_p1['classifier']='pos'
df_n['classifier']='neg'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [439]:
result = df_n.append(df_z)
result1=result.append(df_p1)
result1

Unnamed: 0,text,sentiment,classifier
98,"b'if you\xe2\x80\x99re latina/latino and vote for terrible cruz, you should no longer claim a latina/latino heritage. \n\ncruz,\xe2\x80\xa6 https://t.co/jzijdvjkz1'",-4,neg
187,b'rt @bill1601: @lucihoneychurch @thehill too bad since you seem to know so much about the texas senate race.',-3,neg
217,b'rt @kfox14: the republican side of the senate race made a stop in el paso. @kayleekfox_cbs explains how both men differentiacruz their campa\xe2\x80\xa6',-1,neg
228,"b'rt @xan_desanctis: can someone explain to me why we\xe2\x80\x99ve gotten no fewer than five polls of the texas senate race in the last month, but a to\xe2\x80\xa6'",-1,neg
229,"b'rt @xan_desanctis: can someone explain to me why we\xe2\x80\x99ve gotten no fewer than five polls of the texas senate race in the last month, but a to\xe2\x80\xa6'",-1,neg
230,"b'rt @xan_desanctis: can someone explain to me why we\xe2\x80\x99ve gotten no fewer than five polls of the texas senate race in the last month, but a to\xe2\x80\xa6'",-1,neg
231,"b'can someone explain to me why we\xe2\x80\x99ve gotten no fewer than five polls of the texas senate race in the last month, but\xe2\x80\xa6 https://t.co/yh9jn1nvqf'",-1,neg
419,"b'@southerngaldec @johncornyn girl, leave that crazy in georgia where it belongs &amp; let the adults in texas worry abou\xe2\x80\xa6 https://t.co/k94vjzlzzz'",-6,neg
474,"b""the media saw what they wancruz to see in the texas senate race. too bad for beto it wasn't reality.\xe2\x80\xa6 https://t.co/ozaaubvmzl""",-3,neg
497,b'rt @kfox14: the republican side of the senate race made a stop in el paso. @kayleekfox_cbs explains how both men differentiacruz their campa\xe2\x80\xa6',-1,neg


In [441]:
n_n1=0
n_p1=0
n_n2=0
n_p2=0
n_n3=0
n_p3=0
n_n4=0
n_p4=0
n_n5=0
n_p5=0
for t in result1.itertuples():
    # beto
    if 'beto' in t.text and 'character' in t.text:
            if t.classifier=='pos':
                n_p1+=1
            elif t.classifier=='neg':
                n_n1+=1
    try: 
        a1=n_p1+n_n1
        b1=n_p1/a1
    
    except:
            pass

    if 'beto' in t.text and 'taxes' in t.text:
            if t.classifier=='pos':
                n_p2+=1
            elif t.classifier=='neg':
                n_n2+=1
    try: 
        a2=n_p2+n_n2
        b2=n_p2/a2
    
    except:
            pass
    
    # cruz
    if 'cruz' in t.text and 'healthcare' in  t.text:
        if t.classifier=='pos':
                n_p3+=1
        elif t.classifier=='neg':
                n_n3+=1
    try: 
        a3=n_p3+n_n3
        b3=n_p3/a3
    except:
            pass
    if 'cruz' in t.text and 'taxes' in t.text:
        if t.classifier=='pos':
                n_p4+=1
        elif t.classifier=='neg':
                n_n4+=1
    try: 
        a4=n_p4+n_n4
        b4=n_p4/a4
    except:
            pass
    if 'cruz' in t.text and 'bordercontrol' in t.text:
        if t.classifier=='pos':
                n_p5+=1
        elif t.classifier=='neg':
                n_n5+=1
    try: 
        a5=n_p5+n_n5
        b5=n_p5/a5
    except:
            pass
    
print(b1,b2,b3, b4,b5)


0.7037037037037037 0.9759036144578314 0.0091324200913242 0.9878048780487805 0.9473684210526315


In [445]:
final_table = pd.DataFrame(None, columns=['lift', 'sentiment score(goodness)','sentiment score(badness)'], index=['healthcare & beto', 'character& beto', 'taxes & beto', 'bordercontrol & beto',
                                                                             'healthcare & cruz','character& cruz','taxes & cruz','bordercontrol & cruz'])

In [446]:
final_table['lift']=lifts 
final_table['sentiment score(goodness)']['character& beto']=b1
final_table['sentiment score(goodness)']['taxes & beto']=b2
final_table['sentiment score(goodness)']['healthcare & cruz']=b3
final_table['sentiment score(goodness)']['taxes & cruz']=b4
final_table['sentiment score(goodness)']['bordercontrol & cruz']=b5
final_table['sentiment score(badness)']['character& beto']=1-b1
final_table['sentiment score(badness)']['taxes & beto']=1-b2
final_table['sentiment score(badness)']['healthcare & cruz']=1-b3
final_table['sentiment score(badness)']['taxes & cruz']=1-b4
final_table['sentiment score(badness)']['bordercontrol & cruz']=1-b5



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://p

In [447]:
final_table

Unnamed: 0,lift,sentiment score(goodness),sentiment score(badness)
healthcare & beto,0.171859,,
character& beto,1.674005,0.703704,0.296296
taxes & beto,1.275317,0.975904,0.0240964
bordercontrol & beto,0.412301,,
healthcare & cruz,1.546513,0.00913242,0.990868
character& cruz,0.524548,,
taxes & cruz,1.225257,0.987805,0.0121951
bordercontrol & cruz,1.088287,0.947368,0.0526316
