Skip to content

Commit

Permalink
added Bayes
Browse files Browse the repository at this point in the history
  • Loading branch information
bernorieder committed Jun 25, 2018
1 parent e014578 commit 2e241ee
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 0 deletions.
10 changes: 10 additions & 0 deletions tcat_trump_5510tweets_labeled.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
id,time,created_at,from_user_name,text,filter_level,possibly_sensitive,withheld_copyright,withheld_scope,truncated,retweet_count,favorite_count,lang,to_user_name,in_reply_to_status_id,quoted_status_id,source,location,lat,lng,from_user_id,from_user_realname,from_user_verified,from_user_description,from_user_url,from_user_profile_image_url,from_user_utcoffset,from_user_timezone,from_user_lang,from_user_tweetcount,from_user_followercount,from_user_friendcount,from_user_favourites_count,from_user_listed,from_user_withheld_scope,from_user_created_at,neg,neu,pos,compound,label
696329245866512384,1454852804,2016-02-07 13:46:44,realDonaldTrump,I will be on Meet the Press with Chuck Todd on NBC this morning. Enjoy! https://t.co/EIYyfFtnPs,none,0,,,0,1296,5194,en,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.8,0.2,0.5411,announce
696346322735988736,1454856875,2016-02-07 14:54:35,realDonaldTrump,.@ABCPolitics #GOPDebate #MakeAmericaGreatAgain #FITN https://t.co/jM6wrUGQox,none,0,,,0,2900,6908,und,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,1.0,0.0,0.0,communicate
696370121154150400,1454862549,2016-02-07 16:29:09,realDonaldTrump,Great to meet everyone while having breakfast @ChezVachon this morning! #FITN #VoteTrumpNH https://t.co/25UwkinUOd https://t.co/bmZvSmWe7Y,none,0,,,0,1725,5299,en,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.747,0.253,0.6588,pleased
696424442717601792,1454875500,2016-02-07 20:05:00,realDonaldTrump,"We are going to have a big event at the Verizon Wireless Arena in Manchester, New Hampshire! 5K+! Join us tomorrow: https://t.co/HVfhtIPcIp",none,0,,,0,1753,4914,en,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.878,0.122,0.4184,announce
696428451977433088,1454876456,2016-02-07 20:20:56,realDonaldTrump,Thank you Newt! https://t.co/6FkwdpI0Oj,none,0,,,0,2250,6491,en,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.518,0.482,0.4199,pleased
696447301783592961,1454880950,2016-02-07 21:35:50,realDonaldTrump,"Thank you- Plymouth, New Hampshire! #FITN #NHPrimary https://t.co/1hzn6iZIEw",none,0,,,0,1952,6200,en,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.715,0.285,0.4199,pleased
696463477104365568,1454884807,2016-02-07 22:40:07,realDonaldTrump,I am in New Hampshire having a great time! Loved the #GOPDebate last night! Everybody enjoy the Super Bowl. #SuperBowlSunday #SB50,none,,,,0,2315,9074,en,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.489,0.511,0.9492,pleased
696514666843996160,1454897011,2016-02-08 02:03:31,realDonaldTrump,So far the Super Bowl is very boring - not nearly as exciting as politics - MAKE AMERICA GREAT AGAIN!,none,,,,0,15779,30735,en,,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.183,0.491,0.326,0.7242,grumpy
696665030549487616,1454932861,2016-02-08 12:01:01,realDonaldTrump,"My two wonderful sons, Don and Eric, will be on @foxandfriends at 7:02 - now! Enjoy.",none,,,,0,1034,5320,en,,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.644,0.356,0.8016,announce
56 changes: 56 additions & 0 deletions test_bayes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# a little script that takes a hand-labeled TCAT tweet file as training data, trains an NLTK Naive Bayes classifier on it,
# and writes a new file that adds a 'label' column on the right of a second (unlabeled) TCAT tweet file

# The VADER library is documented here:
# Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text.
# Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.

# change as required:
# CSV file with the hand-labeled tweets the classifier is trained on
filename_labeled = 'tcat_trump_5510tweets_labeled.csv'
# column of the labeled file holding the tweet text (lowercased before tokenizing)
colname_labeled_text = 'text'
# column of the labeled file holding the class label for each tweet
colname_labeled_label = 'label'

# CSV file with the tweets to classify; the output file name is derived from it
# by dropping the last four characters and appending "_BAYES.csv"
filename_tolabel = 'tcat_trump_5510tweets.csv'
# column of the file to classify holding the tweet text
colname_tolabel_text = 'text'

import csv
import nltk
from nltk.tokenize import word_tokenize

# Load the hand-labeled tweets as a list of (lowercased text, label) pairs.
# The file is opened with newline='' as the csv module documentation asks, so
# that line breaks embedded in quoted fields are handled by the csv reader,
# and inside a `with` block so the handle is closed once the rows are read.
with open(filename_labeled, newline='') as csvread_labeled:
    csvreader_labeled = csv.DictReader(csvread_labeled, delimiter=',', quotechar='"')
    train = [
        (row[colname_labeled_text].lower(), row[colname_labeled_label])
        for row in csvreader_labeled
    ]

# Tokenize every training passage exactly once. Keeping the tokens in a set
# makes the per-word membership tests below O(1) instead of re-running the
# tokenizer (and scanning a list) for every word of the vocabulary.
tokenized_train = [(set(word_tokenize(text)), label) for text, label in train]

# create the overall feature vector: every distinct token seen in the training passages
all_words = set().union(*(tokens for tokens, _ in tokenized_train))

# create a feature vector for each text passage:
# one boolean "passage contains this word" feature per vocabulary word
t = [
    ({word: (word in tokens) for word in all_words}, label)
    for tokens, label in tokenized_train
]

# train the classifier
classifier = nltk.NaiveBayesClassifier.train(t)
# uncomment to inspect which words drive the classification:
# classifier.show_most_informative_features()

# read the lines to label and write to new file

# Count the physical lines of the input up front so progress can be printed as
# a countdown. The count includes the header line (and any line breaks inside
# quoted fields), so the countdown is only approximate.
with open(filename_tolabel) as csvcount:
    rowcount = sum(1 for _ in csvcount)

# Both files are opened with newline='' as the csv module documentation asks,
# and inside a `with` block so they are flushed and closed even if
# classification fails part-way through.
with open(filename_tolabel, newline='') as csvread_tolabel, \
        open(filename_tolabel[:-4] + "_BAYES.csv", 'w', newline='') as csvwrite:
    csvreader_tolabel = csv.DictReader(csvread_tolabel, delimiter=',', quotechar='"')

    # NOTE: this extends the reader's own fieldnames list in place, so the
    # output header is the input header plus a trailing 'label' column.
    colnames = csvreader_tolabel.fieldnames
    colnames.extend(['label'])

    csvwriter = csv.DictWriter(csvwrite, fieldnames=colnames)
    csvwriter.writeheader()

    for row in csvreader_tolabel:
        # tokenize the tweet once; the set makes each vocabulary lookup O(1)
        tokens = set(word_tokenize(row[colname_tolabel_text].lower()))
        # same boolean "contains word" features the classifier was trained on
        line_features = {word: (word in tokens) for word in all_words}
        row.update({'label': classifier.classify(line_features)})
        csvwriter.writerow(row)
        rowcount -= 1
        print(rowcount)

0 comments on commit 2e241ee

Please sign in to comment.