-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e014578
commit 2e241ee
Showing
2 changed files
with
66 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
id,time,created_at,from_user_name,text,filter_level,possibly_sensitive,withheld_copyright,withheld_scope,truncated,retweet_count,favorite_count,lang,to_user_name,in_reply_to_status_id,quoted_status_id,source,location,lat,lng,from_user_id,from_user_realname,from_user_verified,from_user_description,from_user_url,from_user_profile_image_url,from_user_utcoffset,from_user_timezone,from_user_lang,from_user_tweetcount,from_user_followercount,from_user_friendcount,from_user_favourites_count,from_user_listed,from_user_withheld_scope,from_user_created_at,neg,neu,pos,compound,label | ||
696329245866512384,1454852804,2016-02-07 13:46:44,realDonaldTrump,I will be on Meet the Press with Chuck Todd on NBC this morning. Enjoy! https://t.co/EIYyfFtnPs,none,0,,,0,1296,5194,en,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.8,0.2,0.5411,announce | ||
696346322735988736,1454856875,2016-02-07 14:54:35,realDonaldTrump,.@ABCPolitics #GOPDebate #MakeAmericaGreatAgain #FITN https://t.co/jM6wrUGQox,none,0,,,0,2900,6908,und,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,1.0,0.0,0.0,communicate | ||
696370121154150400,1454862549,2016-02-07 16:29:09,realDonaldTrump,Great to meet everyone while having breakfast @ChezVachon this morning! #FITN #VoteTrumpNH https://t.co/25UwkinUOd https://t.co/bmZvSmWe7Y,none,0,,,0,1725,5299,en,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.747,0.253,0.6588,pleased | ||
696424442717601792,1454875500,2016-02-07 20:05:00,realDonaldTrump,"We are going to have a big event at the Verizon Wireless Arena in Manchester, New Hampshire! 5K+! Join us tomorrow: https://t.co/HVfhtIPcIp",none,0,,,0,1753,4914,en,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.878,0.122,0.4184,announce | ||
696428451977433088,1454876456,2016-02-07 20:20:56,realDonaldTrump,Thank you Newt! https://t.co/6FkwdpI0Oj,none,0,,,0,2250,6491,en,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.518,0.482,0.4199,pleased | ||
696447301783592961,1454880950,2016-02-07 21:35:50,realDonaldTrump,"Thank you- Plymouth, New Hampshire! #FITN #NHPrimary https://t.co/1hzn6iZIEw",none,0,,,0,1952,6200,en,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.715,0.285,0.4199,pleased | ||
696463477104365568,1454884807,2016-02-07 22:40:07,realDonaldTrump,I am in New Hampshire having a great time! Loved the #GOPDebate last night! Everybody enjoy the Super Bowl. #SuperBowlSunday #SB50,none,,,,0,2315,9074,en,,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.489,0.511,0.9492,pleased | ||
696514666843996160,1454897011,2016-02-08 02:03:31,realDonaldTrump,So far the Super Bowl is very boring - not nearly as exciting as politics - MAKE AMERICA GREAT AGAIN!,none,,,,0,15779,30735,en,,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.183,0.491,0.326,0.7242,grumpy | ||
696665030549487616,1454932861,2016-02-08 12:01:01,realDonaldTrump,"My two wonderful sons, Don and Eric, will be on @foxandfriends at 7:02 - now! Enjoy.",none,,,,0,1034,5320,en,,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","New York, NY",,,25073877,Donald J. Trump,1,,https://t.co/mZB2hymxC9,http://pbs.twimg.com/profile_images/1980294624/DJT_Headshot_V2_normal.jpg,-14400,Eastern Time (US & Canada),en,33872,12843276,41,44,40746,,2009-03-18 13:46:38,0.0,0.644,0.356,0.8016,announce |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# a little script that trains an NLTK Naive Bayes classifier on a hand-labeled TCAT tweet file, uses it to label the tweets in a second TCAT tweet file, | ||
# and writes a new file that adds one column ('label') on the right | ||
|
||
# The VADER library is documented here: | ||
# Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text. | ||
# Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014. | ||
|
||
import csv

import nltk
from nltk.tokenize import word_tokenize

# change as required:
filename_labeled = 'tcat_trump_5510tweets_labeled.csv'
colname_labeled_text = 'text'
colname_labeled_label = 'label'

filename_tolabel = 'tcat_trump_5510tweets.csv'
colname_tolabel_text = 'text'

# NOTE(review): files are opened with the platform default encoding, as in the
# original script; pass encoding=... to open() if the exports are not in it.


def tokenize_lowercase(text):
    """Return the set of distinct NLTK tokens in the lowercased *text*.

    Tokenizing once per text and keeping the result as a set makes the later
    "is this vocabulary word present?" checks O(1) instead of re-running the
    tokenizer for every vocabulary word.
    """
    return set(word_tokenize(text.lower()))


def build_features(tokens, vocabulary):
    """Map every word in *vocabulary* to whether it occurs in *tokens*.

    This is the boolean bag-of-words feature dict handed to the classifier,
    for training and for classification alike.
    """
    return {word: (word in tokens) for word in vocabulary}


# read the hand-labeled tweets as (token set, label) pairs
with open(filename_labeled, newline='') as labeled_file:
    csvreader_labeled = csv.DictReader(labeled_file, delimiter=',', quotechar='"')
    train = [
        (tokenize_lowercase(row[colname_labeled_text]), row[colname_labeled_label])
        for row in csvreader_labeled
    ]

# create the overall feature vector: every token seen in the labeled tweets
all_words = set()
for tokens, _ in train:
    all_words |= tokens

# create a feature vector for each text passage
t = [(build_features(tokens, all_words), label) for tokens, label in train]

# train the classifier
classifier = nltk.NaiveBayesClassifier.train(t)
# classifier.show_most_informative_features()

# Progress counter: number of physical lines in the file to label. This
# includes the header line, so the countdown printed below ends above zero.
with open(filename_tolabel) as counting_file:
    rowcount = sum(1 for _ in counting_file)

# read the lines to label and write to new file
with open(filename_tolabel, newline='') as tolabel_file:
    csvreader_tolabel = csv.DictReader(tolabel_file, delimiter=',', quotechar='"')

    # The output has the input columns plus the predicted label on the right.
    colnames = csvreader_tolabel.fieldnames
    colnames.extend(['label'])

    with open(filename_tolabel[:-4] + "_BAYES.csv", 'w', newline='') as csvwrite:
        csvwriter = csv.DictWriter(csvwrite, fieldnames=colnames)
        csvwriter.writeheader()

        for row in csvreader_tolabel:
            line_features = build_features(
                tokenize_lowercase(row[colname_tolabel_text]), all_words
            )
            row.update({'label': classifier.classify(line_features)})
            csvwriter.writerow(row)
            rowcount -= 1
            print(rowcount)