# Libraries Used

In [1]:
import pandas as pd
import tweepy
import requests
import re
import emoji
from access_token import tokens

# Configuring Twitter API

In [2]:
# Pull the two credential pairs out of the `tokens` mapping imported above.
consumer_credentials = (tokens['api_token'], tokens['api_token_secret'])
user_credentials = (tokens['access_token'], tokens['access_token_secret'])

# Build the OAuth handler from the consumer pair, then attach the user's access token pair.
auth = tweepy.OAuthHandler(*consumer_credentials)
auth.set_access_token(*user_credentials)

# API client shared by the cells below; wait_on_rate_limit=True asks tweepy
# to wait when the rate limit is reached.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Use Pandas DataFrame to store downloaded tweets

In [3]:
# Collect tweets
# Search query: the #trump hashtag, with retweets filtered out
query = "#trump" + " -filter:retweets"

# Upper bound on tweet creation date (YYYY-MM-DD), passed to the search as `until`
cutoff_date = "2022-03-19"

# Maximum number of tweets to download
max_tweets = 200

# tweet_mode="extended" requests the complete tweet body, which tweepy exposes
# as `full_text`. In the default compatibility mode `text` is cut off at
# roughly 140 characters and ends with "…", so longer tweets would be stored
# incomplete.
tweets = tweepy.Cursor(
    api.search_tweets,
    q=query,
    lang="en",
    until=cutoff_date,
    tweet_mode="extended",
).items(max_tweets)

# One row per tweet: creation date, author screen name, author location, full text
tweets_list = [
    [tweet.created_at, tweet.user.screen_name, tweet.user.location, tweet.full_text]
    for tweet in tweets
]

# Convert to a DataFrame (the column is still called 'text' so later cells keep working)
tweets_df = pd.DataFrame(data=tweets_list, columns=['date', 'user', 'location', 'text'])

In [4]:
# Show the collected tweets; as the last expression in the cell, the frame is rendered as the cell's output.
tweets_df

Unnamed: 0,date,user,location,text
0,2022-03-18 23:59:04+00:00,sosajoejr559,Hanford CA,45th President Donald J. Trump to Hold a Rally...
1,2022-03-18 23:59:01+00:00,W_Mead_,,@MarshaBlackburn #trump and the @GOP already t...
2,2022-03-18 23:58:57+00:00,LegalLining,The Big Apple,Russia prepared Trump to be Putin's Puppet for...
3,2022-03-18 23:58:33+00:00,RedCacheONLY,,Alex Jones on why Full Send Podcast Trump Inte...
4,2022-03-18 23:58:00+00:00,Theboveed,,Donald Trump wants his supporters to carry Tru...
...,...,...,...,...
195,2022-03-18 23:01:29+00:00,DorothyBeach,"Ohio, USA",@JudiciaryDems An honor that was not given to ...
196,2022-03-18 23:00:46+00:00,randomtrump1,,Random Trump https://t.co/wIrKt8Vmsq - #random...
197,2022-03-18 23:00:34+00:00,cozharz,Troy,This #atheist murderous KGB #dictator quoted #...
198,2022-03-18 23:00:07+00:00,DemActionToday,,@mazemoore #PutinsPuppet https://t.co/QNNLLMpQ...


In [5]:
# Save the downloaded tweets to 'test.csv' (header row included, index column omitted)
# so they can be reloaded later with pd.read_csv.
tweets_df.to_csv(
    'test.csv',
    index=False,
    header=True,
    encoding='utf8',
    quotechar='"',
)

# Data Cleaning

In [15]:
# Reload the tweets from 'test.csv' so that cleaning always starts from the
# saved raw data and this cell gives the same result every time it is re-run.
tweets_df = pd.read_csv('test.csv', quotechar='"', encoding='utf8')

# Patterns are compiled once instead of on every row.
# A URL is "http://" or "https://" followed by everything up to the next whitespace.
URL_PATTERN = re.compile(r'https?://\S+')
PUNCTUATION_PATTERN = re.compile(r'[,.!?…]')
LINE_BREAK_PATTERN = re.compile(r'[\r\n]+')


def clean_tweet_text(text):
    """Return a normalised copy of a single tweet's text.

    Steps, applied in this order:
      1. remove hyperlinks (must run before punctuation removal, which
         would otherwise break the URLs apart);
      2. replace the punctuation marks , . ! ? … with a space;
      3. replace line breaks with a space so neighbouring words stay separate;
      4. lowercase the text;
      5. convert emojis to their ``:alias:`` text form with ``emoji.demojize``
         (emojis are converted, not deleted).

    Args:
        text: the tweet text as a ``str``.

    Returns:
        The cleaned text as a ``str``.
    """
    text = URL_PATTERN.sub('', text)
    text = PUNCTUATION_PATTERN.sub(' ', text)
    text = LINE_BREAK_PATTERN.sub(' ', text)
    text = text.lower()
    return emoji.demojize(text)


# An empty text field is read back from CSV as NaN (a float), which would make
# the regex calls raise TypeError; treat it as an empty string instead.
tweets_df['text'] = tweets_df['text'].fillna('').astype(str).map(clean_tweet_text)

# Store the processed tweets to CSV
tweets_df.to_csv('test_cleaned.csv', quotechar='"', encoding='utf8', index=False, header=True)