# Fetch Tweets

Download and save tweets that match a **query** value.

In [13]:
from dotenv import load_dotenv
from pathlib import Path

# Resolve the `.env` file located one directory above the current working
# directory to an absolute path, then pass it to load_dotenv.
env_path = Path('../.env').resolve()
# Last expression of the cell, so its return value is shown as the cell output.
load_dotenv(dotenv_path=env_path)

True

## API access

First of all, we'll connect to the Twitter API

In [14]:
import os

In [15]:
# Read the four Twitter credentials from the process environment in one pass.
# A variable that is not set yields None (os.getenv's default).
consumer_key, consumer_secret, access_token, access_token_secret = (
    os.getenv(variable_name)
    for variable_name in (
        "CONSUMER_KEY",
        "CONSUMER_SECRET",
        "ACCESS_TOKEN",
        "ACCESS_TOKEN_SECRET",
    )
)

In [16]:
from tweepy import OAuthHandler, API, TweepError

In [17]:
# Build the OAuth handler from the application (consumer) credentials,
# attach the user access token, and create the API client used by the search.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = API(auth)
# NOTE(review): nothing in this cell visibly sends a request, so this message
# presumably only confirms that the client object was created - confirm whether
# the credentials should be verified before printing it.
print('Successfully connected to the Twitter API.')

Successfully connected to the Twitter API.


## Search Tweets

Now we can define our query and search for the tweets containing it.

- **query**: *hashtag* or *emoji* that will be used to fetch the tweets
- **max_requests**: Maximum number of requests to the API.
    - Restriction: 180 requests / 15 min window

In [18]:
# Hashtag or emoji alias to search for; also used to name the output CSV file.
query = '#RIPLewis'
# Upper bound on the number of search requests made by the fetch loop
# (the rate limit noted above is 180 requests per 15 min window).
max_requests = 180

In [19]:
# Converts aliases to the real emoji representation (e.g. :thumbs_up: => 👍)

from emoji import emojize

In [20]:
# Build the search query: expand emoji aliases in `query` and exclude retweets.
q = emojize(query) + ' -filter:retweets'
searched_tweets = []
last_id = -1  # sentinel: no page has been fetched yet
request_count = 0
while request_count < max_requests:
    # Parameters shared by every page request.
    search_params = dict(q=q, lang='en', count=100, tweet_mode='extended')
    if last_id != -1:
        # Continue below the last tweet of the previous page. `max_id` is
        # inclusive, hence the -1. It is omitted on the first request so the
        # API never receives a meaningless negative ID.
        search_params['max_id'] = str(last_id - 1)
    try:
        new_tweets = api.search(**search_params)
    except TweepError as e:
        # Report the API error and keep whatever has been collected so far.
        print(e)
        break
    if not new_tweets:
        # An empty page means there is nothing left to fetch.
        break
    searched_tweets.extend(new_tweets)
    last_id = new_tweets[-1].id
    # Only successful, non-empty requests count towards `max_requests`.
    request_count += 1

## Format and save

Format the API data to the desired structure and save a `.csv` file

In [21]:
import pandas as pd

In [22]:
# One row per fetched tweet: [id, creation time, author screen name, full text].
data = [
    [status.id, status.created_at, status.user.screen_name, status.full_text]
    for status in searched_tweets
]
df = pd.DataFrame(data=data, columns=['id', 'date', 'user', 'text'])
# Report how many tweets were collected for the query.
print(f'{len(data)} {query} tweets')

2683 #RIPLewis tweets


In [23]:
# Preview the first rows to sanity-check the columns before saving.
df.head()

Unnamed: 0,id,date,user,text
0,1199615410137681920,2019-11-27 09:06:16,KnockaFN,"#RIPLewis, the little koala passed away that i..."
1,1199614317135638529,2019-11-27 09:01:55,7_revealed,I’m glad Lewis the Koala was rescued. His bod...
2,1199613738925592577,2019-11-27 08:59:37,viki__xx,heartbreaking news! 💔 #RIPLewis. https://t.co/...
3,1199611053056937985,2019-11-27 08:48:57,orivios,I just realized #RIPLewis was about a Koala th...
4,1199610786601152512,2019-11-27 08:47:53,AnayiaMelanin,We got kicked out the science center btw but i...


In [24]:
# Destination directory for the exported dataset, resolved to an absolute path.
PATH = Path('../datasets/tweepy').resolve()
# Create the directory (and any missing parents) so saving also works on a
# fresh checkout; without this, to_csv fails when the folder does not exist.
PATH.mkdir(parents=True, exist_ok=True)
# NOTE(review): the query is used verbatim as the file name; emoji aliases such
# as ':thumbs_up:' contain ':' which is not allowed in Windows file names.
filename = query + '.csv'
# index=False: do not write the DataFrame's row index as an extra CSV column.
df.to_csv(PATH / filename, index=False)
print('Saved under: "' + PATH.as_posix() + '"')

Saved under: "/Users/vladislavklyuev/Desktop/Thesis/Realisation/emotion-from-tweet-1.0.0/datasets/tweepy"
