# Fetch Tweets

Download and save tweets that match a **query** value.

In [1]:
from dotenv import load_dotenv
from pathlib import Path

# Resolve the .env file that sits one directory above the current working
# directory and hand it to python-dotenv. The call is kept as the cell's last
# expression so that its return value is displayed.
env_path = (Path('..') / '.env').resolve()
load_dotenv(dotenv_path=env_path)

True

## API access

First of all, we'll connect to the Twitter API

In [2]:
import os

In [3]:
# Read the four Twitter credentials from the process environment.
# A variable that is not set yields None here instead of raising.
consumer_key = os.environ.get("CONSUMER_KEY")
consumer_secret = os.environ.get("CONSUMER_SECRET")
access_token = os.environ.get("ACCESS_TOKEN")
access_token_secret = os.environ.get("ACCESS_TOKEN_SECRET")

In [4]:
from tweepy import OAuthHandler, API, TweepError

In [5]:
# Build an OAuth handler from the consumer credentials, attach the access
# token pair, and wrap the handler in the tweepy API client used by the
# search cell below.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = API(auth)
# NOTE(review): nothing above visibly issues a request, so this message
# presumably does not prove the credentials are valid — confirm.
print('Successfully connected to the Twitter API.')

Successfully connected to the Twitter API.


## Search Tweets

Now we can define our query and search for the tweets containing it.

- **query**: *hashtag* or *emoji* that will be used to fetch the tweets
- **max_requests**: Maximum number of requests to the API.
    - Restriction: 180 requests / 15 min window

In [6]:
# Search term (hashtag or emoji alias); it is used both to build the search
# query and to name the saved .csv file.
# NOTE(review): the recorded output of the formatting cell reports '#angry',
# so this value looks like it was edited after the last run — confirm.
query: str = '#a'
# Upper bound on the number of search requests issued by the fetch loop
# (the API restriction noted above is 180 requests per 15-minute window).
max_requests: int = 180

In [7]:
# Converts aliases to the real emoji representation (e.g. :thumbs_up: => 👍)

from emoji import emojize

In [8]:
# Page through the search results. Each request after the first asks only for
# tweets whose id is below the last id already collected. The loop stops when
# the request budget is spent, the API returns an empty page, or a request
# fails.
q = emojize(query) + ' -filter:retweets'  # expand emoji aliases, drop retweets
searched_tweets = []
last_id = -1  # sentinel: no page has been fetched yet
request_count = 0
while request_count < max_requests:
    search_params = dict(q=q, lang='en', count=100, tweet_mode='extended')
    if last_id > 0:
        # Send an upper bound only once a real tweet id is known. Previously
        # the first request carried a meaningless max_id of "-2" derived from
        # the sentinel.
        search_params['max_id'] = str(last_id - 1)
    try:
        new_tweets = api.search(**search_params)
    except TweepError as e:
        # Best effort: report the failure and keep whatever was fetched so far.
        print(e)
        break
    if not new_tweets:
        break
    searched_tweets.extend(new_tweets)
    last_id = new_tweets[-1].id
    request_count += 1

## Format and save

Format the API data into the desired structure and save it as a `.csv` file.

In [9]:
import pandas as pd

In [10]:
# Flatten every fetched tweet into an [id, date, user, text] row, build the
# DataFrame from all rows at once, and report how many tweets were collected.
data = [
    [status.id, status.created_at, status.user.screen_name, status.full_text]
    for status in searched_tweets
]
df = pd.DataFrame(data=data, columns=['id', 'date', 'user', 'text'])
print(f'{len(data)} {query} tweets')

66 #angry tweets


In [11]:
# Preview the first rows of the DataFrame as a quick sanity check.
df.head()

Unnamed: 0,id,date,user,text
0,1151133382627057664,2019-07-16 14:16:00,DaradeAbhijeet,Don't promise when you are #Happy\n&amp;\nDon'...
1,1151124672496324608,2019-07-16 13:41:23,TheRealFakeJack,@realDonaldTrump 4:20 am it is a sign u need t...
2,1151118984793776129,2019-07-16 13:18:47,masterofnaps,There's a special place in hell for people who...
3,1151115966220328960,2019-07-16 13:06:47,TiknisArts,We know #Trump needs #attention to survive. It...
4,1151113082099232768,2019-07-16 12:55:20,emilieraddish,Get your Instagram photo elsewhere not on top ...


In [12]:
# Save the collected tweets as <query>.csv inside ../datasets/tweepy (resolved
# against the current working directory).
PATH = Path('../datasets/tweepy').resolve()
# to_csv does not create missing directories, so make sure the target exists.
PATH.mkdir(parents=True, exist_ok=True)
# NOTE(review): the query is used verbatim as the file name; an emoji alias
# such as ':thumbs_up:' would put colons in it — confirm this is acceptable
# on every platform the notebook runs on.
filename = query + '.csv'
df.to_csv(PATH / filename, index=False)  # index=False: omit the row index column
print('Saved under: "' + PATH.as_posix() + '"')

Saved under: "/home/rmohashi/Workspace/emotion-from-tweets/datasets/tweepy"
