# Fetch Tweets

Download and save tweets that match a **query** value.

In [1]:
import tweepy
from dotenv import load_dotenv
from pathlib import Path

# Absolute location of the project's .env file.
# NOTE(review): this is a machine-specific Windows path — presumably it has to
# be adjusted before the notebook is run anywhere else.
env_path = Path('C:/Users/Timo/Source/Repos/NLP/NLP_ChatBot/.env').resolve()
# Load the variables defined in that file. This is the last expression of the
# cell, so its return value is what the cell displays as output.
load_dotenv(dotenv_path=env_path)

True

## API access

First, we authenticate with the Twitter API using the credentials stored in the `.env` file.

In [2]:
import os

In [3]:
# Read the four Twitter credentials from the process environment in one pass.
# os.getenv yields None for any variable that is not set.
consumer_key, consumer_secret, access_token, access_token_secret = map(
    os.getenv,
    ("CONSUMER_KEY", "CONSUMER_SECRET", "ACCESS_TOKEN", "ACCESS_TOKEN_SECRET"),
)

In [4]:
from tweepy import OAuthHandler,API,TweepError

In [5]:
# Build the OAuth handler from the application (consumer) credentials and
# attach the user access token, then create the API client on top of it.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = API(auth)

# Creating the client does not contact Twitter, so issue one real request to
# confirm the credentials before reporting success. verify_credentials()
# returns a falsy value when Twitter rejects the credentials; any other
# failure (e.g. network problems) is raised by tweepy and stops the cell.
if not api.verify_credentials():
    raise RuntimeError('Twitter rejected the credentials; check the values in the .env file.')
print('Successfully connected to the Twitter API.')

Successfully connected to the Twitter API.


## Search Tweets

Now we can define our query and search for the tweets containing it.

- **query**: *hashtag* or *emoji* that will be used to fetch the tweets
- **max_requests**: Maximum number of requests to the API.
    - Restriction: 180 requests / 15 min window

In [6]:
# Emoji alias (or hashtag) to search for. It is converted to the real emoji
# before searching and is also used to build the name of the output file.
query = ':anxious_face_with_sweat:'
# Upper bound on the number of search requests the fetch loop may issue.
max_requests = 180

In [7]:
# Converts aliases to the real emoji representation (e.g. :thumbs_up: => 👍)

from emoji import emojize

In [8]:
# Search expression: the emoji itself plus a filter that excludes retweets.
q = emojize(query) + ' -filter:retweets'
searched_tweets = []
# Id of the last tweet on the most recently fetched page; -1 means that no
# page has been fetched yet.
last_id = -1
request_count = 0
while request_count < max_requests:
    search_kwargs = {'q': q, 'lang': 'en', 'count': 100, 'tweet_mode': 'extended'}
    if last_id >= 0:
        # Page backwards: only ask for tweets strictly below the last id that
        # was already collected, so consecutive pages never overlap. On the
        # very first request no max_id is sent at all (previously the sentinel
        # leaked into the request as the invalid id '-2').
        search_kwargs['max_id'] = str(last_id - 1)
    try:
        new_tweets = api.search(**search_kwargs)
    except TweepError as e:
        # Best effort: report the failure (e.g. rate limit reached) and keep
        # everything that has been collected so far.
        print(e)
        break
    # Every completed call counts against the request budget, including the
    # final one that comes back empty.
    request_count += 1
    if not new_tweets:
        # Nothing further was returned for this query; stop paging.
        break
    searched_tweets.extend(new_tweets)
    last_id = new_tweets[-1].id

## Format and save

Format the API data to the desired structure and save a `.csv` file

In [9]:
import pandas as pd

In [10]:
# One row per fetched tweet: id, creation time, author's screen name, full text.
data = [
    [status.id, status.created_at, status.user.screen_name, status.full_text]
    for status in searched_tweets
]
df = pd.DataFrame(data=data, columns=['id', 'date', 'user', 'text'])
# Report how many tweets were collected for the query.
print(f'{len(data)} {query} tweets')

17997 :anxious_face_with_sweat: tweets


In [11]:
# Preview the first rows of the DataFrame as a quick sanity check before saving.
df.head()

Unnamed: 0,id,date,user,text
0,1335882489886371842,2020-12-07 09:42:59,watashi_no_maho,@notyourpil i totally forgot 😰
1,1335882444193591296,2020-12-07 09:42:48,ramendates,chungha get well soon 😰💛
2,1335882402909007873,2020-12-07 09:42:39,shona_man,@rejecteee16 😰yoo sorry
3,1335882402829361153,2020-12-07 09:42:38,JunesFavGemini,My contact stuck in my eye! Anybody know how t...
4,1335882380331143169,2020-12-07 09:42:33,LostinTokio,Going to see the specialist again about my bre...


In [12]:
# Directory that receives the exported CSV file.
# NOTE(review): machine-specific absolute path — presumably needs adjusting
# when the notebook is run on another machine.
PATH = Path('C:/Users/Timo/Source/Repos/NLP/NLP_ChatBot/datasets/tweepy').resolve()
# to_csv does not create missing parent directories, so make sure the target
# directory exists before writing.
PATH.mkdir(parents=True, exist_ok=True)
# Derive the file name from the query alias, replacing its colons with 'Z'
# (presumably because ':' is not a valid character in Windows file names).
filename = query.replace(':', 'Z') + '.csv'
# Write the tweets without the DataFrame's positional index column.
df.to_csv(PATH / filename, index=False)
print('Saved under: "' + PATH.as_posix() + '"')

Saved under: "C:/Users/Timo/Source/Repos/NLP/NLP_ChatBot/datasets/tweepy"
