## 1. Import packages

In [None]:
#!pip install tweepy

In [None]:
import os
import tweepy as tw
import pandas as pd
from tqdm import tqdm, notebook

# Remove every pandas display limit (columns, rows, cell width, total width)
# so that DataFrames are rendered in full.
for display_option in (
    "display.max_columns",
    "display.max_rows",
    "display.max_colwidth",
    "display.width",
):
    pd.set_option(display_option, None)

## 2. Twitter API authentication

In [None]:
# Read the four Twitter API credentials from environment variables of the
# same names. A missing variable raises KeyError.
consumer_key, consumer_secret, access_token, access_token_secret = (
    os.environ[variable_name]
    for variable_name in (
        "consumer_key",
        "consumer_secret",
        "access_token",
        "access_token_secret",
    )
)

In [None]:
# Create the OAuth handler from the application (consumer) credentials, then
# attach the user's access token pair to it.
auth = tw.OAuthHandler(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
)
auth.set_access_token(key=access_token, secret=access_token_secret)

# API client used by all later cells; wait_on_rate_limit=True is passed so
# tweepy handles rate limiting instead of this notebook.
api = tw.API(auth, wait_on_rate_limit=True)

## 3. Tweets query

### 3.1. Define the query

In [None]:
# Search query: replace key_word with the hashtag to collect tweets for.
# The "-filter:retweets" operator is part of the query string sent to Twitter.
search_words = "#key_word -filter:retweets"

# Cursor over the search endpoint, restricted to English-language tweets and
# limited to 15000 items.
search_cursor = tw.Cursor(api.search_tweets, q=search_words, lang="en")
tweets = search_cursor.items(15000)

### 3.2. Retrieve the tweets

In [None]:
# Drain the cursor into a plain list so the tweets can be iterated again
# later; tqdm shows progress while the items are being fetched.
tweets_copy = []
tweets_copy.extend(tqdm(tweets))

In [None]:
# Report how many tweets were collected. The f-prefix is required: without it
# the placeholder is printed literally instead of the count.
print(f"new tweets retrieved: {len(tweets_copy)}")

## 4. Populate the dataset

In [None]:
# Build one plain-dict row per tweet and create the DataFrame once at the end.
# `DataFrame.append` was removed in pandas 2.0, and appending inside the loop
# copies the whole frame on every iteration.
rows = []
for tweet in tqdm(tweets_copy):
    # Hashtag texts attached to the tweet; tolerate a missing "entities"
    # attribute or a missing "hashtags" key.
    entities = getattr(tweet, "entities", None) or {}
    hashtags = [hashtag["text"] for hashtag in entities.get("hashtags", [])]

    # Text already carried by the search result. It is the per-tweet fallback,
    # so a failed lookup can never leave `text` undefined or holding the
    # previous tweet's text.
    fallback_text = getattr(tweet, "full_text", None) or getattr(tweet, "text", None)
    try:
        # One extra API request per tweet: fetch it again in extended mode to
        # read its `full_text`.
        text = api.get_status(id=tweet.id, tweet_mode='extended').full_text
    except tw.TweepyException:
        # Best effort: if the lookup fails (e.g. the tweet is no longer
        # available), keep the text from the search result.
        text = fallback_text

    rows.append({
        'user_name': tweet.user.name,
        'user_location': tweet.user.location,
        'user_description': tweet.user.description,
        'user_created': tweet.user.created_at,
        'user_followers': tweet.user.followers_count,
        'user_friends': tweet.user.friends_count,
        'user_favourites': tweet.user.favourites_count,
        'user_verified': tweet.user.verified,
        'date': tweet.created_at,
        'text': text,
        # Store the list of hashtags in a single cell, or None when empty.
        'hashtags': hashtags if hashtags else None,
        'source': tweet.source,
        'is_retweet': tweet.retweeted,
    })

# Column order follows the key order of the row dicts; rows get a 0..n-1 index.
tweets_df = pd.DataFrame(rows)

In [None]:
# Preview the first five rows of the collected dataset.
tweets_df.head(n=5)

## 5. Save the tweets

In [None]:
# Write the dataset to tweets.csv in the working directory, leaving out the
# DataFrame index column.
tweets_df.to_csv(path_or_buf="tweets.csv", index=False)