In [15]:
#import libraries

import re
import string
import time
from pathlib import Path

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
import tweepy
from decouple import config
from nltk import word_tokenize, FreqDist
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import TweetTokenizer
from tweepy.error import TweepError

%matplotlib inline

**Twitter API: getting started**

https://developer.twitter.com/en/docs/twitter-api/getting-started/guide

**Set up access to Twitter API**

In [16]:
# Twitter API credentials, looked up by name through python-decouple's `config`
# so the secret values themselves never appear in the notebook.
# The four settings must be defined wherever `config` reads from
# (presumably environment variables or a local .env file -- confirm for your setup).
ACCESS_TOKEN, ACCESS_SECRET, CONSUMER_KEY, CONSUMER_SECRET = [
    config(setting_name)
    for setting_name in ('ACCESS_TOKEN', 'ACCESS_SECRET', 'CONSUMER_KEY', 'CONSUMER_SECRET')
]


#setup access to API
def connect_to_twitter_OAuth():
    """Build an authenticated tweepy API client.

    Uses the module-level CONSUMER_KEY / CONSUMER_SECRET to create the OAuth
    handler and ACCESS_TOKEN / ACCESS_SECRET as the user access token.

    Returns:
        tweepy.API: client created with ``wait_on_rate_limit``,
        ``wait_on_rate_limit_notify`` and ``compression`` all enabled.
    """
    oauth_handler = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    oauth_handler.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)

    return tweepy.API(
        oauth_handler,
        wait_on_rate_limit=True,
        wait_on_rate_limit_notify=True,
        compression=True,
    )


# Create the API client once; the cells below reuse this `api` object
# for the timeline check and for the hashtag searches.
api = connect_to_twitter_OAuth()

In [63]:
# Smoke-test the API connection: fetch the home timeline through the
# authenticated client and print the text of the first three tweets.
# NOTE(review): the printed output is other accounts' content -- consider
# clearing it before sharing the notebook.
public_tweets = api.home_timeline()
for tweet in public_tweets[:3]:
    print(tweet.text)

Theator, an NVIDIA Inception member, aims to become the brain behind autonomous surgery. #DataScience… https://t.co/H5JJOiiJVZ
Maryland To Become First State To Tax Online Ads Sold By Facebook and Google. https://t.co/sKtIcFwUGf
RT @adamvaughan_uk: .@stubutchart: “There’s lots of doom &amp; gloom stories about biodiversity. It would be easy to feel conservation was a po…


**Data collection**

In [70]:
# Collect tweets for each company hashtag.
# The search window (since/until), the language and the per-hashtag cap are
# set in the Cursor call below.

msgs = []

food_hashtags = ['#cooking', '#ingredients', '#recipes', '#vegetarian', '#vegan', '#healthfood', '#healthyrecipe', '#recipebox']
comp_hashtags = ['#mindfulchef', '#hellofresh', '#gousto', '#simplycook', '#abelandcole', '#morrisonseatfresh', '#riverfordorganicfarmers', '#allplants']

# Stop retrying a hashtag after this many consecutive failed API calls so a
# persistent error (bad credentials, network down) cannot loop forever.
MAX_RETRIES = 3

for hashtag in comp_hashtags:
    print('Searching Hashtag:', hashtag)
    tweet_cursor = tweepy.Cursor(api.search, q=hashtag, lang='en', since='2021-02-08', until='2021-02-17').items(1000)
    failed_attempts = 0
    while True:
        # The API request happens when the cursor is advanced, so TweepError is
        # raised by next() itself. The try block therefore has to wrap the
        # fetch, not the code that reads attributes of an already-fetched tweet.
        try:
            tweet = next(tweet_cursor)
        except StopIteration:
            # Cursor exhausted (no more results or the item cap was reached).
            break
        except TweepError:
            failed_attempts += 1
            if failed_attempts > MAX_RETRIES:
                print('Giving up on', hashtag, 'after', MAX_RETRIES, 'retries')
                break
            print('Error, trying again in 60 secs...')
            time.sleep(60)
            print('Restarting')
            continue

        failed_attempts = 0
        msg = [tweet.text, tweet.favorite_count, tweet.retweet_count, pd.Timestamp(tweet.created_at), hashtag]
        msgs.append(msg)

# One row per collected tweet, tagged with the hashtag that was searched.
df = pd.DataFrame(msgs, columns=['text', 'likes', 'retweets', 'created_at','hashtag'])

Searching Hashtag: #mindfulchef
Searching Hashtag: #hellofresh
Searching Hashtag: #gousto
Searching Hashtag: #simplycook
Searching Hashtag: #abelandcole
Searching Hashtag: #morrisonseatfresh
Searching Hashtag: #riverfordorganicfarmers
Searching Hashtag: #allplants


In [71]:
# Number of tweets collected in this run
df.shape[0]

209

In [72]:
# Preview the first five collected tweets
df.head()

Unnamed: 0,text,likes,retweets,created_at,hashtag
0,RT @MindfulChefUK: Celebrate #PancakeDay the h...,0,1,2021-02-16 18:38:50,#mindfulchef
1,Celebrate #PancakeDay the healthy way 🥞 with ...,2,1,2021-02-16 13:00:55,#mindfulchef
2,Turkey pasanda curry with brown rice 🥰 #monday...,0,0,2021-02-15 17:46:55,#mindfulchef
3,RT @MindfulChefUK: MC Virtual Events | Join Be...,0,2,2021-02-15 09:22:43,#mindfulchef
4,RT @MindfulChefUK: MC Virtual Events | Join Be...,0,2,2021-02-15 08:02:22,#mindfulchef


In [73]:
# Append this run's tweets to the cumulative dataset.
# The header row is written only when the file is missing or empty, so a
# brand-new dataset still gets the column names that the later `pd.read_csv`
# and the sort on `created_at` depend on; appends to an existing file stay
# header-less as before.
tweets_csv = Path('../datasets/twitter_data/tweets.csv')
needs_header = not tweets_csv.exists() or tweets_csv.stat().st_size == 0
df.to_csv(tweets_csv, mode='a', index=False, header=needs_header)

In [74]:
# Reload the full cumulative dataset (all runs, including the rows just appended)
df2 = pd.read_csv(filepath_or_buffer='../datasets/twitter_data/tweets.csv')

In [75]:
# Total number of rows in the cumulative dataset
df2.shape[0]

7541

In [80]:
# Order the dataset chronologically; rows without a timestamp go first
df2 = df2.sort_values(by='created_at', na_position='first')

In [81]:
# Overwrite the dataset file with the sorted frame (header included, no index column)
df2.to_csv(path_or_buf='../datasets/twitter_data/tweets.csv', index=False)