In [85]:
import tweepy
import csv
import json
import pandas as pd
import requests
from bs4 import BeautifulSoup as BS
import re
import time

First part of the program where the data will be extracted and will be saved in json file

In [64]:
#loading twitter API credentials from the file which was saved
with open('twitter_credentials.json') as cred_data:
    info = json.load(cred_data)
    consumer_key = info['CONSUMER_KEY']
    consumer_secret = info['CONSUMER_SECRET']
    access_key = info['ACCESS_KEY']
    access_secret = info['ACCESS_SECRET']

In [65]:
# initialization of a list to hold all Tweets
alltweets = []

In [66]:
# Authorization and initialization
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)

In [67]:
# We will get the tweets with multiple requests of 200 tweets each
new_tweets = api.user_timeline(screen_name="midasIIITD", count=200)
# Appending the json part of the data to list
for tweet in new_tweets:
    alltweets.append(tweet._json)

In [68]:
# save id of 1 less than the oldest tweet
oldest = alltweets[-1]["id"] - 1

In [69]:
# grabbing tweets till none are left
while len(new_tweets) > 0:  
    # all subsiquent requests use the max_id param to prevent duplicates
    new_tweets = api.user_timeline(screen_name = "midasIIITD",count=200, max_id=oldest)

    # save most recent tweets
    # Appending the json part of the data to list
    for tweet in new_tweets:
        alltweets.append(tweet._json)
    oldest = alltweets[-1]["id"] - 1

    print("...%s tweets downloaded so far" % (len(alltweets)))

print("Total tweets downloaded %s" % (len(alltweets)))

...302 tweets downloaded so far
...302 tweets downloaded so far
Total tweets downloaded 302


In [92]:
# Creating a new JSON file and dumping the value into it
file = open('tweet.json', 'w')
json.dump(alltweets, file,indent = 4)

Second part of the program to parse the json file and display the output in tabular form

In [93]:
col_names = ["text", "Date and time", "number of favorites/likes", "number of retweets"]

In [94]:
# function to get no. of retweets
def get_retweet(data):
    try:
        ans = data["retweeted_status"]["retweet_count"] 
    except KeyError:
        ans = data["retweet_count"] 
    return ans

In [95]:
# function to get no. of likes
def get_favorite(data):
    try:
        ans = data["retweeted_status"]["favorite_count"] 
    except KeyError:
        ans = data["favorite_count"] 
    return ans    

In [96]:
# the json which I am getting from API doen't contain information about no. of images present in tweet
# I tried to get the no. of images from HTML of tweet url but the website is restricting to get so many
# requests at the same time. So, I haven't included no. of images column
def get_photos(data):
    url = data["entities"]["urls"][0]["expanded_url"]
    r = requests.get(url)
    soup = BS(r.text)
    x = str(soup.find("div", {"class":"AdaptiveMedia-container"}))
    ans = len(set(re.findall(r'(https?://[^\s]+)', x)))
    return ans

In [97]:
# dataframe to show in tabular format
df = pd.DataFrame(columns=col_names)

In [98]:
# loading the saved JSON file
f = open("tweet.json", "rb")
data = json.load(f)

In [99]:
for tw in data:
    text = re.sub(r"http\S+", "", tw["text"])
    dt = t["created_at"].split()
    date = dt[2]+'-'+dt[1]+'-'+dt[-1]+"|"+dt[3]
    favorite = get_favorite(tw)
    retweet = get_retweet(tw)
#     images = get_photos(tw)
    df.loc[len(df)] = [text, date, favorite, retweet]

In [100]:
df

Unnamed: 0,text,Date and time,number of favorites/likes,number of retweets
0,Congratulations @midasIIITD students Simra Sha...,25-Mar-2019|13:01:57,8,1
1,The last date for submitting a solution for th...,25-Mar-2019|13:01:57,8,3
2,RT @IIITDelhi: @IIITDelhi invites application ...,25-Mar-2019|13:01:57,4,4
3,One more week is left to submit the workshop p...,25-Mar-2019|13:01:57,4,0
4,RT @IEEEBigMM19: We are honored to have Dr. Ch...,25-Mar-2019|13:01:57,6,5
5,RT @IEEEBigMM19: Distinguished researchers Dr....,25-Mar-2019|13:01:57,13,3
6,@IEEEBigMM19 is also available on Facebook now...,25-Mar-2019|13:01:57,1,1
7,RT @IEEEBigMM19: BigMM 2019 : IEEE BigMM 2019 ...,25-Mar-2019|13:01:57,6,5
8,BigMM 2019 : IEEE BigMM 2019 – Call for Worksh...,25-Mar-2019|13:01:57,6,3
9,"Congratulations @midasIIITD team, Rohan, Prady...",25-Mar-2019|13:01:57,15,4
