# TVDB - Getting Tweets about TV Shows

Twitter implements OAuth 1.0A as its standard authentication mechanism, and in order to use it to make requests to Twitter's API, you'll need to go to https://dev.twitter.com/apps and create a sample application. 

Twitter examples from the python-twitter API  [https://github.com/ideoforms/python-twitter-examples](https://github.com/ideoforms/python-twitter-examples)  

# Authorizing an application to access Twitter account data

In [1]:
import os

import pandas as pd
import twitter # pip install twitter

# Go to http://dev.twitter.com/apps/new to create an app and get values
# for these credentials.
# See https://dev.twitter.com/docs/auth/oauth for more information 
# on Twitter's OAuth implementation.
#
# SECURITY: the credentials are read from environment variables so that they
# are never stored in (or committed with) the notebook. Export the four
# variables below before starting Jupyter. A missing variable raises a
# KeyError that names it.
# NOTE(review): the key/secret pairs that used to be hardcoded in this cell
# have been exposed and should be revoked/regenerated in the Twitter
# developer console.

CONSUMER_KEY = os.environ['TWITTER_CONSUMER_KEY']
CONSUMER_SECRET = os.environ['TWITTER_CONSUMER_SECRET']
OAUTH_TOKEN = os.environ['TWITTER_OAUTH_TOKEN']
OAUTH_TOKEN_SECRET = os.environ['TWITTER_OAUTH_TOKEN_SECRET']

# Build the OAuth 1.0A credentials object and the API client used by the
# rest of the notebook.
auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                           CONSUMER_KEY, CONSUMER_SECRET)

twitter_api = twitter.Twitter(auth=auth)

# Nothing to see by displaying twitter_api except that it's now a
# defined variable

print(twitter_api)

<twitter.api.Twitter object at 0x0000022214BFFB70>


## Get the list of TV shows (which we will search Twitter for) using BeautifulSoup

In [2]:
from requests import get
from bs4 import BeautifulSoup as bs

url = "https://www.imdb.com/search/title?title_type=tv_series"

# Fetch the listing page. The timeout keeps the cell from hanging forever on
# a stalled connection, and raise_for_status() turns an HTTP error response
# into an exception instead of letting an error page be parsed below.
response = get(url, timeout=30)
response.raise_for_status()

html_soup = bs(response.text, 'html.parser')

# The shows are looked up inside the element with id="main"; each show is an
# element whose class is "lister-item mode-advanced".
id_check = html_soup.find(id="main")
if id_check is None:
    raise ValueError("No element with id='main' found at " + url)

tv_show_container = id_check.find_all(class_="lister-item mode-advanced")
if not tv_show_container:
    # Fail loudly here rather than continue with an empty list of shows.
    raise ValueError("No 'lister-item mode-advanced' elements found at " + url)

#list to store scraped value data in:
tv_show_names = []

for container in tv_show_container:
    # Only keep entries that contain a "ratings-bar" div.
    if container.find("div", class_="ratings-bar") is not None:
        # The show title is the text of the link inside the entry's <h3>.
        tv_show_names.append(container.h3.a.text)

#Store the list of TV Shows in a data frame
TVShowDF = pd.DataFrame({"tv_show_names": tv_show_names})
print(TVShowDF)

                              tv_show_names
0                           Game of Thrones
1                          The Walking Dead
2                                    The OA
3                      Love, Death & Robots
4                                   The Act
5                      The Umbrella Academy
6                           Stranger Things
7                                     Hanna
8                              Supernatural
9                             American Gods
10                           Grey's Anatomy
11                                Riverdale
12                                The Order
13                     Star Trek: Discovery
14                               After Life
15                            The Blacklist
16                       Brooklyn Nine-Nine
17                        Into the Badlands
18                                   Gotham
19                                The Flash
20                              The Orville
21                What We Do in 

## Create a new column in the TVShowDF to hold hashtags which we will search on Twitter

In [3]:
#Create a new column called "hashtag" in the dataframe: remove every
#non-alphabetic character (spaces included) from the show name and prefix
#the result with "#".
#regex=True is passed explicitly: pandas >= 2.0 treats the pattern as a
#literal string by default, which would leave punctuation in the hashtag.
#NOTE(review): digits are removed as well, so a title such as "The 100"
#would become "#The" -- confirm this is intended.
TVShowDF['hashtag'] = '#' + TVShowDF.tv_show_names.str.replace('[^a-zA-Z]', '', regex=True)
TVShowDF.hashtag

0                          #GameofThrones
1                         #TheWalkingDead
2                                  #TheOA
3                        #LoveDeathRobots
4                                 #TheAct
5                     #TheUmbrellaAcademy
6                         #StrangerThings
7                                  #Hanna
8                           #Supernatural
9                           #AmericanGods
10                          #GreysAnatomy
11                             #Riverdale
12                              #TheOrder
13                     #StarTrekDiscovery
14                             #AfterLife
15                          #TheBlacklist
16                      #BrooklynNineNine
17                       #IntotheBadlands
18                                #Gotham
19                              #TheFlash
20                            #TheOrville
21                  #WhatWeDointheShadows
22                             #TheOffice
23                              #B

## Getting Tweets

In [4]:
#Set the number of Tweets that we want
n = 5000
from urllib.parse import unquote
# See https://dev.twitter.com/rest/reference/get/search/tweets
# The search endpoint documents a maximum of 100 for `count`, so a single
# request cannot return n tweets; the value sent is capped explicitly to make
# that visible. Fetching more per hashtag would require paging through the
# results.
MAX_TWEETS_PER_REQUEST = 100
tweets_per_request = min(n, MAX_TWEETS_PER_REQUEST)

#create an empty list to hold the search responses (one per hashtag)
tweet_results = []

#loop through the hashtags and make one search API call per hashtag.
#Results are appended one at a time so that responses already collected are
#kept if a later request fails.
for hashtag in TVShowDF.hashtag:
    tweet_results.append(twitter_api.search.tweets(q=hashtag, count=tweets_per_request, lang='en'))

#Show the raw response for the first hashtag (nothing to show if there were
#no hashtags to search for).
if tweet_results:
    print(tweet_results[0])

{'statuses': [{'created_at': 'Tue Apr 02 00:12:15 +0000 2019', 'id': 1112870320904429568, 'id_str': '1112870320904429568', 'text': 'RT @9GAG: Leak footage of the final battle of #gameofthrones season 8 https://t.co/S85aR3MatP', 'truncated': False, 'entities': {'hashtags': [{'text': 'gameofthrones', 'indices': [46, 60]}], 'symbols': [], 'user_mentions': [{'screen_name': '9GAG', 'name': '9GAG', 'id': 16548023, 'id_str': '16548023', 'indices': [3, 8]}], 'urls': [{'url': 'https://t.co/S85aR3MatP', 'expanded_url': 'https://twitter.com/ThePixelFactor/status/910642964011081728', 'display_url': 'twitter.com/ThePixelFactor…', 'indices': [70, 93]}]}, 'metadata': {'iso_language_code': 'en', 'result_type': 'recent'}, 'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 9976613607197

## Flatten the list of tweets, extract the fields we are interested in into a DataFrame using list comprehensions, and export the data to a CSV file

In [5]:
def GetTweetFields(tr):
    """Return a flat dict with the fields of interest from one tweet.

    Parameters
    ----------
    tr : dict
        A single status. It is indexed with the keys "text", "created_at"
        and "user"; the "user" value is indexed with "favourites_count",
        "screen_name" and "location".

    Returns
    -------
    dict
        Keys "text", "created_at", "favourites_count", "screen_name" and
        "location", in that order.

    Raises
    ------
    KeyError
        If any of the keys listed above is missing.
    """
    # Top-level fields are copied first, then the fields nested under "user".
    record = {key: tr[key] for key in ("text", "created_at")}
    author = tr["user"]
    for key in ("favourites_count", "screen_name", "location"):
        record[key] = author[key]
    return record
# Extract the fields of interest from every status of every search response;
# alltweets holds one inner list per entry of tweet_results.
alltweets = [[GetTweetFields(status) for status in result["statuses"]] for result in tweet_results]

# Flatten the per-response lists into one record per tweet and load them
# into a dataframe with a fixed column order.
tweet_rows = [row for per_response in alltweets for row in per_response]
df = pd.DataFrame(tweet_rows, columns=['text','created_at','favourites_count','screen_name','location'])

#export the dataframe to csv
df.to_csv('tweets.csv',index=False)

# Preview the data. This must be the last expression of the cell, otherwise
# the notebook discards the result without displaying it.
df.head()