## Collecting the streaming options for top 250 IMDb rated movies

### Source Link: https://rapidapi.com/utelly/api/utelly

In [11]:
# Import dependencies

import json
import re
from pprint import pprint
from urllib.parse import quote

import pandas as pd
import requests

from config_omdb import omdb_key

### Step 1: Collecting the Top 250 IMDb movies from the IMDb website

Source Link: https://www.imdb.com/chart/top/?ref_=nv_mv_250

In [12]:
# URL of the IMDb Top 250 chart page.
# NOTE: the name `url` is rebound to the OMDb and Utelly endpoints in later cells,
# so this cell must be re-run before re-running the table scrape below.

url = 'https://www.imdb.com/chart/top/?ref_=nv_mv_250'

In [13]:
# Read all the tables found on the chart page into a list of DataFrames

tables = pd.read_html(url)

# The first table is the Top 250 chart; keep it and preview the first rows

raw_imdb_df = tables[0]
raw_imdb_df.head()

Unnamed: 0.1,Unnamed: 0,Rank & Title,IMDb Rating,Your Rating,Unnamed: 4
0,,1. The Shawshank Redemption (1994),9.2,12345678910 NOT YET RELEASED Seen,
1,,2. The Godfather (1972),9.1,12345678910 NOT YET RELEASED Seen,
2,,3. The Godfather: Part II (1974),9.0,12345678910 NOT YET RELEASED Seen,
3,,4. The Dark Knight (2008),9.0,12345678910 NOT YET RELEASED Seen,
4,,5. 12 Angry Men (1957),8.9,12345678910 NOT YET RELEASED Seen,


In [14]:
# Discard the columns that are not needed, leaving 'Rank & Title' and 'IMDb Rating'
columns_to_drop = ['Unnamed: 0', 'Your Rating', 'Unnamed: 4']
raw_imdb_df = raw_imdb_df.drop(columns=columns_to_drop)
raw_imdb_df.head()

Unnamed: 0,Rank & Title,IMDb Rating
0,1. The Shawshank Redemption (1994),9.2
1,2. The Godfather (1972),9.1
2,3. The Godfather: Part II (1974),9.0
3,4. The Dark Knight (2008),9.0
4,5. 12 Angry Men (1957),8.9


In [16]:
# Each entry looks like "1. The Shawshank Redemption (1994)".
# The rank is anchored to the start of the string and the year to the end, so a
# title that itself contains '.' or '(' (e.g. "L.A. Confidential",
# "Monsters, Inc.") is kept whole instead of being cut at the first '.'.
rank_title_pattern = re.compile(r'^\s*(\d+)\.\s*(.*?)\s*\((\d{4})\)\s*$')

rank_list = []
title_list = []
year_list = []

for movie in raw_imdb_df['Rank & Title']:

    match = rank_title_pattern.match(movie)
    if match is None:
        # Fail loudly with the offending value rather than storing a garbled row
        raise ValueError("Unexpected 'Rank & Title' format: {!r}".format(movie))

    # All three parts are kept as strings
    rank, title, year = match.groups()

    # Collapse runs of whitespace inside the title to a single space
    # (the surrounding whitespace is already excluded by the pattern)
    title = re.sub(r'\s{2,}', ' ', title)

    rank_list.append(rank)
    title_list.append(title)
    year_list.append(year)

In [17]:
# Pair each output column name with the data that fills it
dict_imdb = dict(zip(
    ('IMDb Rank', 'Movie Title', 'Year Released', 'IMDb Rating'),
    (rank_list, title_list, year_list, raw_imdb_df['IMDb Rating']),
))

# Build the cleaned Top 250 table and preview it
Top_250_IMDb_df = pd.DataFrame(data=dict_imdb)

Top_250_IMDb_df.head()

Unnamed: 0,IMDb Rank,Movie Title,Year Released,IMDb Rating
0,1,The Shawshank Redemption,1994,9.2
1,2,The Godfather,1972,9.1
2,3,The Godfather: Part II,1974,9.0
3,4,The Dark Knight,2008,9.0
4,5,12 Angry Men,1957,8.9


In [18]:
# Write the cleaned Top 250 table to disk without the index column
Top_250_IMDb_df.to_csv(path_or_buf='Output/Top_250_IMDb.csv', index=False)

### Step 2: Collecting IMDb unique ID and other movie details from OMDb API

Source Link: http://www.omdbapi.com/

In [19]:
# Base OMDb query URL; the request loop appends the movie title after "&t=".
# The API key is taken from the local config_omdb module rather than being
# written into the notebook, so the credential is not stored with the code.
url = "http://www.omdbapi.com/?apikey=" + omdb_key + "&t="

In [20]:
# Titles to look up on OMDb, in chart order
movie_list = Top_250_IMDb_df.loc[:, 'Movie Title']

In [None]:
# Query OMDb once per title and keep every raw JSON response, in chart order
results_omdb = []

for movie in movie_list:
    # Percent-encode the title so characters such as '&', '#' or '+' are sent as
    # part of the title instead of being interpreted as URL syntax.
    # The 30-second timeout keeps one stalled request from hanging the whole loop.
    movie_data = requests.get(url + quote(movie), timeout=30).json()
    results_omdb.append(movie_data)

In [None]:
# Write the collected OMDb responses to a JSON file
# Source Link: https://stackabuse.com/reading-and-writing-json-files-in-python-with-pandas/

with open('Output/OMDb_250.json', 'w') as omdb_file:
    json.dump(results_omdb, omdb_file)

In [None]:
# Load the saved OMDb responses into a DataFrame and preview the first two rows

raw_omdb_df = pd.read_json(path_or_buf='Output/OMDb_250.json')
raw_omdb_df.head(n=2)

In [25]:
# (rows, columns) of the OMDb table
raw_omdb_df.shape

(250, 26)

In [26]:
# Show every column name in the OMDb table
raw_omdb_df.columns.tolist()

['Title',
 'Year',
 'Rated',
 'Released',
 'Runtime',
 'Genre',
 'Director',
 'Writer',
 'Actors',
 'Plot',
 'Language',
 'Country',
 'Awards',
 'Poster',
 'Ratings',
 'Metascore',
 'imdbRating',
 'imdbVotes',
 'imdbID',
 'Type',
 'DVD',
 'BoxOffice',
 'Production',
 'Website',
 'Response',
 'Error']

In [27]:
# raw_omdb_df.to_csv('Output/OMDb_raw_df.csv', index=False)
# raw_omdb_df.to_csv('Output/OMDb_raw_df_index.csv')

In [29]:
# IMDb IDs returned by OMDb, used as the lookup key for the streaming API
id_imdb_list = raw_omdb_df.loc[:, 'imdbID']

In [31]:
# Number of entries in the ID series
len(id_imdb_list)

250

### Step 3: Collecting streaming options for Top 250 IMDb movies

Source Link: Utelly API Documentation
https://rapidapi.com/utelly/api/utelly?endpoint=apiendpoint_3cad787b-ca7b-449a-84b4-23b40d64fd73

Endpoint: GET/idlookup

In [70]:
from config import api_key
import time

In [34]:
# url = "https://utelly-tv-shows-and-movies-availability-v1.p.rapidapi.com/idlookup"
# querystring = {"country":"US","source_id":"tt0468569","source":"imdb"}
# headers = {
#     'x-rapidapi-host': "utelly-tv-shows-and-movies-availability-v1.p.rapidapi.com",
#     'x-rapidapi-key': api_key
#     }

In [None]:
# response = requests.request("GET", url, headers=headers, params=querystring)

In [43]:
# json_result = response.json()
# json_result

{'collection': {'id': '5d97dab59a76a40056de6bcb',
  'picture': 'https://utellyassets9-1.imgix.net/api/Images/149c8a3dab9ffbd65c1cc655724f29aa/Redirect',
  'name': 'Dark Knight - The Dark Knight',
  'locations': [{'icon': 'https://utellyassets7.imgix.net/locations_icons/utelly/black_new/iTunesIVAUS.png?w=92&auto=compress&app_version=ae3576e2-0796-4eda-b953-80cadc8e2619_eww2020-05-09',
    'country': ['us'],
    'display_name': 'iTunes',
    'name': 'iTunesIVAUS',
    'id': '5d80a9a5d51bef861d3740d3',
    'url': 'https://itunes.apple.com/us/movie/the-dark-knight/id764632601'},
   {'icon': 'https://utellyassets7.imgix.net/locations_icons/utelly/black_new/GooglePlayIVAUS.png?w=92&auto=compress&app_version=ae3576e2-0796-4eda-b953-80cadc8e2619_eww2020-05-09',
    'country': ['us'],
    'display_name': 'Google Play',
    'name': 'GooglePlayIVAUS',
    'id': '5d8260b128fbcd0052aed197',
    'url': 'https://play.google.com/store/movies/details/The_Dark_Knight?gl=US&hl=en&id=qY3UkAHufLY'},
   {'i

In [86]:
# Utelly "idlookup" endpoint, served through RapidAPI
utelly_host = "utelly-tv-shows-and-movies-availability-v1.p.rapidapi.com"
url = "https://" + utelly_host + "/idlookup"

# Query parameters shared by every lookup; "source_id" is supplied per movie
# by the request loop
querystring = dict(country="US", source="imdb")

# RapidAPI headers; the key is read from the local config module
headers = {
    'x-rapidapi-host': utelly_host,
    'x-rapidapi-key': api_key,
}

In [87]:
# id_imdb_list_sample = ['tt0111161', 'tt0068646', 'tt0071562']

In [88]:
# Pause between Utelly calls, in seconds; presumably there to stay under the
# RapidAPI rate limit - TODO confirm against the subscription plan
REQUEST_DELAY_SECONDS = 2
# Upper bound, in seconds, on one HTTP call so a stalled request cannot hang the loop
REQUEST_TIMEOUT_SECONDS = 30

# Parallel columns of the streaming table: one entry per (movie, location) pair
imdb_id = []
title = []
streaming_service = []
streaming_url = []
# Every JSON body received, kept so the raw responses can be saved afterwards
all_requests_json = []

for id_imdb in id_imdb_list:
    # Entries that are not a non-empty string (e.g. NaN) cannot be looked up;
    # report them without spending a delay and an API call
    if not isinstance(id_imdb, str) or not id_imdb:
        print('Data Not Available')
        print(id_imdb)
        print('\n')
        continue

    time.sleep(REQUEST_DELAY_SECONDS)

    # Build the parameters per request instead of mutating the shared dict
    params = dict(querystring, source_id=id_imdb)

    try:
        response = requests.get(url, headers=headers, params=params,
                                timeout=REQUEST_TIMEOUT_SECONDS)

        json_result = response.json()
        all_requests_json.append(json_result)

        # Read every field before touching the output lists, so a missing key
        # cannot leave the four lists with different lengths
        collection = json_result['collection']
        rows = [
            (json_result['id'], collection['name'],
             location['display_name'], location['url'])
            for location in collection['locations']
        ]

    except (requests.RequestException, ValueError, KeyError, TypeError) as error:
        # Network failure, non-JSON body, or a response without the expected keys.
        # Only these are caught, so KeyboardInterrupt still stops the loop.
        print('Data Not Available')
        print(id_imdb)
        print(repr(error))
        print('\n')
        continue

    for row_id, row_title, row_service, row_url in rows:
        imdb_id.append(row_id)
        title.append(row_title)
        streaming_service.append(row_service)
        streaming_url.append(row_url)

In [89]:
# Save every raw Utelly response to a JSON file
with open('Output/json_files/utelly_all_requests_json.json', 'w') as utelly_file:
    json.dump(all_requests_json, utelly_file)

In [90]:
# Map each output column name to the list collected for it
streaming_dict = {}
streaming_dict['IMDb ID'] = imdb_id
streaming_dict['Title'] = title
streaming_dict['Streaming Service'] = streaming_service
streaming_dict['Streaming URL'] = streaming_url

In [91]:
# One row per (movie, streaming location) pair
streaming_df = pd.DataFrame(data=streaming_dict)
streaming_df

Unnamed: 0,IMDb ID,Title,Streaming Service,Streaming URL
0,tt0111161,The Shawshank Redemption,Netflix,https://www.netflix.com/title/70005379
1,tt0111161,The Shawshank Redemption,Google Play,https://play.google.com/store/movies/details/T...
2,tt0111161,The Shawshank Redemption,Amazon Instant Video,https://www.amazon.com/gp/product/B001EBV0P8?c...
3,tt0111161,The Shawshank Redemption,iTunes,https://itunes.apple.com/us/movie/the-shawshan...
4,tt0068646,The Godfather,Google Play,https://play.google.com/store/movies/details/T...
...,...,...,...,...
694,tt0103639,Aladdin,Google Play,https://play.google.com/store/movies/details/A...
695,tt0103639,Aladdin,DisneyPlusIVAUS,https://www.disneyplus.com/movies/aladdin-1992...
696,tt2338151,PK,Netflix,https://www.netflix.com/title/70303496
697,tt0094625,Akira,Hulu,https://www.hulu.com/watch/64a5a8d0-1406-4178-...


In [92]:
# (rows, columns) of the streaming table
streaming_df.shape

(699, 4)

In [93]:
# Write the streaming table to disk without the index column
# streaming_df.to_csv('Output/streaming_df_index.csv')
streaming_df.to_csv(path_or_buf='Output/streaming_df.csv', index=False)