In [None]:
'''
What This Script Does:
1. Reads movie titles from the CSV file

For each title:

2. Searches Watchmode's database for matches
3. If found, checks UK streaming availability (region GB)
4. Records all subscription and rental options
5. Saves results to a JSON file with:

    - Movie title
    - Available streaming platforms
    - Type of availability (subscription/rent)
    - Price (for rentals)
    - Direct links (when available)

6. Converts the JSON file into a CSV file
'''
import csv
import json
import requests
from tqdm import tqdm # using this library to show progress bar

import pandas as pd
from config.watchmode import api_key

In [13]:
# Configuration
#
# Constants first, then the input data. The movie list is read from
# 'Outputs/imdb-movies.csv'; only its 'transformed_title' column is kept,
# and those values are the search terms used for the lookups.

watchmode_api_key = api_key
output_streaming_json = "streaming_options.json"

imdb_movies = pd.read_csv('Outputs/imdb-movies.csv')
movie_titles = imdb_movies['transformed_title']


In [14]:
# Sanity check: how many titles were loaded from the input file
movie_titles.shape[0]

250

In [15]:
 # Check streaming options for each movie

# Collects one record per title: {"title", "streaming_options", "error"}.
results = []

# Endpoints are kept free of query strings; every query value is passed via
# `params=` so that requests percent-encodes it. Interpolating a raw title
# into the URL breaks on characters such as '&', '#' or '+', which truncate
# or alter the search term.
WATCHMODE_SEARCH_URL = "https://api.watchmode.com/v1/search/"
WATCHMODE_SOURCES_URL = "https://api.watchmode.com/v1/title/{title_id}/sources/"

# Seconds to wait on each request. requests applies no timeout by default, so
# without one a single stalled connection would block the whole run.
REQUEST_TIMEOUT = 10

# One Session for the whole run so connections to the API host can be reused.
with requests.Session() as session:

    for title in tqdm(movie_titles, desc="Checking streaming options"):

        try:
            # Search for the title by name
            search_response = session.get(
                WATCHMODE_SEARCH_URL,
                params={
                    "apiKey": watchmode_api_key,
                    "search_field": "name",
                    "search_value": title,
                },
                timeout=REQUEST_TIMEOUT,
            )
            search_response.raise_for_status()
            search_data = search_response.json()

            # Take the first search hit, if there is one.
            # NOTE(review): the search is not restricted to movies here, so the
            # first hit may be another kind of title — confirm against the API.
            title_results = search_data.get('title_results') or []
            movie_id = title_results[0].get('id') if title_results else None

            streaming_options = {}

            if movie_id is not None:
                # Fetch the sources for this title, limited to region GB
                sources_response = session.get(
                    WATCHMODE_SOURCES_URL.format(title_id=movie_id),
                    params={"apiKey": watchmode_api_key, "regions": "GB"},
                    timeout=REQUEST_TIMEOUT,
                )
                sources_response.raise_for_status()
                sources_data = sources_response.json()

                # Keep subscription and rental sources only. Entries are keyed
                # by platform name, so a later source with the same name
                # replaces an earlier one.
                for source in sources_data:
                    # Skip anything that is not a usable source record instead
                    # of aborting the whole run on one malformed entry.
                    if not isinstance(source, dict):
                        continue
                    platform = source.get('name')
                    if platform is None:
                        continue

                    source_type = source.get('type')
                    if source_type == 'sub':
                        streaming_options[platform] = {
                            'type': 'subscription',
                            'url': source.get('web_url', '')
                        }
                    elif source_type == 'rent':
                        streaming_options[platform] = {
                            'type': 'rent',
                            'price': source.get('price', ''),
                            'url': source.get('web_url', '')
                        }

            # Add to results
            results.append({
                "title": title,
                "streaming_options": streaming_options,
                "error": None
            })

        except requests.exceptions.RequestException as e:
            # The exception text can contain the full request URL, which
            # carries the API key; strip the key before the message is stored
            # (and later written to disk).
            error_text = str(e)
            if watchmode_api_key:
                error_text = error_text.replace(str(watchmode_api_key), "<redacted>")

            results.append({
                "title": title,
                "streaming_options": {},
                "error": error_text
            })

Checking streaming options: 100%|██████████| 250/250 [06:06<00:00,  1.47s/it]


In [20]:
# Save results to JSON
#
# Write to the configured `output_streaming_json` path. The previous target,
# the literal 'Outputs/output_streaming_json', was the variable name pasted
# into a string, so the file was created under a different name from the
# 'streaming_options.json' that the notebook loads back afterwards.
with open(output_streaming_json, 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2)

In [22]:
# Load the JSON data
#
# Read from the configured `output_streaming_json` path instead of repeating
# the file name as a literal, and decode as UTF-8 explicitly so the result
# does not depend on the platform's default encoding.
with open(output_streaming_json, 'r', encoding='utf-8') as f:
    data = json.load(f)

In [23]:
# Prepare the CSV file: one row per (movie, platform) combination.
#
# Movies whose 'streaming_options' is empty contribute no rows. The file is
# written as UTF-8 explicitly so that non-ASCII titles or platform names do
# not depend on the platform's default encoding.
with open('Outputs/streaming_options.csv', 'w', newline='', encoding='utf-8') as csvfile:

    writer = csv.writer(csvfile)
    writer.writerow(['title', 'platform', 'type', 'url', 'price'])  # Header row

    for movie in data:

        title = movie['title']
        # `or {}` also covers an explicit null stored for this key
        streaming_options = movie.get('streaming_options') or {}

        for platform, details in streaming_options.items():

            # A missing or null price becomes an empty cell; calling str() on
            # None would put the literal text "None" in the price column.
            price = details.get('price')

            row = [
                title,  # movie name
                platform,  # platform (streaming service)
                details['type'],  # subscription / rent
                details.get('url', ''),
                '' if price is None else str(price),
            ]

            writer.writerow(row)  # Writes each combination as a row in the CSV