In [None]:
import os
import pandas as pd
import requests

# --- TMDB image settings ---------------------------------------------------
# API_KEY is not referenced by any code visible in this cell.
API_KEY = "**********************************"
IMAGE_BASE_URL = "https://image.tmdb.org/t/p/"
# Size segment inserted between the base URL and the image path.
# Poster options: w92, w154, w185, w342, w500, w780, original
# (backdrop options are similar).
POSTER_SIZE = "w500"
BACKDROP_SIZE = "w780"

# --- Output folders (relative paths; created if they do not exist) ---------
poster_dir = "posters"
backdrop_dir = "backdrops"
for _output_dir in (poster_dir, backdrop_dir):
    os.makedirs(_output_dir, exist_ok=True)

# --- Movie metadata ---------------------------------------------------------
# The download loop reads the 'Movie ID' / 'id', 'Poster Path' and
# 'Backdrop Path' columns from this table.
df = pd.read_csv(r"D:\movrec\data_preprocessing\cpdata3v2\movies.csv")

def download_image(url, save_path):
    """Download an image from a URL and save it to the specified path.

    The body is streamed into a temporary ``<save_path>.part`` file and only
    moved to ``save_path`` once the whole response has been written, so a
    failed or interrupted download never leaves a truncated image behind (and
    never clobbers a previously downloaded good copy).

    Args:
        url: Full image URL. A falsy value (``None`` or ``""``) is skipped.
        save_path: Destination file path for the image.

    Returns:
        ``True`` if the image was fully written to ``save_path``; ``False`` if
        the URL was missing or the download failed. Errors are reported with
        ``print`` and are not raised to the caller.
    """
    if not url:
        print(f"No URL provided for {save_path}. Skipping download.")
        return False

    partial_path = f"{save_path}.part"
    try:
        # With stream=True the connection is only released once the body is
        # fully consumed or the response is closed; the context manager
        # guarantees the close on every exit path.
        with requests.get(url, stream=True, timeout=10) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(8192):
                    f.write(chunk)
        # Replace the destination in one step once the file is complete.
        os.replace(partial_path, save_path)
        return True
    except requests.exceptions.HTTPError as http_err:
        print(f'HTTP error occurred while downloading {url}: {http_err}')
    except Exception as err:
        # Best-effort batch download: report any other failure (network,
        # timeout, disk) and let the caller carry on with the next image.
        print(f'Error occurred while downloading {url}: {err}')

    # Only reached after a failure: discard whatever was partially written.
    try:
        os.remove(partial_path)
    except OSError:
        pass
    return False

def get_full_image_url(path, size):
    """Build the full image URL for an image path at the given size.

    Returns the base URL followed by ``size`` and ``path``, or ``None`` when
    ``path`` is falsy (``None`` or an empty string).
    """
    return f"{IMAGE_BASE_URL}{size}{path}" if path else None

def sanitize_filename(filename):
    """Return ``filename`` with every non-whitelisted character dropped.

    Only ASCII letters, digits, space and the characters ``-_.()`` are kept;
    anything else (path separators, punctuation, non-ASCII letters) is
    removed, not replaced.
    """
    import string

    allowed = "-_.() " + string.ascii_letters + string.digits
    return "".join(filter(lambda ch: ch in allowed, filename))

print("Starting downloads...")

# One entry per image kind: (label used in file names and messages,
# CSV column holding the image path, size segment for the URL, output folder).
image_kinds = (
    ("poster", "Poster Path", POSTER_SIZE, poster_dir),
    ("backdrop", "Backdrop Path", BACKDROP_SIZE, backdrop_dir),
)

for index, row in df.iterrows():
    # Prefer 'Movie ID' and fall back to 'id'. Missing values are tested
    # explicitly: chaining with `or` would discard a legitimate ID of 0 and
    # would accept NaN (which is truthy) as an ID.
    movie_id = row.get('Movie ID')
    if movie_id is None or pd.isna(movie_id):
        movie_id = row.get('id')
    if movie_id is None or pd.isna(movie_id):
        # Without an ID every such row would be written to the same file name.
        print(f"Row {index} has no movie ID. Skipping.")
        continue

    for kind, column, size, target_dir in image_kinds:
        image_path = row.get(column)
        if not image_path or pd.isna(image_path):
            continue  # no image of this kind for this movie

        image_url = get_full_image_url(image_path, size)
        image_filename = sanitize_filename(f"{movie_id}_{kind}.jpg")
        image_save_path = os.path.join(target_dir, image_filename)

        # Only an explicit False is treated as a failed download, so the
        # success line is still printed when download_image reports no
        # status at all (returns None).
        if download_image(image_url, image_save_path) is False:
            continue
        print(f"Downloaded {kind} for movie ID {movie_id}")

print("All downloads completed.")


Starting downloads...
Downloaded poster for movie ID 1
Downloaded backdrop for movie ID 1
Downloaded poster for movie ID 2
Downloaded backdrop for movie ID 2
Downloaded poster for movie ID 3
Downloaded backdrop for movie ID 3
Downloaded poster for movie ID 4
Downloaded backdrop for movie ID 4
Downloaded poster for movie ID 5
Downloaded backdrop for movie ID 5
Downloaded poster for movie ID 6
Downloaded backdrop for movie ID 6
Downloaded poster for movie ID 7
Downloaded backdrop for movie ID 7
Downloaded poster for movie ID 8
Downloaded backdrop for movie ID 8
Downloaded poster for movie ID 9
Downloaded backdrop for movie ID 9
Downloaded poster for movie ID 10
Downloaded backdrop for movie ID 10
Downloaded poster for movie ID 11
Downloaded backdrop for movie ID 11
Downloaded poster for movie ID 12
Downloaded backdrop for movie ID 12
Downloaded poster for movie ID 13
Downloaded backdrop for movie ID 13
Downloaded poster for movie ID 14
Downloaded backdrop for movie ID 14
Downloaded poste