Extracting information from the IMDb database

This code writes two simple files (one CSV and one JSON) containing basic information about a movie: Title, Year, Rating, Genres and Plot.

In [22]:
from imdb import IMDb
import csv
import json

# Look up a movie on IMDb by title and export a few basic fields
# (Title, Year, Rating, Genres, Plot) to one CSV file and one JSON file.

# Create an IMDb instance
ia = IMDb()

# Replace the title below with the movie you're interested in
movie_title = "Eyes Wide Shut"
movies = ia.search_movie(movie_title)

if not movies:
    # Report and skip the export. Calling exit() here would terminate the
    # whole interpreter/notebook session rather than just this cell.
    print("Movie not found.")
else:
    # Use the first search result and fetch its details.
    movie = movies[0]
    ia.update(movie)

    # Fields missing on IMDb come back as None (empty CSV cell / JSON null),
    # except Genres (empty string) and Plot ("N/A").
    movie_data = {
        "Title": movie.get("title"),
        "Year": movie.get("year"),
        "Rating": movie.get("rating"),
        "Genres": ', '.join(movie.get("genres", [])),
        "Plot": movie.get("plot outline", "N/A"),
        # Add more fields as needed
    }

    output_file = "metadata_IMDb.csv"
    json_filename = "metadata_IMDb.json"

    # Write the data to a CSV file: one header row plus one data row.
    # newline="" lets the csv module control line endings itself.
    with open(output_file, "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=list(movie_data))
        writer.writeheader()
        writer.writerow(movie_data)

    print(f"Metadata saved to '{output_file}'")

    # Write the data to a JSON file
    with open(json_filename, 'w', encoding='utf-8') as jsonfile:
        json.dump(movie_data, jsonfile, ensure_ascii=False, indent=4)

    print(f"Metadata saved to '{json_filename}'")


Metadata saved to 'metadata_IMDb.json'


This code extracts information about the crew and saves it to CSV/JSON files.

In [21]:
import csv
import json
from imdb import IMDb

# Look up a movie on IMDb by title and export its production companies,
# producers, writers and directors as (Role, Name) rows in a CSV file and
# as a role -> names mapping in a JSON file.
ia = IMDb()

# Change this to the title of the movie you want to retrieve
movie_title = 'Eyes Wide Shut'
movies = ia.search_movie(movie_title)

if not movies:
    print(f"No movies found with the title '{movie_title}'")
else:
    # Work with the top search hit (pick another index if needed) and
    # fetch its main details.
    movie = movies[0]
    ia.update(movie, info='main')

    # (key on the movie object, label used in the output), in output order
    role_labels = (
        ('production companies', 'Production Company'),
        ('producer', 'Producer'),
        ('writer', 'Writer'),
        ('director', 'Director'),
    )

    # Only roles actually present on the movie end up in the mapping.
    roles_dict = {}
    for key, label in role_labels:
        if key in movie:
            roles_dict[label] = list(map(str, movie[key]))

    # Output file names
    csv_filename = 'crew_IMDb_metadata.csv'
    json_filename = 'crew_IMDB_metadata.json'

    # CSV: a header row, then one row per (role, name) pair
    with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['Role', 'Name'])
        for role, names in roles_dict.items():
            for name in names:
                csv_writer.writerow((role, name))

    print(f"Metadata saved to '{csv_filename}'")

    # JSON: the role -> names mapping, written as-is
    with open(json_filename, 'w', encoding='utf-8') as jsonfile:
        jsonfile.write(json.dumps(roles_dict, ensure_ascii=False, indent=4))

    print(f"Metadata saved to '{json_filename}'")


Metadata saved to 'crew_IMDb_metadata.csv'
Metadata saved to 'crew_IMDB_metadata.json'


This code collects the name and role of every actor playing in a chosen movie.
It prints the data in XML format, using the tags <role> and <actor>.


In [None]:
from imdb import IMDb
from imdb_ref import print_imdb_actor_page

# Look up a movie on IMDb by title and print each cast member as a pair of
# XML elements: <role>...</role> and <actor ref="...">...</actor>.
from xml.sax.saxutils import escape, quoteattr

# Create an IMDb instance
ia = IMDb()

# Search for a movie by title
movie_title = "Eyes Wide Shut"  # Choose the title
movies = ia.search_movie(movie_title)

if movies:
    # Get the first search result (movie)
    first_movie = movies[0]

    # Update the movie details
    ia.update(first_movie, info=['main', 'cast'])

    print("Title:", first_movie["title"])

    # Print the list of actors and their roles.
    # .get() avoids a KeyError when IMDb returns no cast for the movie.
    for actor in first_movie.get("cast", []):
        name = str(actor["name"])
        # NOTE(review): print_imdb_actor_page comes from the local imdb_ref
        # module; its return value is used as the actor's URL - confirm.
        url = print_imdb_actor_page(name)

        # Build each element as one string: passing the pieces to print()
        # as separate arguments would insert spaces inside the ref value.
        # escape()/quoteattr() keep characters such as & < > " from
        # breaking the markup; quoteattr() also adds the surrounding quotes.
        print(f"<role>{escape(str(actor.currentRole))}</role>")
        print(f"<actor ref={quoteattr(str(url))}>{escape(name)}</actor>")
        print("---")
else:
    print("No results found for", movie_title)