In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Scrape the cast table of an IMDb "full credits" page, split each
# character cell into character name / episode count / years, and save the
# result as a CSV file.

# URL of the IMDb full credits page
url = 'https://www.imdb.com/title/tt0386676/fullcredits/?ref_=tt_cl_sm'

# Send a GET request to the IMDb page. requests applies no timeout by default,
# so pass one explicitly to keep the cell from hanging on an unresponsive server.
response = requests.get(url, timeout=30)

# Only parse the page when the request succeeded; `cast_list` stays None
# otherwise so the branches below can report what went wrong.
cast_list = None
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Locate the 'Series Cast' section
    cast_list = soup.find('table', class_='cast_list')

if response.status_code != 200:
    print(f"Failed to retrieve the IMDb page (HTTP status {response.status_code})")
elif cast_list is None:
    # soup.find returns None when no matching table exists; report it instead
    # of failing later with an AttributeError on `cast_list.find_all`.
    print("Could not find the cast table (class 'cast_list') on the IMDb page")
else:
    # Lists to store the scraped data
    actors, full_info = [], []

    # Keep only rows with more than three cells: column 1 holds the actor
    # name and column 3 the character / episode information.
    for row in cast_list.find_all('tr'):
        cols = row.find_all('td')
        if len(cols) > 3:
            actors.append(cols[1].get_text(strip=True))
            full_info.append(cols[3].get_text(strip=True))

    # Create a DataFrame with the initial data
    df = pd.DataFrame({
        'Actor': actors,
        'FullInfo': full_info
    })

    # Regular expression that splits the character cell into the character
    # name, the episode count, and the year (or year range).
    # The dots in the "/ ..." marker are escaped so that only a literal
    # ellipsis is skipped; unescaped, they would swallow any three characters
    # following "/ ".
    # NOTE(review): the Character group excludes digits, so names containing
    # a number will not be captured correctly.
    pattern = (
        r'(?P<Character>[^\d/]+?)'
        r'(?:/ \.\.\.)?'
        r'\s*(?P<Episodes_Count>\d+)?\s*episodes?'
        r',?\s*(?P<Years>\d{4}(?:-\d{4})?)?'
        r'(?:\(uncredited\))?'
    )

    # Extract the named groups into new columns (NaN where nothing matched)
    df_extracted = df['FullInfo'].str.extract(pattern)

    # Concatenate the new columns to the actor column
    df_final = pd.concat([df['Actor'], df_extracted], axis=1)

    # Define the path for the CSV file
    # NOTE(review): absolute, machine-specific path; adjust before running
    # this cell on another machine.
    csv_file_path = 'C:\\Users\\Lado\\Google Drive\\SocialGraphs\\Untitled Folder\\cast_details.csv'

    # Save the DataFrame to a CSV file
    df_final.to_csv(csv_file_path, index=False)
    print(f"CSV file has been saved successfully to {csv_file_path}.")

CSV file has been saved successfully to C:\Users\Lado\Google Drive\SocialGraphs\Untitled Folder\cast_details.csv.
