In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import requests
import re
import wikipediaapi as wiki
from datetime import datetime
import imdb
import random
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import wikipedia

In [None]:
##########################
###     CONSTANTS      ###
##########################

In [64]:
# IMdb API key
api_key = "k_11m5rq35"

# headers
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
       'Accept-Encoding': 'none',
       'Accept-Language': 'en-US,en;q=0.8',
       'Connection': 'keep-alive'}

# Parent and Relative strings to search for in wikipedia infobox
parent_pattern = re.compile('Parent')
relative_pattern = re.compile('Relative')
starring_pattern = re.compile('Starring')
voice_pattern = re.compile('Voices of')
wikipedia_pattern = re.compile("wiki")

not_nepo_relationships = 'son|daughter|nephew|niece|grandson|grand-daughter|grandnephew|grandniece|in-law|stepson|stepdaughter|brother|sister|cousin'

# url base
wiki_url_base = "https://en.wikipedia.org" # wikipedia
imdb_base_url = 'https://www.imdb.com/title/'

# wikipedia language setting
wiki_wiki = wiki.Wikipedia('en')

ia = imdb.Cinemagoer()

In [3]:
top_250_mov_url = "https://imdb-api.com/en/API/Top250Movies/"
top_250_tv_url = "https://imdb-api.com/en/API/Top250TVs/"
pop_mov_url = "https://imdb-api.com/en/API/MostPopularMovies/"
pop_tv_url = "https://imdb-api.com/en/API/MostPopularTVs/"
box_office_all_time_url = "https://imdb-api.com/en/API/BoxOfficeAllTime/"

In [None]:
##########################
###     FUNCTIONS      ###
##########################

In [4]:
def get_imdb_lists(url, imdb_api_key):
    data = requests.get(url + imdb_api_key).json()
    list_of_titles = data['items']
    df = pd.DataFrame(list_of_titles)
    return df

In [80]:
def wiki_cast(title, year, tv_or_film): 
    print(title)
    title = title.replace(" ", "+").replace("&", "and")

    google_url = f"https://www.google.com/search?q={title}+{year}+{tv_or_film}+wikipedia"

    data_g = requests.get(google_url, headers=headers).text
    soup_g = BeautifulSoup(data_g,'html.parser') # full page

    links = []

    for link in soup_g.find_all('a'):
        l = link.get('href')
        links.append(l)
        
    # get only wikipedia links and get rid of image links
    # take first link (there are subsequent links for actors, other movies, etc)
    links = [s for s in links if 'https://en.wikipedia.org/wiki/' in s and 'img' not in s][0]
    
    # get the correct url
    wiki_url = re.search('url=(.*)&ved', links).group(1).replace('25', '')
    print(wiki_url)

    data = requests.get(wiki_url, headers=headers).text
    soup = BeautifulSoup(data,'html.parser') # full page

    # check if infobox exists
    infobox = soup.find("table",{"class":"infobox vevent"}) # infobox
    
    if infobox is None :
        return False # no infobox on wiki page -> not a nepo baby
        
    else :
        infobox_fields = infobox.find_all('th', {'class' : 'infobox-label'})

        does_starring_field_exist = bool(re.search("Starring", str(infobox_fields)))
        does_voices_field_exist = bool(re.search("Voices of", str(infobox_fields)))
        
        cast_list = []
        cast_links = []
        
        if does_voices_field_exist is False and does_starring_field_exist is False:
            cast_list = ["SKIP"]
            cast_links = ["SKIP"]
        else :
            if does_voices_field_exist is True:
                th_pattern = voice_pattern
            elif does_starring_field_exist is True:
                th_pattern = starring_pattern

            th = soup.find('th', string=th_pattern).parent
            li_list = th.find_all('li')

            # get cast names
            for li in li_list:
                a = li.find('a')
                if a == None :
                    cast_list.append(li.text)
                    cast_links.append("")
                else :
                    cast_list.append(a.text)
                    cast_links.append(a['href'])

            if len(cast_list) == 0 :
                cast_list = ["SKIP"]
                cast_links = ["SKIP"]
            else :
                cast_list
                cast_links

        return [cast_list, cast_links]

In [156]:
def title_specs(title_id):
   
    page_data =  requests.get(imdb_base_url+title_id, headers=headers).text
    soup = BeautifulSoup(page_data,'html.parser')
    
    # CAST
    # grab top cast section of imdb page
    top_cast = soup.find_all("a", {"class": "sc-bfec09a1-1 gfeYgX"})
    
    cast_list = []
    cast_links = []
    
    # get cast names
    for a in top_cast:
        cast_list.append(str(a.string))
        cast_links.append("")
        
    return [cast_list, cast_links]

In [132]:
# given name of person, outputs parent links or false if none
def wiki_scrape(name, wiki_name_link=None):
    print(name)
    
    if name == 'SKIP' :
        return 'SKIP'
    
    if wiki_name_link != "" :
        url = wiki_url_base + wiki_name_link
        does_page_exist = True
    else :
        subject = name.replace(" ", "_")
        url = wiki_url_base + "/wiki/" + subject

        # check if wiki page exists
        wiki_page = wiki_wiki.page(subject)
        does_page_exist = wiki_page.exists()
        does_page_exist

    if does_page_exist is False :
        return False # no wiki page -> not famous enough, not a nepo baby
    
    elif does_page_exist is True : 
        data = requests.get(url, headers=headers).text
        soup = BeautifulSoup(data,'html.parser') # full page
        
        # check if infobox exists
        infobox = soup.find("table",{"class":"infobox biography vcard"}) # infobox
        
        if infobox is None :
            return False # no infobox on wiki page -> not a nepo baby
        
        else :
            infobox_fields = infobox.find_all('th', {'class' : 'infobox-label'})
            
            does_parent_or_rel_field_exist = bool(re.search("Parent|Relative", str(infobox_fields))) # see if Parent or Relative field is listed in infobox
        
            if does_parent_or_rel_field_exist is False :
                return False # parent field not listed in infobox -> not a nepo baby
            
            elif does_parent_or_rel_field_exist is True :
                
                try :
                    does_parent_field_exist = bool(re.search("Parent", str(infobox_fields)))
                    does_relative_field_exist = bool(re.search("Relative", str(infobox_fields)))
                    
                    if does_parent_field_exist is True :   
                        parent_field = soup.find('th', string=parent_pattern).parent
                        parent_a_tags = parent_field.find_all('a')
                        
                        if len(parent_a_tags) == 0 :
                            return False # parents listed in infobox but not linked -> not a nepo baby
                        
                        else : # nepo baby!
                            parent_wiki_list = []
                            
                            for link in parent_field.find_all('a'):
                                parent_wiki = link.get('href')
                                parent_wiki_link = wiki_url_base + parent_wiki
                                parent_wiki_list.append(parent_wiki_link)

                                parent_wiki_list[:] = [x for x in parent_wiki_list if "cite_note" not in x] # cited entries are in <a href> tags so remove those links here
                                
                        return parent_wiki_list

                    elif does_relative_field_exist is True : 
                        relative_field = soup.find('th', string=relative_pattern).parent
                        relative_td_tags = relative_field.find_all('td')

                        list_of_relatives = re.split('</li>|<br/>', str(relative_td_tags))

                        parent_wiki_list = []

                        if len(relative_field.find_all('a')) == 0 :
                            return False
                        else :
                            for i in range(len(list_of_relatives)):
                                if bool(re.search(not_nepo_relationships, list_of_relatives[i])) is True:
                                    pass

                                else :
                                    try :
                                        href_match = re.search(r'href=\"(.*)\" title=', list_of_relatives[i])
                                        parent_wiki = href_match.group(1)
                                        parent_wiki_link = wiki_url_base + parent_wiki
                                        parent_wiki_list.append(parent_wiki_link)

                                    except AttributeError :
                                        pass  
                            
                            if len(parent_wiki_list) == 0 :
                                return False
                            else :
                                return parent_wiki_list
                
                except AttributeError :
                    pass


In [8]:
def pct_nepo(df, group_col):
    df = df[[group_col, 'nepos']]
    df = df[df.nepos != False].groupby(group_col).count() / df.groupby(group_col).count()
    df = df.rename(columns={'nepos': 'pct_nepo'})
    return df

In [137]:
def imdb_whole_shebang(url, tv_or_film):
    
    # get list of titles from imdb
    df = get_imdb_lists(url, api_key) 
    # get cast list and cast wiki links from wikipedia
    df[['cast', 'cast_link']] = df.apply(lambda row : pd.Series(wiki_cast(row['title'], row['year'], tv_or_film)), axis=1)
    # remove rows where it has failed
    df = df[(df["cast"] != False)]
    # explode lists into rows
    df = df.explode(['cast', 'cast_link']).reset_index(drop=True) # expand list of cast into rows

    # find the nepos
    df.loc[:,"nepos"] = df.apply(lambda row : wiki_scrape(row['cast'], row['cast_link']), axis=1) # check if each cast member is a nepo
    
    # remove where the wiki_scrape hasn't worked
    noskip_df = df[df['cast'] != 'SKIP'] # no starring field
    nolist_df = noskip_df[~noskip_df['cast'].str.contains("list", case=False)] # cast list linked elsewhere - not in infobox
    correct_df = nolist_df[~nolist_df['cast'].str.contains("various", case=False)] # cast list linked elsewhere - not in infobox
    
    # find cast through imdb of the titles where wikipedia was insufficient
    fix_cast_imdb = df[(df['cast'] == 'SKIP') | df['cast'].str.contains("list", case=False) | df['cast'].str.contains("various", case=False)]
    fix_cast_imdb[['cast', 'cast_link']] = fix_cast_imdb.apply(lambda row : title_specs(row['id']), axis=1) # get top cast from imdb
    fix_cast_imdb = fix_cast_imdb.explode(['cast', 'cast_link']).reset_index(drop=True) # expand list of cast into rows
    fix_cast_imdb.loc[:,"nepos"] = fix_cast_imdb.apply(lambda row : wiki_scrape(row['cast']), axis=1)
    
    # join the wiki cast and imdb cast df
    df = pd.concat([correct_df, fix_cast_imdb]).reset_index(drop=True)
    # drop duplicates incase they were not removed from correct_df. take last dupe as its the imdb list
    df.drop_duplicates(subset=['id','cast'], keep='last') 
    
    # calc percent nepo by title
    df_pct_nepo = pct_nepo(df, "id")
    df = df.merge(df_pct_nepo, on=['id'], how="left") # merge data frames
    
    # export
    csv_name_match = re.search('API/(.*)/', url)
    csv_name = csv_name_match.group(1)
    
    df.to_csv(csv_name + ".csv", index=False)
    
    return df

In [138]:
df = imdb_whole_shebang(top_250_mov_url, "film")


The Shawshank Redemption
https://en.wikipedia.org/wiki/The_Shawshank_Redemption
The Godfather
https://en.wikipedia.org/wiki/The_Godfather
The Dark Knight
https://en.wikipedia.org/wiki/The_Dark_Knight
The Godfather Part II
https://en.wikipedia.org/wiki/The_Godfather_Part_II
12 Angry Men
https://en.wikipedia.org/wiki/12_Angry_Men_(1957_film)
Schindler's List
https://en.wikipedia.org/wiki/Schindler%27s_List
The Lord of the Rings: The Return of the King
https://en.wikipedia.org/wiki/The_Lord_of_the_Rings:_The_Return_of_the_King
Pulp Fiction
https://en.wikipedia.org/wiki/Pulp_Fiction
The Lord of the Rings: The Fellowship of the Ring
https://en.wikipedia.org/wiki/The_Lord_of_the_Rings:_The_Fellowship_of_the_Ring
The Good, the Bad and the Ugly
https://en.wikipedia.org/wiki/The_Good,_the_Bad_and_the_Ugly
Forrest Gump
https://en.wikipedia.org/wiki/Forrest_Gump
Fight Club
https://en.wikipedia.org/wiki/Fight_Club
The Lord of the Rings: The Two Towers
https://en.wikipedia.org/wiki/The_Lord_of_the_

For a Few Dollars More
https://en.wikipedia.org/wiki/For_a_Few_Dollars_More
Dangal
https://en.wikipedia.org/wiki/Dangal_(film)
Batman Begins
https://en.wikipedia.org/wiki/Batman_Begins
The Kid
https://en.wikipedia.org/wiki/The_Kid_(1921_film)
Some Like It Hot
https://en.wikipedia.org/wiki/Some_Like_It_Hot
The Father
https://en.wikipedia.org/wiki/The_Father_(2020_film)
All About Eve
https://en.wikipedia.org/wiki/All_About_Eve
Green Book
https://en.wikipedia.org/wiki/Green_Book_(film)
The Wolf of Wall Street
https://en.wikipedia.org/wiki/The_Wolf_of_Wall_Street_(2013_film)
Judgment at Nuremberg
https://en.wikipedia.org/wiki/Judgment_at_Nuremberg
Ran
https://en.wikipedia.org/wiki/Ran_(film)
Casino
https://en.wikipedia.org/wiki/Casino_(1995_film)
Pan's Labyrinth
https://en.wikipedia.org/wiki/Pan%27s_Labyrinth
Unforgiven
https://en.wikipedia.org/wiki/Unforgiven
There Will Be Blood
https://en.wikipedia.org/wiki/There_Will_Be_Blood
The Truman Show
https://en.wikipedia.org/wiki/The_Truman_Show

Gandhi
https://en.wikipedia.org/wiki/Gandhi_(film)
Rififi
https://en.wikipedia.org/wiki/Rififi
Tim Robbins
Morgan Freeman
Bob Gunton
William Sadler
Clancy Brown
Gil Bellows
James Whitmore
Marlon Brando
Al Pacino
James Caan
Richard Castellano
Robert Duvall
Sterling Hayden
John Marley
Richard Conte
Diane Keaton
Christian Bale
Michael Caine
Heath Ledger
Gary Oldman
Aaron Eckhart
Maggie Gyllenhaal
Morgan Freeman
Al Pacino
Robert Duvall
Diane Keaton
Robert De Niro
Talia Shire
Morgana King
John Cazale
Mariana Hill
Lee Strasberg
Henry Fonda
Lee J. Cobb
Ed Begley
E.G. Marshall
Jack Warden
Liam Neeson
Ben Kingsley
Ralph Fiennes
Caroline Goodall
Jonathan Sagalle
Embeth Davidtz
Elijah Wood
Ian McKellen
Liv Tyler
Viggo Mortensen
Sean Astin
Cate Blanchett
John Rhys-Davies
Bernard Hill
Billy Boyd
Dominic Monaghan
Orlando Bloom
Hugo Weaving
Miranda Otto
David Wenham
Karl Urban
John Noble
Andy Serkis
Ian Holm
Sean Bean
John Travolta
Samuel L. Jackson
Uma Thurman
Harvey Keitel
Tim Roth
Amanda Plummer
M

Gwyneth Paltrow
Josh Brolin
Yōji Matsuda
Yuriko Ishida
Yūko Tanaka
Kaoru Kobayashi
Masahiko Nishimura
Tsunehiko Kamijo
Akihiro Miwa
Mitsuko Mori
Hisaya Morishige
Robert De Niro
James Woods
Elizabeth McGovern
Joe Pesci
Burt Young
Tuesday Weld
Treat Williams
Robin Williams
Matt Damon
Ben Affleck
Stellan Skarsgård
Minnie Driver
Ryūnosuke Kamiki
Mone Kamishiraishi
Ellen Burstyn
Jared Leto
Jennifer Connelly
Marlon Wayans
Christopher McDonald
Tom Cruise
Miles Teller
Jennifer Connelly
Jon Hamm
Glen Powell
Lewis Pullman
Ed Harris
Val Kilmer
Gene Kelly
Donald O'Connor
Debbie Reynolds
Jean Hagen
Millard Mitchell
Cyd Charisse
Aamir Khan
R. Madhavan
Sharman Joshi
Kareena Kapoor
Boman Irani
Omi Vaidya
Tom Hanks
Tim Allen
Joan Cusack
Don Rickles
Wallace Shawn
John Ratzenberger
Estelle Harris
Ned Beatty
Michael Keaton
Jodi Benson
John Morris
Toshiro Mifune
Tatsuya Nakadai
Kyōko Kagawa
Tatsuya Mihashi
Yutaka Sada
Mark Hamill
Harrison Ford
Carrie Fisher
Billy Dee Williams
Anthony Daniels
David Prowse
K

Chiwetel Ejiofor
Michael Fassbender
Benedict Cumberbatch
Paul Dano
Garret Dillahunt
Paul Giamatti
Scoot McNairy
Lupita Nyong'o
Adepero Oduye
Sarah Paulson
Brad Pitt
Michael Kenneth Williams
Alfre Woodard
Daniel Radcliffe
Rupert Grint
Emma Watson
Helena Bonham Carter
Robbie Coltrane
Warwick Davis
Ralph Fiennes
Michael Gambon
John Hurt
Jason Isaacs
Gary Oldman
Alan Rickman
Maggie Smith
David Thewlis
Julie Walters
Charlton Heston
Jack Hawkins
Haya Harareet
Stephen Boyd
Hugh Griffith
Martha Scott
Cathy O'Donnell
Sam Jaffe
Victor Sjöström
Bibi Andersson
Gunnar Björnstrand
Ingrid Thulin
Folke Sundquist
Naima Wifstrand
Ben Affleck
Rosamund Pike
Neil Patrick Harris
Tyler Perry
Carrie Coon
Joseph Cotten
Alida Valli
Orson Welles
Trevor Howard
SKIP
Ralph Fiennes
F. Murray Abraham
Mathieu Amalric
Adrien Brody
Willem Dafoe
Jeff Goldblum
Harvey Keitel
Jude Law
Bill Murray
Edward Norton
Saoirse Ronan
Jason Schwartzman
Léa Seydoux
Tilda Swinton
Tom Wilkinson
Owen Wilson
Tony Revolori
Jason Schwartzman

ValueError: Columns must be same length as key

In [158]:
 title_specs("tt0027977") # get top cast from imdb
# df = df.apply(lambda row : wiki_scrape(row['cast']), axis=1)

[['Charles Chaplin',
  'Paulette Goddard',
  'Henry Bergman',
  'Tiny Sandford',
  'Chester Conklin',
  'Hank Mann',
  'Stanley Blystone',
  'Al Ernest Garcia',
  'Richard Alexander',
  'Cecil Reynolds',
  'Mira McKinney',
  'Murdock MacQuarrie',
  'Wilfred Lucas',
  'Edward LeSaint',
  'Fred Malatesta',
  'Sammy Stein',
  'Juana Sutton',
  'Ted Oliver'],
 ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']]

In [54]:
def how_many_nepos(title, year, tv_or_film):
    cast = wiki_cast(title, year, tv_or_film)
    df = pd.DataFrame(cast, columns=['cast'])
    df.loc[:,"nepos"] = df.apply(lambda row : wiki_scrape(row['cast']), axis=1)
    
    tot_cast = len(cast)
    num_nepos = len(df[(df['nepos']!=False)])
    
    nepos = df[(df['nepos']!=False)]['cast'].tolist()
    nepos_print = ', '.join(nepos)
    
    if num_nepos == 1 :
        verb = 'is a'
        plural = "y"
    else :
        verb = 'are'
        plural = "ies"    
    
    print(f'Out of the {tot_cast} top cast members, {num_nepos} {verb} nepo bab{plural}.')
    print(f"{nepos_print} {verb} nepo bab{plural}.")
                

In [63]:
how_many_nepos('21 Jump Street', '2012', 'film')

21 Jump Street
https://en.wikipedia.org/wiki/21_Jump_Street_(film)
Jonah Hill
Channing Tatum
Brie Larson
Dave Franco
Rob Riggle
Ice Cube
Out of the 6 top cast members, 2 are nepo babies.
Jonah Hill, Dave Franco are nepo babies.


In [None]:
##########################
###   GET IMDB DATA    ###
##########################

In [62]:
top_250_tv = imdb_whole_shebang(top_250_tv_url, 'tv')

Planet Earth II
https://en.wikipedia.org/wiki/Planet_Earth_II
Breaking Bad
https://en.wikipedia.org/wiki/Breaking_Bad
Planet Earth
https://en.wikipedia.org/wiki/Planet_Earth_(2006_TV_series)
Band of Brothers
https://en.wikipedia.org/wiki/Band_of_Brothers_(miniseries)
Chernobyl
https://en.wikipedia.org/wiki/Chernobyl_(miniseries)
The Wire
https://en.wikipedia.org/wiki/The_Wire
Blue Planet II
https://en.wikipedia.org/wiki/Blue_Planet_II
Avatar: The Last Airbender
https://en.wikipedia.org/wiki/Avatar:_The_Last_Airbender
Cosmos: A Spacetime Odyssey
https://en.wikipedia.org/wiki/Cosmos:_A_Spacetime_Odyssey
The Sopranos
https://en.wikipedia.org/wiki/The_Sopranos
Cosmos
https://en.wikipedia.org/wiki/Cosmos:_A_Personal_Voyage
Our Planet
https://en.wikipedia.org/wiki/Our_Planet
Game of Thrones
https://en.wikipedia.org/wiki/Game_of_Thrones
Rick and Morty
https://en.wikipedia.org/wiki/Rick_and_Morty
The World at War
https://en.wikipedia.org/wiki/The_World_at_War
Fullmetal Alchemist: Brotherhood
h

https://en.wikipedia.org/wiki/List_of_Naruto:_Shippuden_episodes
Atlanta
https://en.wikipedia.org/wiki/Atlanta_(TV_series)
Primal
https://en.wikipedia.org/wiki/Primal_(TV_series)
Code Geass
https://en.wikipedia.org/wiki/Code_Geass
Blackadder II
https://en.wikipedia.org/wiki/Blackadder_II
The Bridge
https://en.wikipedia.org/wiki/The_Bridge_(2011_TV_series)
Daredevil
https://en.wikipedia.org/wiki/Daredevil_(TV_series)
It's a Sin
https://en.wikipedia.org/wiki/It%27s_a_Sin_(TV_series)
Heartstopper
https://en.wikipedia.org/wiki/Heartstopper_(TV_series)
Demon Slayer: Kimetsu no Yaiba
https://en.wikipedia.org/wiki/Demon_Slayer:_Kimetsu_no_Yaiba
Ramayan
https://en.wikipedia.org/wiki/Ramayan_(1987_TV_series)
Archer
https://en.wikipedia.org/wiki/Archer_(2009_TV_series)
Mystery Science Theater 3000
https://en.wikipedia.org/wiki/Mystery_Science_Theater_3000
Monster
https://en.wikipedia.org/wiki/Monster_(manga)
Lonesome Dove
https://en.wikipedia.org/wiki/Lonesome_Dove_(miniseries)
Adventure Time
ht

Queer Eye
https://en.wikipedia.org/wiki/Queer_Eye_(2018_TV_series)
SKIP
Bryan Cranston
Anna Gunn
Aaron Paul
Dean Norris
Betsy Brandt
RJ Mitte
Giancarlo Esposito
Bob Odenkirk
Jonathan Banks
Laura Fraser
Jesse Plemons
SKIP
Kirk Acevedo
Eion Bailey
Michael Cudlitz
Dale Dye
Rick Gomez
Scott Grimes
Frank John Hughes
Damian Lewis
Ron Livingston
James Madio
Neal McDonough
Rene L. Moreno
David Schwimmer
Richard Speight Jr.
Donnie Wahlberg
Matthew Settle
Douglas Spain
Rick Warden
Marc Warren
Shane Taylor
Dexter Fletcher
Colin Hanks
Ross McCall
Jared Harris
Stellan Skarsgård
Paul Ritter
Jessie Buckley
Adam Nagaitis
Con O'Neill
Adrian Rawlins
Sam Troughton
Robert Emms
Emily Watson
David Dencik
Mark Lewis Jones
Alan Williams
Alex Ferns
Ralph Ineson
Barry Keoghan
Fares Fares
Michael McElhatton
Dominic West
John Doman
Idris Elba
Frankie Faison
Larry Gilliard Jr.
Wood Harris
Deirdre Lovejoy
Wendell Pierce
Lance Reddick
Andre Royo
Sonja Sohn
Chris Bauer
Paul Ben-Victor
Clarke Peters
Amy Ryan
Aidan Gil

Cathy Cahlin Ryan
David Rees Snell
Paula Garcés
David Marciano
Hugh Laurie
Lisa Edelstein
Omar Epps
Robert Sean Leonard
Jennifer Morrison
Jesse Spencer
Peter Jacobson
Kal Penn
Olivia Wilde
Amber Tamblyn
Odette Annable
Charlyne Yi
Edward James Olmos
Mary McDonnell
Katee Sackhoff
Jamie Bamber
James Callis
Tricia Helfer
Grace Park
Michael Hogan
Aaron Douglas
Tahmoh Penikett
Paul Campbell
Nicki Clyne
Michael Trucco
Alessandro Juliani
Kandyse McClure
Hugh Bonneville
Jessica Brown Findlay
Laura Carmichael
Jim Carter
Brendan Coyle
Michelle Dockery
Siobhan Finneran
Joanne Froggatt
Phyllis Logan
Thomas Howes
Rob James-Collier
Rose Leslie
Elizabeth McGovern
Sophie McShera
Lesley Nicol
Maggie Smith
Dan Stevens
Penelope Wilton
Amy Nuttall
Kevin Doyle
Allen Leech
Matt Milne
Ed Speleers
Lily James
David Robb
Cara Theobold
Raquel Cassidy
Tom Cullen
Julian Ovenden
Michael Fox
Matthew Goode
Harry Hadden-Paton
Winona Ryder
David Harbour
Finn Wolfhard
Millie Bobby Brown
Gaten Matarazzo
Caleb McLaughlin
N

Miles Teller
Matthew Goode
Dan Fogler
Burn Gorman
Colin Hanks
Giovanni Ribisi
Juno Temple
Anya Taylor-Joy
Bill Camp
Moses Ingram
Isla Johnston
Christiane Seidel
Rebecca Root
Chloe Pirrie
Akemnji Ndifornyen
Marielle Heller
Harry Melling
Patrick Kennedy
Jacob Fortune-Lloyd
Thomas Brodie-Sangster
Marcin Dorociński
SKIP
Brian "Q" Quinn
James "Murr" Murray
Sal Vulcano
Joe Gatto
Jemaine Clement
Bret McKenzie
Rhys Darby
Kristen Schaal
Arj Barker
Kevin Conroy
George Newbern
Susan Eisenberg
Phil LaMarr
Michael Rosenbaum
Carl Lumbly
Maria Canals-Barrera
James Urbaniak
Patrick Warburton
Michael Sinterniklaas
Chris McCulloch
Doc Hammer
Steven Rattazzi
Dana Snyder
SKIP
Martin Landau
Barbara Bain
Barry Morse
Prentis Hancock
Nick Tate
Zienia Merton
Anton Phillips
Suzanne Roquette
Clifton Jones
Catherine Schell
Tony Anholt
John Hug
Jeffery Kissoon
Yasuko Nagazumi
Sam Dastor
Alibe Parsons
SKIP
Daniel Baldwin
Richard Belzer
Andre Braugher
Clark Johnson
Yaphet Kotto
Melissa Leo
Jon Polito
Kyle Secor
Ned 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fix_cast_imdb.loc[:,"cast"] = fix_cast_imdb.apply(lambda row : title_specs(row['id']), axis=1) # get top cast from imdb


David Attenborough
Chadden Hunter
Gordon Buchanan
Elizabeth White
Fredi Devas
Thomas Crowley
Ed Charles
Emma Napper
Toby Nowlan
Barrie Britton
Pete McCowen
Michael J. Sanderson
Max Hug Williams
Armin Holtze
J.P. Magnan
Jon Griffith
Jerome Poncet
Sandesh Kadur
Sigourney Weaver
David Attenborough
Nikolay Drozdov
Sanae Ueda
Thomas Anguti Johnston
Huw Cordey
Doug Allan
Mark Linfield
Chadden Hunter
Paul Stewart
Michael Kelem
Jeff Wilson
Rick Rosenthal
Warwick Sloss
Simon King
Jonathan Keeling
Justine Evans
Peter Scoones
David Attenborough
Peter Drost
Russell Boulter
Roger Munns
Roger Horrocks
Neil deGrasse Tyson
Christopher Emerson
Keythe Farley
Piotr Michael
Enn Reitel
Stoney Emshwiller
André Sogliuzzo
Amanda Seyfried
Kirsten Dunst
Phil LaMarr
Paul Sorvino
Larry Cedar
Christiane Amanpour
Seth MacFarlane
Fred Tatasciore
Martin Jarvis
Glenn Steinbaum
Maria Frucci
Carl Sagan
Jaromír Hanzlík
Jonathan Fahn
Bob Hevelone
Jean Charney
Linda Morabito
Cecilia White
Larry Soderblom
Ronald A. Hilbert


Tarla Joshi
Jeremy Brett
David Burke
Rosalie Williams
Eric Porter
Gayle Hunnicutt
Jeremy Kemp
Rosalind Knight
Tenniel Evans
David Gwillim
Norman Jones
Barbara Wilshere
Joss Ackland
Nicholas Clay
Charles Gray
Rosalie Crutchley
Roger Hammond
Betsy Brantley
Rosalyn Landor
Jeremy Clarkson
Richard Hammond
James May
The Stig
Ben Collins
Chris Harris
Andrew Flintoff
Paddy McGuinness
Matt LeBlanc
Rory Reid
Perry McCarthy
Sabine Schmitz
Jason Dawe
Andy Wilman
Chris Evans
Josh Cohen
Tom Chilton
Eddie Jordan
Harish Bhimani
Nitish Bharadwaj
Mukesh Khanna
Gajendra Chauhan
Praveen Kumar Sobti
Feroz Khan
Roopa Ganguly
Puneet Issar
Pankaj Dheer
Gufi Paintal
Girija Shankar
Surendra Pal
Renuka Israni
Nazneen
Virendra Razdan
Vinod Kapoor
Sameer Chitre
Sanjeev Chitre
Andrew Jarecki
Robert Durst
Gary Napoli
Jeanine Pirro
Michael Simon Hall
Kevin J. Hynes
Debra Kay Anderson
Dick DeGuerin
Chelsea Gonzalez
Marc Smerling
Susan Berman
Gilberte Najamy
Charles Bagli
Jeszenia Jimenez
Cody Cazalas
Sareb Kaufman
Ell

Antonio Te Maioha
Stephen Lovatt
Shane Rangi
Jessica Grace Smith
Raicho Vasilev
Ioane King
Marisa Ramirez
Gareth Williams
Jeffrey Thomas
Jaime Murray
Ralph Macchio
William Zabka
Courtney Henggeler
Xolo Maridueña
Tanner Buchanan
Mary Mouser
Jacob Bertrand
Gianni DeCenzo
Martin Kove
Vanessa Rubio
Peyton List
Owen Morgan
Aedin Mincks
Jayden Rivers
Joe Seo
Griffin Santopietro
Khalil Everage
Hannah Kepple
Serban Pavlu
Maria Obretin
Andreea Vasile
Madalina Craiu
Gabriel Huian
Sergiu Costache
Doru Ana
Dan Hurduc
Stefan Velniciuc
Laurentiu Bãnescu
Sorin Cocis
Mihai Calin
Vitalie Bantas
Dorel Visan
Costel Cascaval
Augustin Viziru
Silvana Mihai
Ion Grosu
Francis Mallmann
Ruth Reichl
Massimo Bottura
Bill Buford
Magnus Nilsson
Lara Gilmore
Dan Barber
Niki Nakayama
Ben Shewry
David Chang
Dominique Crenn
Grant Achatz
Daniel Humm
Gaggan Anand
Valter Kramar
Alex Atala
Ana Ros
Colman Andrews
Alexander Skarsgård
James Ransone
Lee Tergesen
Jon Huertas
Stark Sands
Billy Lush
Jonah Lotan
Wilson Bethel
Pawe

In [66]:
top_250_mov = imdb_whole_shebang(top_250_mov_url, "film")
top_250_tv = imdb_whole_shebang(top_250_tv_url, "tv")
pop_mov = imdb_whole_shebang(pop_mov_url, "film")
pop_tv = imdb_whole_shebang(pop_tv_url, 'tv')
#box_office = imdb_whole_shebang(box_office_all_time_url)

The Shawshank Redemption
https://en.wikipedia.org/wiki/The_Shawshank_Redemption
The Godfather
https://en.wikipedia.org/wiki/The_Godfather
The Dark Knight
https://en.wikipedia.org/wiki/The_Dark_Knight
The Godfather Part II
https://en.wikipedia.org/wiki/The_Godfather_Part_II
12 Angry Men
https://en.wikipedia.org/wiki/12_Angry_Men_(1957_film)
Schindler's List
https://en.wikipedia.org/wiki/Schindler%27s_List
The Lord of the Rings: The Return of the King
https://en.wikipedia.org/wiki/The_Lord_of_the_Rings:_The_Return_of_the_King
Pulp Fiction
https://en.wikipedia.org/wiki/Pulp_Fiction
The Lord of the Rings: The Fellowship of the Ring
https://en.wikipedia.org/wiki/The_Lord_of_the_Rings:_The_Fellowship_of_the_Ring
The Good, the Bad and the Ugly
https://en.wikipedia.org/wiki/The_Good,_the_Bad_and_the_Ugly
Forrest Gump
https://en.wikipedia.org/wiki/Forrest_Gump
Fight Club
https://en.wikipedia.org/wiki/Fight_Club
The Lord of the Rings: The Two Towers
https://en.wikipedia.org/wiki/The_Lord_of_the_

For a Few Dollars More
https://en.wikipedia.org/wiki/For_a_Few_Dollars_More
Dangal
https://en.wikipedia.org/wiki/Dangal_(film)
Batman Begins
https://en.wikipedia.org/wiki/Batman_Begins
The Kid
https://en.wikipedia.org/wiki/The_Kid_(1921_film)
Some Like It Hot
https://en.wikipedia.org/wiki/Some_Like_It_Hot
The Father
https://en.wikipedia.org/wiki/The_Father_(2020_film)
All About Eve
https://en.wikipedia.org/wiki/All_About_Eve
Green Book
https://en.wikipedia.org/wiki/Green_Book_(film)
The Wolf of Wall Street
https://en.wikipedia.org/wiki/The_Wolf_of_Wall_Street_(2013_film)
Judgment at Nuremberg
https://en.wikipedia.org/wiki/Judgment_at_Nuremberg
Ran
https://en.wikipedia.org/wiki/Ran_(film)
Casino
https://en.wikipedia.org/wiki/Casino_(1995_film)
Pan's Labyrinth
https://en.wikipedia.org/wiki/Pan%27s_Labyrinth
Unforgiven
https://en.wikipedia.org/wiki/Unforgiven
There Will Be Blood
https://en.wikipedia.org/wiki/There_Will_Be_Blood
The Truman Show
https://en.wikipedia.org/wiki/The_Truman_Show

Gandhi
https://en.wikipedia.org/wiki/Gandhi_(film)
Rififi
https://en.wikipedia.org/wiki/Rififi
Tim Robbins
Morgan Freeman
Bob Gunton
William Sadler
Clancy Brown
Gil Bellows
James Whitmore
Marlon Brando
Al Pacino
James Caan
Richard Castellano
Robert Duvall
Sterling Hayden
John Marley
Richard Conte
Diane Keaton
Christian Bale
Michael Caine
Heath Ledger
Gary Oldman
Aaron Eckhart
Maggie Gyllenhaal
Morgan Freeman
Al Pacino
Robert Duvall
Diane Keaton
Robert De Niro
Talia Shire
Morgana King
John Cazale
Mariana Hill
Lee Strasberg
Henry Fonda
Lee J. Cobb
Ed Begley
E.G. Marshall
Jack Warden
Liam Neeson
Ben Kingsley
Ralph Fiennes
Caroline Goodall
Jonathan Sagalle
Embeth Davidtz
Elijah Wood
Ian McKellen
Liv Tyler
Viggo Mortensen
Sean Astin
Cate Blanchett
John Rhys-Davies
Bernard Hill
Billy Boyd
Dominic Monaghan
Orlando Bloom
Hugo Weaving
Miranda Otto
David Wenham
Karl Urban
John Noble
Andy Serkis
Ian Holm
Sean Bean
John Travolta
Samuel L. Jackson
Uma Thurman
Harvey Keitel
Tim Roth
Amanda Plummer
M

Gwyneth Paltrow
Josh Brolin
Yōji Matsuda
Yuriko Ishida
Yūko Tanaka
Kaoru Kobayashi
Masahiko Nishimura
Tsunehiko Kamijo
Akihiro Miwa
Mitsuko Mori
Hisaya Morishige
Robert De Niro
James Woods
Elizabeth McGovern
Joe Pesci
Burt Young
Tuesday Weld
Treat Williams
Robin Williams
Matt Damon
Ben Affleck
Stellan Skarsgård
Minnie Driver
Ryūnosuke Kamiki
Mone Kamishiraishi
Ellen Burstyn
Jared Leto
Jennifer Connelly
Marlon Wayans
Christopher McDonald
Tom Cruise
Miles Teller
Jennifer Connelly
Jon Hamm
Glen Powell
Lewis Pullman
Ed Harris
Val Kilmer
Gene Kelly
Donald O'Connor
Debbie Reynolds
Jean Hagen
Millard Mitchell
Cyd Charisse
Aamir Khan
R. Madhavan
Sharman Joshi
Kareena Kapoor
Boman Irani
Omi Vaidya
Tom Hanks
Tim Allen
Joan Cusack
Don Rickles
Wallace Shawn
John Ratzenberger
Estelle Harris
Ned Beatty
Michael Keaton
Jodi Benson
John Morris
Toshiro Mifune
Tatsuya Nakadai
Kyōko Kagawa
Tatsuya Mihashi
Yutaka Sada
Mark Hamill
Harrison Ford
Carrie Fisher
Billy Dee Williams
Anthony Daniels
David Prowse
K

Chiwetel Ejiofor
Michael Fassbender
Benedict Cumberbatch
Paul Dano
Garret Dillahunt
Paul Giamatti
Scoot McNairy
Lupita Nyong'o
Adepero Oduye
Sarah Paulson
Brad Pitt
Michael Kenneth Williams
Alfre Woodard
Daniel Radcliffe
Rupert Grint
Emma Watson
Helena Bonham Carter
Robbie Coltrane
Warwick Davis
Ralph Fiennes
Michael Gambon
John Hurt
Jason Isaacs
Gary Oldman
Alan Rickman
Maggie Smith
David Thewlis
Julie Walters
Charlton Heston
Jack Hawkins
Haya Harareet
Stephen Boyd
Hugh Griffith
Martha Scott
Cathy O'Donnell
Sam Jaffe
Victor Sjöström
Bibi Andersson
Gunnar Björnstrand
Ingrid Thulin
Folke Sundquist
Naima Wifstrand
Ben Affleck
Rosamund Pike
Neil Patrick Harris
Tyler Perry
Carrie Coon
Joseph Cotten
Alida Valli
Orson Welles
Trevor Howard
SKIP
Ralph Fiennes
F. Murray Abraham
Mathieu Amalric
Adrien Brody
Willem Dafoe
Jeff Goldblum
Harvey Keitel
Jude Law
Bill Murray
Edward Norton
Saoirse Ronan
Jason Schwartzman
Léa Seydoux
Tilda Swinton
Tom Wilkinson
Owen Wilson
Tony Revolori
Jason Schwartzman

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fix_cast_imdb.loc[:,"cast"] = fix_cast_imdb.apply(lambda row : title_specs(row['id']), axis=1) # get top cast from imdb


François Cluzet
Omar Sy
Anne Le Ny
Audrey Fleurot
Joséphine de Meaux
Clotilde Mollet
Alba Gaïa Bellugi
Cyril Mendy
Salimata Kamate
Absa Diatou Toure
Grégoire Oestermann
Dominique Daguier
François Caron
Christian Ameri
Thomas Solivérès
Dorothée Brière
Marie-Laure Descoureaux
Émilie Caen
Charles Chaplin
Paulette Goddard
Henry Bergman
Tiny Sandford
Chester Conklin
Hank Mann
Stanley Blystone
Al Ernest Garcia
Richard Alexander
Cecil Reynolds
Mira McKinney
Murdock MacQuarrie
Wilfred Lucas
Edward LeSaint
Fred Malatesta
Sammy Stein
Juana Sutton
Ted Oliver
Tyrone Power
Marlene Dietrich
Charles Laughton
Elsa Lanchester
John Williams
Henry Daniell
Ian Wolfe
Torin Thatcher
Norma Varden
Una O'Connor
Francis Compton
Philip Tonge
Ruta Lee
Patrick Aherne
Don Ames
Walter Bacon
Eddie Baker
Benjie Bancroft
Sigourney Weaver
Michael Biehn
Carrie Henn
Paul Reiser
Lance Henriksen
Bill Paxton
William Hope
Jenette Goldstein
Al Matthews
Mark Rolston
Ricco Ross
Colette Hiller
Daniel Kash
Cynthia Dale Scott
Tip T

https://en.wikipedia.org/wiki/Only_Fools_and_Horses
The Civil War
https://en.wikipedia.org/wiki/The_Civil_War_(miniseries)
Hunter x Hunter
https://en.wikipedia.org/wiki/Hunter_%C3%97_Hunter_(2011_TV_series)
True Detective
https://en.wikipedia.org/wiki/True_Detective
Better Call Saul
https://en.wikipedia.org/wiki/Better_Call_Saul
Seinfeld
https://en.wikipedia.org/wiki/Seinfeld
The Beatles: Get Back
https://en.wikipedia.org/wiki/The_Beatles:_Get_Back
Dekalog
https://en.wikipedia.org/wiki/Dekalog
Persona
https://en.wikipedia.org/wiki/%C5%9Eahsiyet
Fargo
https://en.wikipedia.org/wiki/Fargo_(TV_series)
Cowboy Bebop
https://en.wikipedia.org/wiki/Cowboy_Bebop
Clarkson's Farm
https://en.wikipedia.org/wiki/Clarkson%27s_Farm
Nathan for You
https://en.wikipedia.org/wiki/Nathan_for_You
Gravity Falls
https://en.wikipedia.org/wiki/Gravity_Falls
Last Week Tonight with John Oliver
https://en.wikipedia.org/wiki/Last_Week_Tonight_with_John_Oliver
When They See Us
https://en.wikipedia.org/wiki/When_They_

Boardwalk Empire
https://en.wikipedia.org/wiki/Boardwalk_Empire
Justified
https://en.wikipedia.org/wiki/Justified_(TV_series)
The Eric Andre Show
https://en.wikipedia.org/wiki/The_Eric_Andre_Show
The Newsroom
https://en.wikipedia.org/wiki/The_Newsroom_(American_TV_series)
Pose
https://en.wikipedia.org/wiki/Pose_(TV_series)
The Haunting of Hill House
https://en.wikipedia.org/wiki/The_Haunting_of_Hill_House_(TV_series)
The Bugs Bunny Show
https://en.wikipedia.org/wiki/The_Bugs_Bunny_Show
Crash Landing on You
https://en.wikipedia.org/wiki/Crash_Landing_on_You
The Offer
https://en.wikipedia.org/wiki/The_Offer
The Queen's Gambit
https://en.wikipedia.org/wiki/The_Queen%27s_Gambit_(miniseries)
Formula 1: Drive to Survive
https://en.wikipedia.org/wiki/Formula_1:_Drive_to_Survive
Impractical Jokers
https://en.wikipedia.org/wiki/Impractical_Jokers
Flight of the Conchords
https://en.wikipedia.org/wiki/Flight_of_the_Conchords_(TV_series)
Justice League
https://en.wikipedia.org/wiki/Justice_League_

Harry Lloyd
JB Blanc
Reed Shannon
Mick Wingert
SKIP
Steve Carell
Rainn Wilson
John Krasinski
Jenna Fischer
B. J. Novak
Melora Hardin
David Denman
Leslie David Baker
Brian Baumgartner
Kate Flannery
Angela Kinsey
Oscar Nunez
Phyllis Smith
Ed Helms
Mindy Kaling
Paul Lieberstein
Creed Bratton
Craig Robinson
Ellie Kemper
Zach Woods
Amy Ryan
James Spader
Catherine Tate
Clark Duke
Jake Lacy
Nathan Fillion
Gina Torres
Alan Tudyk
Morena Baccarin
Adam Baldwin
Jewel Staite
Sean Maher
Summer Glau
Ron Glass
SKIP
SKIP
SKIP
SKIP
Matthew McConaughey
Woody Harrelson
Michelle Monaghan
Michael Potts
Tory Kittles
Colin Farrell
Rachel McAdams
Taylor Kitsch
Kelly Reilly
Vince Vaughn
Mahershala Ali
Carmen Ejogo
Stephen Dorff
Scoot McNairy
Ray Fisher
Jodie Foster
Kali Reis
Bob Odenkirk
Jonathan Banks
Rhea Seehorn
Patrick Fabian
Michael Mando
Michael McKean
Giancarlo Esposito
Tony Dalton
Jerry Seinfeld
Julia Louis-Dreyfus
Michael Richards
Jason Alexander
John Lennon
Paul McCartney
George Harrison
Ringo Starr
S

Connie Britton
Gaius Charles
Zach Gilford
Minka Kelly
Adrianne Palicki
Taylor Kitsch
Jesse Plemons
Scott Porter
Aimee Teegarden
Michael B. Jordan
Jurnee Smollett
Matt Lauria
Madison Burge
Grey Damon
SKIP
Mike Skinner
Abbie Eaton
Claire Foy
Matt Smith
Vanessa Kirby
Eileen Atkins
Jeremy Northam
Victoria Hamilton
Ben Miles
Greg Wise
Jared Harris
John Lithgow
Alex Jennings
Lia Williams
Anton Lesser
Matthew Goode
Olivia Colman
Tobias Menzies
Helena Bonham Carter
Ben Daniels
Jason Watkins
Marion Bailey
Erin Doherty
Charles Dance
Josh O'Connor
Gillian Anderson
Emma Corrin
Stephen Boxer
Emerald Fennell
Imelda Staunton
Jonathan Pryce
Lesley Manville
Dominic West
Jonny Lee Miller
Olivia Williams
Claudia Harrison
Natascha McElhone
Marcia Warren
Elizabeth Debicki
Kevin Conroy
George Newbern
Susan Eisenberg
Phil LaMarr
Michael Rosenbaum
Carl Lumbly
Maria Canals
Peter Capaldi
Chris Addison
Chris Langham
Joanna Scanlan
James Smith
Polly Kemp
Rebecca Front
Roger Allam
Vincent Franklin
Olivia Poulet
Wi

Ricky Gervais
Martin Freeman
Mackenzie Crook
Lucy Davis
Stirling Gallacher
Oliver Chris
Ralph Ineson
Patrick Baladi
Stacey Roca
Elizabeth Berrington
SKIP
Jared Keeso
Nathan Dales
Michelle Mylett
K. Trevor Wilson
Dylan Playfair
Andrew Herr
Tyler Johnston
Alexander de Jordy
Daniel Petronijevic
Melanie Scrofano
Jacob Tierney
Lisa Codrington
Tiio Horn
Evan Stern
Mark Forward
Sarah Gadon
Clark Backo
James Daly
Magalie Lépine-Blondeau
Tyler Hynes
Kamilla Kowal
Ryan Stiles
Colin Mochrie
Wayne Brady
William H. Macy
Emmy Rossum
Justin Chatwin
Ethan Cutkosky
Shanola Hampton
Steve Howey
Emma Kenney
Jeremy Allen White
Cameron Monaghan
Noel Fisher
Joan Cusack
Laura Slade Wiggins
Zach McGowan
Emma Greenwell
Jake McDorman
Emily Bergl
Isidora Goreshter
Richard Flood
Christian Isaiah
Kate Miner
Evan Rachel Wood
Thandiwe Newton
Jeffrey Wright
James Marsden
Ingrid Bolsø Berdal
Luke Hemsworth
Sidse Babett Knudsen
Simon Quarterman
Rodrigo Santoro
Angela Sarafyan
Shannon Woodward
Ed Harris
Anthony Hopkins
B

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fix_cast_imdb.loc[:,"cast"] = fix_cast_imdb.apply(lambda row : title_specs(row['id']), axis=1) # get top cast from imdb


David Attenborough
Chadden Hunter
Gordon Buchanan
Elizabeth White
Fredi Devas
Thomas Crowley
Ed Charles
Emma Napper
Toby Nowlan
Barrie Britton
Pete McCowen
Michael J. Sanderson
Max Hug Williams
Armin Holtze
J.P. Magnan
Jon Griffith
Jerome Poncet
Sandesh Kadur
Sigourney Weaver
David Attenborough
Nikolay Drozdov
Sanae Ueda
Thomas Anguti Johnston
Huw Cordey
Doug Allan
Mark Linfield
Chadden Hunter
Paul Stewart
Michael Kelem
Jeff Wilson
Rick Rosenthal
Warwick Sloss
Simon King
Jonathan Keeling
Justine Evans
Peter Scoones
David Attenborough
Peter Drost
Russell Boulter
Roger Munns
Roger Horrocks
Neil deGrasse Tyson
Christopher Emerson
Keythe Farley
Piotr Michael
Enn Reitel
Stoney Emshwiller
André Sogliuzzo
Amanda Seyfried
Kirsten Dunst
Phil LaMarr
Paul Sorvino
Larry Cedar
Christiane Amanpour
Seth MacFarlane
Fred Tatasciore
Martin Jarvis
Glenn Steinbaum
Maria Frucci
Carl Sagan
Jaromír Hanzlík
Jonathan Fahn
Bob Hevelone
Jean Charney
Linda Morabito
Cecilia White
Larry Soderblom
Ronald A. Hilbert


Tarla Joshi
Jeremy Brett
David Burke
Rosalie Williams
Eric Porter
Gayle Hunnicutt
Jeremy Kemp
Rosalind Knight
Tenniel Evans
David Gwillim
Norman Jones
Barbara Wilshere
Joss Ackland
Nicholas Clay
Charles Gray
Rosalie Crutchley
Roger Hammond
Betsy Brantley
Rosalyn Landor
Jeremy Clarkson
Richard Hammond
James May
The Stig
Ben Collins
Chris Harris
Andrew Flintoff
Paddy McGuinness
Matt LeBlanc
Rory Reid
Perry McCarthy
Sabine Schmitz
Jason Dawe
Andy Wilman
Chris Evans
Josh Cohen
Tom Chilton
Eddie Jordan
Harish Bhimani
Nitish Bharadwaj
Mukesh Khanna
Gajendra Chauhan
Praveen Kumar Sobti
Feroz Khan
Roopa Ganguly
Puneet Issar
Pankaj Dheer
Gufi Paintal
Girija Shankar
Surendra Pal
Renuka Israni
Nazneen
Virendra Razdan
Vinod Kapoor
Sameer Chitre
Sanjeev Chitre
Andrew Jarecki
Robert Durst
Gary Napoli
Jeanine Pirro
Michael Simon Hall
Kevin J. Hynes
Debra Kay Anderson
Dick DeGuerin
Chelsea Gonzalez
Marc Smerling
Susan Berman
Gilberte Najamy
Charles Bagli
Jeszenia Jimenez
Cody Cazalas
Sareb Kaufman
Ell

Antonio Te Maioha
Stephen Lovatt
Shane Rangi
Jessica Grace Smith
Raicho Vasilev
Ioane King
Marisa Ramirez
Gareth Williams
Jeffrey Thomas
Jaime Murray
Ralph Macchio
William Zabka
Courtney Henggeler
Xolo Maridueña
Tanner Buchanan
Mary Mouser
Jacob Bertrand
Gianni DeCenzo
Martin Kove
Vanessa Rubio
Peyton List
Owen Morgan
Aedin Mincks
Jayden Rivers
Joe Seo
Griffin Santopietro
Khalil Everage
Hannah Kepple
Serban Pavlu
Maria Obretin
Andreea Vasile
Madalina Craiu
Gabriel Huian
Sergiu Costache
Doru Ana
Dan Hurduc
Stefan Velniciuc
Laurentiu Bãnescu
Sorin Cocis
Mihai Calin
Vitalie Bantas
Dorel Visan
Costel Cascaval
Augustin Viziru
Silvana Mihai
Ion Grosu
Francis Mallmann
Ruth Reichl
Massimo Bottura
Bill Buford
Magnus Nilsson
Lara Gilmore
Dan Barber
Niki Nakayama
Ben Shewry
David Chang
Dominique Crenn
Grant Achatz
Daniel Humm
Gaggan Anand
Valter Kramar
Alex Atala
Ana Ros
Colman Andrews
Alexander Skarsgård
James Ransone
Lee Tergesen
Jon Huertas
Stark Sands
Billy Lush
Jonah Lotan
Wilson Bethel
Pawe

Where the Crawdads Sing
https://en.wikipedia.org/wiki/Where_the_Crawdads_Sing_(film)
The Holiday
https://en.wikipedia.org/wiki/The_Holiday
Scream
https://en.wikipedia.org/wiki/Scream_(2022_film)
Strange World
https://en.wikipedia.org/wiki/Strange_World_(film)
The Wonder
https://en.wikipedia.org/wiki/The_Wonder_(film)
Freddy
https://en.wikipedia.org/wiki/Freddy_(film)
Zoolander
https://en.wikipedia.org/wiki/Zoolander
Doctor G
https://en.wikipedia.org/wiki/Doctor_G
Farha
https://en.wikipedia.org/wiki/Farha_(film)
The Godfather
https://en.wikipedia.org/wiki/The_Godfather
I Believe in Santa
https://en.wikipedia.org/wiki/J%27ai_Rencontr%C3%A9_Le_P%C3%A8re_No%C3%ABl
This Is Christmas
https://en.wikipedia.org/wiki/This_Is_Christmas_(film)
The Super Mario Bros. Movie
https://en.wikipedia.org/wiki/The_Super_Mario_Bros._Movie
Pearl
https://en.wikipedia.org/wiki/Pearl_(2022_film)
Pulp Fiction
https://en.wikipedia.org/wiki/Pulp_Fiction
The Santa Clause 2
https://en.wikipedia.org/wiki/The_Santa_Cla

Samson Kayo
John Mulaney
Wagner Moura
Da'Vine Joy Randolph
Anthony Mendez
Felix Kammerer
Albrecht Schuch
Daniel Brühl
Sebastian Hülk
Benedict Cumberbatch
Rashida Jones
Kenan Thompson
Angela Lansbury
Pharrell Williams
Cate Blanchett
Noémie Merlant
Nina Hoss
Sophie Kauer
Julian Glover
Allan Corduner
Mark Strong
SKIP
Luke Evans
Olivia Colman
Jessie Buckley
Jonathan Pryce
Johnny Flynn
James Cosmo
Trevor Dion Nicholas
Nathalie Issa
Manal Issa
Ahmed Malek
Matthias Schweighöfer
Ali Suliman
Kinda Alloush
James Krishna Floyd
Riley Dandy
Sam Delich
Jonah Ray
Rishab Shetty
Kishore
Achyuth Kumar
Sapthami Gowda
Daniel Kaluuya
Keke Palmer
Steven Yeun
Michael Wincott
Brandon Perea
Keith David
Daisy Edgar-Jones
Taylor John Smith
Harris Dickinson
Michael Hyatt
Sterling Macer, Jr.
Jojo Regina
Garret Dillahunt
Ahna O'Reilly
David Strathairn
Kate Winslet
Cameron Diaz
Jude Law
Jack Black
Melissa Barrera
Mason Gooding
Jenna Ortega
Jack Quaid
Marley Shelton
Courteney Cox
David Arquette
Neve Campbell
Jake Gyl

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fix_cast_imdb.loc[:,"cast"] = fix_cast_imdb.apply(lambda row : title_specs(row['id']), axis=1) # get top cast from imdb


Lindsay Lohan
Chord Overstreet
George Young
Jack Wagner
Olivia Perez
Alejandra Flores
Sean Dillingham
William 'Bus' Riley
Aliana Lohan
Kate Rachesky
Oscar Rudecindo
Lonzo Liggins
Antonio D. Charity
Omar Wilder
Blythe Howard
Iris Caldwell
Arthur Anderson
Allan Groves
Christina Moore
John Ducey
Violet McGraw
Sachin Bhatt
Lateefah Holder
Matthew Glave
Paxton Booth
Brooke Dillman
Mark W. Gray
Kim House
Tim Mollen
Matthew Nguyen
Shaun Paul Piccinino
Missi Pyle
Mia Goth
David Corenswet
Tandi Wright
Matthew Sunderland
Emma Jenkins-Purro
Alistair Sewell
Amelia Reid
Gabe McDonnell
Lauren Stewart
Todd Rippon
Grace Acheson
Wednesday
https://en.wikipedia.org/wiki/Wednesday_(TV_series)
The White Lotus
https://en.wikipedia.org/wiki/The_White_Lotus
Yellowstone
https://en.wikipedia.org/wiki/Yellowstone_(American_TV_series)
Willow
https://en.wikipedia.org/wiki/Willow_(TV_series)
1899
https://en.wikipedia.org/wiki/1899_(TV_series)
Tulsa King
https://en.wikipedia.org/wiki/Tulsa_King
The Peripheral
https:

JJ Feild
T'Nia Miller
Louis Herthum
Katie Leung
Melinda Page Hamilton
Chris Coy
Alex Hernandez
Julian Moore-Cook
Adelind Horan
Austin Rising
Eli Goree
Charlotte Riley
Alexandra Billings
Tim Allen
Elizabeth Mitchell
Austin Kane
Elizabeth Allen-Dick
Matilda Lawler
Devin Bright
Rupali Redd
Kal Penn
Diego Luna
Stellan Skarsgård
Genevieve O'Reilly
Kyle Soller
Adria Arjona
Fiona Shaw
Denise Gough
Faye Marsay
Varada Sethu
Elizabeth Dulau
Claire Foy
Matt Smith
Vanessa Kirby
Eileen Atkins
Jeremy Northam
Victoria Hamilton
Ben Miles
Greg Wise
Jared Harris
John Lithgow
Alex Jennings
Lia Williams
Anton Lesser
Matthew Goode
Olivia Colman
Tobias Menzies
Helena Bonham Carter
Ben Daniels
Jason Watkins
Marion Bailey
Erin Doherty
Charles Dance
Josh O'Connor
Gillian Anderson
Emma Corrin
Stephen Boxer
Emerald Fennell
Imelda Staunton
Jonathan Pryce
Lesley Manville
Dominic West
Jonny Lee Miller
Olivia Williams
Claudia Harrison
Natascha McElhone
Marcia Warren
Elizabeth Debicki
Dafne Keen
Ruth Wilson
Anne-Mari

Dylan McDermott
Evan Peters
Taissa Farmiga
Denis O'Hare
Jessica Lange
Zachary Quinto
Joseph Fiennes
Sarah Paulson
Lily Rabe
Lizzie Brocheré
James Cromwell
Frances Conroy
Emma Roberts
Kathy Bates
Michael Chiklis
Finn Wittrock
Angela Bassett
Wes Bentley
Matt Bomer
Chloë Sevigny
Cheyenne Jackson
Lady Gaga
Cuba Gooding Jr.
André Holland
Billie Lourd
Alison Pill
Adina Porter
Leslie Grossman
Cody Fern
Matthew Morrison
Gus Kenworthy
John Carroll Lynch
Angelica Ross
Zach Villa
Macaulay Culkin
Ryan Kiera Armstrong
Neal McDonough
Kaia Gerber
Nico Greetham
Isaac Powell
Rachel Hilson
Rebecca Dayan
Russell Tovey
Joe Mantello
Charlie Carver
Sandra Bernhard
Patti LuPone
Gabriel Macht
Patrick J. Adams
Rick Hoffman
Meghan Markle
Sarah Rafferty
Gina Torres
Amanda Schull
Dulé Hill
Katherine Heigl
Melissa Roxburgh
Josh Dallas
Athena Karkanis
J. R. Ramirez
Luna Blaise
Jack Messina
Parveen Kaur
Matt Long
Holly Taylor
Daryl Edwards
Ty Doran
Rose McIver
Utkarsh Ambudkar
Brandon Scott Jones
Danielle Pinnock
Ri

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fix_cast_imdb.loc[:,"cast"] = fix_cast_imdb.apply(lambda row : title_specs(row['id']), axis=1) # get top cast from imdb


Jennifer Coolidge
Jon Gries
F. Murray Abraham
Adam DiMarco
Meghann Fahy
Beatrice Grannò
Tom Hollander
Sabrina Impacciatore
Michael Imperioli
Theo James
Aubrey Plaza
Haley Lu Richardson
Will Sharpe
Simona Tabasco
Leo Woodall
Eleonora Romandini
Murray Bartlett
Connie Britton
Emilia Clarke
Peter Dinklage
Kit Harington
Lena Headey
Sophie Turner
Maisie Williams
Nikolaj Coster-Waldau
Iain Glen
John Bradley
Alfie Allen
Conleth Hill
Liam Cunningham
Gwendoline Christie
Aidan Gillen
Isaac Hempstead Wright
Rory McCann
Nathalie Emmanuel
Jerome Flynn
Ellen Pompeo
Chandra Wilson
James Pickens Jr.
Justin Chambers
Kevin McKidd
Jesse Williams
Bokhee An
Patrick Dempsey
Sara Ramirez
Camilla Luddington
Jessica Capshaw
Sandra Oh
Sarah Drew
Caterina Scorsone
Kelly McCreary
Kim Raver
Jason George
Eric Dane
Morfydd Clark
Ismael Cruz Cordova
Charlie Vickers
Markella Kavenagh
Megan Richards
Sara Zwangobani
Daniel Weyman
Cynthia Addai-Robinson
Lenny Henry
Lloyd Owen
Nazanin Boniadi
Dylan Smith
Alex Tarrant
Tyroe

In [9]:
df = get_imdb_lists(pop_tv_url, api_key) # get list of titles from imdb

In [10]:
df['cast'] = df.apply(lambda row : wiki_cast(row['title'], row['year'], "tv series"), axis=1)

Wednesday
https://en.wikipedia.org/wiki/Wednesday_(TV_series)
The White Lotus
https://en.wikipedia.org/wiki/The_White_Lotus
Yellowstone
https://en.wikipedia.org/wiki/Yellowstone_(American_TV_series)
Willow
https://en.wikipedia.org/wiki/Willow_(TV_series)
1899
https://en.wikipedia.org/wiki/1899_(TV_series)
Tulsa King
https://en.wikipedia.org/wiki/Tulsa_King
The Peripheral
https://en.wikipedia.org/wiki/The_Peripheral_(TV_series)
The Santa Clauses
https://en.wikipedia.org/wiki/The_Santa_Clauses
Andor
https://en.wikipedia.org/wiki/Andor_(TV_series)
The Crown
https://en.wikipedia.org/wiki/The_Crown_(TV_series)
His Dark Materials
https://en.wikipedia.org/wiki/His_Dark_Materials_(TV_series)
National Treasure: Edge of History
https://en.wikipedia.org/wiki/National_Treasure:_Edge_of_History
The Walking Dead
https://en.wikipedia.org/wiki/The_Walking_Dead_(TV_series)
Rick and Morty
https://en.wikipedia.org/wiki/Rick_and_Morty
The Sex Lives of College Girls
https://en.wikipedia.org/wiki/The_Sex_Li

In [11]:
df = df[(df["cast"] != False)]

In [12]:
df = df.explode('cast').reset_index(drop=True) # expand list of cast into rows

In [13]:
df.loc[:,"nepos"] = df.apply(lambda row : wiki_scrape(row['cast']), axis=1)

Jenna Ortega
Gwendoline Christie
Riki Lindhome
Jamie McShane
Hunter Doohan
Percy Hynes White
Emma Myers
Joy Sunday
Georgie Farmer
Naomi J. Ogawa
Christina Ricci
Moosa Mostafa
SKIP
Kevin Costner
Luke Grimes
Kelly Reilly
Wes Bentley
Cole Hauser
Kelsey Asbille
Brecken Merrill
Jefferson White
Danny Huston
Gil Birmingham
Forrie J. Smith
Denim Richards
Ian Bohen
Finn Little
Ryan Bingham
Wendy Moniz
Jennifer Landon
Kathryn Kelly
Moses Brings Plenty
Warwick Davis
Ellie Bamber
Ruby Cruz
Erin Kellyman
Tony Revolori
Amar Chadha-Patel
Dempsey Bryk
Emily Beecham
Aneurin Barnard
Andreas Pietschmann
Miguel Bernardeau
José Pimentão
Isabella Wei
Gabby Wong
Yann Gael
Mathilde Ollivier
Jonas Bloquet
Rosalie Craig
Maciej Musiał
Clara Rosager
Lucas Lynggaard Tønnesen
Maria Erwolter
Alexandre Willaume
Tino Mewes
Isaak Dentler
Fflyn Edwards
Anton Lesser
Sylvester Stallone
Max Casella
Domenick Lombardozzi
Vincent Piazza
Jay Will
A.C. Peterson
Andrea Savage
Martin Starr
Garrett Hedlund
Dana Delany
Annabella Sc

Jeremy Swift
Phil Dunster
Brett Goldstein
Brendan Hunt
Nick Mohammed
Juno Temple
Sarah Niles
Jennifer Aniston
Courteney Cox
Lisa Kudrow
Matt LeBlanc
Matthew Perry
David Schwimmer
SKIP
Elisabeth Moss
Joseph Fiennes
Yvonne Strahovski
Alexis Bledel
Madeline Brewer
Ann Dowd
O-T Fagbenle
Max Minghella
Samira Wiley
Amanda Brugel
Bradley Whitford
Sam Jaeger
Jessica Ann Collins
Luke Evans
Michiel Huisman
Elizabeth Anweis
Carolyn Jones
John Astin
Jackie Coogan
Ted Cassidy
Blossom Rock
Ken Weatherwax
Lisa Loring
Various Doctors
David Tennant
Various companions
Jessica Chastain
Michael Shannon
Steve Zahn
David Wilson Barnes
Walton Goggins
Johnny Galecki
Jim Parsons
Kaley Cuoco
Simon Helberg
Kunal Nayyar
Sara Gilbert
Mayim Bialik
Melissa Rauch
Kevin Sussman
Laura Spencer
Mark Harmon
Sasha Alexander
Michael Weatherly
Pauley Perrette
David McCallum
Sean Murray
Cote de Pablo
Lauren Holly
Rocky Carroll
Brian Dietzen
Emily Wickersham
Wilmer Valderrama
Jennifer Esposito
Duane Henry
Maria Bello
Diona Rea

In [14]:
noskip_df = df[df['cast'] != 'SKIP']
nolist_df = noskip_df[~noskip_df['cast'].str.contains("list", case=False)]
correct_df = nolist_df[~nolist_df['cast'].str.contains("various", case=False)]

In [15]:
fix_cast_imdb = df[(df['cast'] == 'SKIP') | df['cast'].str.contains("list", case=False) | df['cast'].str.contains("various", case=False)]
fix_cast_imdb.loc[:,"cast"] = fix_cast_imdb.apply(lambda row : title_specs(row['id']), axis=1) # get top cast from imdb
fix_cast_imdb = fix_cast_imdb.explode('cast').reset_index(drop=True) # expand list of cast into rows
fix_cast_imdb.loc[:,"nepos"] = fix_cast_imdb.apply(lambda row : wiki_scrape(row['cast']), axis=1)
fix_cast_imdb


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fix_cast_imdb.loc[:,"cast"] = fix_cast_imdb.apply(lambda row : title_specs(row['id']), axis=1) # get top cast from imdb


Jennifer Coolidge
Jon Gries
F. Murray Abraham
Adam DiMarco
Meghann Fahy
Beatrice Grannò
Tom Hollander
Sabrina Impacciatore
Michael Imperioli
Theo James
Aubrey Plaza
Haley Lu Richardson
Will Sharpe
Simona Tabasco
Leo Woodall
Eleonora Romandini
Murray Bartlett
Connie Britton
Emilia Clarke
Peter Dinklage
Kit Harington
Lena Headey
Sophie Turner
Maisie Williams
Nikolaj Coster-Waldau
Iain Glen
John Bradley
Alfie Allen
Conleth Hill
Liam Cunningham
Gwendoline Christie
Aidan Gillen
Isaac Hempstead Wright
Rory McCann
Nathalie Emmanuel
Jerome Flynn
Ellen Pompeo
Chandra Wilson
James Pickens Jr.
Justin Chambers
Kevin McKidd
Jesse Williams
Bokhee An
Patrick Dempsey
Sara Ramirez
Camilla Luddington
Jessica Capshaw
Sandra Oh
Sarah Drew
Caterina Scorsone
Kelly McCreary
Kim Raver
Jason George
Eric Dane
Morfydd Clark
Ismael Cruz Cordova
Charlie Vickers
Markella Kavenagh
Megan Richards
Sara Zwangobani
Daniel Weyman
Cynthia Addai-Robinson
Lenny Henry
Lloyd Owen
Nazanin Boniadi
Dylan Smith
Alex Tarrant
Tyroe

Unnamed: 0,id,rank,rankUpDown,title,fullTitle,year,image,crew,imDbRating,imDbRatingCount,cast,nepos
0,tt13406094,2,0,The White Lotus,The White Lotus (2021),2021,https://m.media-amazon.com/images/M/MV5BYjdjNz...,"Jennifer Coolidge, Jon Gries",7.8,101504,Jennifer Coolidge,False
1,tt13406094,2,0,The White Lotus,The White Lotus (2021),2021,https://m.media-amazon.com/images/M/MV5BYjdjNz...,"Jennifer Coolidge, Jon Gries",7.8,101504,Jon Gries,False
2,tt13406094,2,0,The White Lotus,The White Lotus (2021),2021,https://m.media-amazon.com/images/M/MV5BYjdjNz...,"Jennifer Coolidge, Jon Gries",7.8,101504,F. Murray Abraham,False
3,tt13406094,2,0,The White Lotus,The White Lotus (2021),2021,https://m.media-amazon.com/images/M/MV5BYjdjNz...,"Jennifer Coolidge, Jon Gries",7.8,101504,Adam DiMarco,False
4,tt13406094,2,0,The White Lotus,The White Lotus (2021),2021,https://m.media-amazon.com/images/M/MV5BYjdjNz...,"Jennifer Coolidge, Jon Gries",7.8,101504,Meghann Fahy,False
...,...,...,...,...,...,...,...,...,...,...,...,...
208,tt0096697,100,-2,The Simpsons,The Simpsons (1989),1989,https://m.media-amazon.com/images/M/MV5BYjFkMT...,"Dan Castellaneta, Nancy Cartwright",8.7,408342,Kevin Michael Richardson,False
209,tt0096697,100,-2,The Simpsons,The Simpsons (1989),1989,https://m.media-amazon.com/images/M/MV5BYjFkMT...,"Dan Castellaneta, Nancy Cartwright",8.7,408342,Phil Hartman,False
210,tt0096697,100,-2,The Simpsons,The Simpsons (1989),1989,https://m.media-amazon.com/images/M/MV5BYjFkMT...,"Dan Castellaneta, Nancy Cartwright",8.7,408342,Joe Mantegna,False
211,tt0096697,100,-2,The Simpsons,The Simpsons (1989),1989,https://m.media-amazon.com/images/M/MV5BYjFkMT...,"Dan Castellaneta, Nancy Cartwright",8.7,408342,Alex Désert,False


In [16]:
df = pd.concat([correct_df, fix_cast_imdb]).reset_index(drop=True)
df.drop_duplicates(subset=['id','cast'], keep='last')

Unnamed: 0,id,rank,rankUpDown,title,fullTitle,year,image,crew,imDbRating,imDbRatingCount,cast,nepos
0,tt13443470,1,0,Wednesday,Wednesday (2022),2022,https://m.media-amazon.com/images/M/MV5BM2ZmMj...,"Jenna Ortega, Gwendoline Christie",8.3,161600,Jenna Ortega,False
1,tt13443470,1,0,Wednesday,Wednesday (2022),2022,https://m.media-amazon.com/images/M/MV5BM2ZmMj...,"Jenna Ortega, Gwendoline Christie",8.3,161600,Gwendoline Christie,False
2,tt13443470,1,0,Wednesday,Wednesday (2022),2022,https://m.media-amazon.com/images/M/MV5BM2ZmMj...,"Jenna Ortega, Gwendoline Christie",8.3,161600,Riki Lindhome,False
3,tt13443470,1,0,Wednesday,Wednesday (2022),2022,https://m.media-amazon.com/images/M/MV5BM2ZmMj...,"Jenna Ortega, Gwendoline Christie",8.3,161600,Jamie McShane,False
4,tt13443470,1,0,Wednesday,Wednesday (2022),2022,https://m.media-amazon.com/images/M/MV5BM2ZmMj...,"Jenna Ortega, Gwendoline Christie",8.3,161600,Hunter Doohan,[https://en.wikipedia.org/wiki/Peter_Doohan]
...,...,...,...,...,...,...,...,...,...,...,...,...
1315,tt0096697,100,-2,The Simpsons,The Simpsons (1989),1989,https://m.media-amazon.com/images/M/MV5BYjFkMT...,"Dan Castellaneta, Nancy Cartwright",8.7,408342,Kevin Michael Richardson,False
1316,tt0096697,100,-2,The Simpsons,The Simpsons (1989),1989,https://m.media-amazon.com/images/M/MV5BYjFkMT...,"Dan Castellaneta, Nancy Cartwright",8.7,408342,Phil Hartman,False
1317,tt0096697,100,-2,The Simpsons,The Simpsons (1989),1989,https://m.media-amazon.com/images/M/MV5BYjFkMT...,"Dan Castellaneta, Nancy Cartwright",8.7,408342,Joe Mantegna,False
1318,tt0096697,100,-2,The Simpsons,The Simpsons (1989),1989,https://m.media-amazon.com/images/M/MV5BYjFkMT...,"Dan Castellaneta, Nancy Cartwright",8.7,408342,Alex Désert,False


In [17]:
df_pct_nepo = pct_nepo(df, "id") # calculate pct nepo by title

Unnamed: 0_level_0,pct_nepo
id,Unnamed: 1_level_1
tt0057729,0.428571
tt0096697,
tt0098904,0.250000
tt0108778,0.333333
tt0118401,0.125000
...,...
tt9012876,
tt9059350,0.125000
tt9253284,0.300000
tt9288030,


In [18]:
df = df.merge(df_pct_nepo, on=['id'], how="left") # merge data frames

In [19]:
df.to_csv("poptv" + ".csv", index=False)

In [None]:
##########################
###  DUPED NEPO ACTORS ###
##########################

In [None]:
#cols_to_select = ['id', 'title', 'year', 'cast', 'nepos', 'pct_nepo']

# create one big fat df with all the diff imdb lists
# select desired cols
# top_250_mov_skinny = top_250_mov[cols_to_select]
# top_250_tv_skinny = top_250_tv[cols_to_select]
# box_office_skinny = box_office[cols_to_select]

# add column for type (tv or film)
top_250_mov['type'] = 'film'
top_250_tv['type'] = 'tv'
pop_mov['type'] = 'film'
pop_tv['type'] = 'tv'
#box_office_skinny['type'] = 'film'

all_df = [pop_mov, pop_tv]
df = pd.concat(all_df)

# drop actors+films groups that appear in >1 imdb list
df.drop_duplicates(subset=['id', 'cast'], keep=False)
df = df.drop(['crew', 'rank'], axis=1)
df['pct_nepo'] = df['pct_nepo'].replace(np.nan, 0) # no nepo babies in the top cast
df["imDbRatingCount"] = pd.to_numeric(df["imDbRatingCount"])

df.to_csv("all_imdb_data.csv", index=False)

### NEPOS ###
# create df with just nepo babies
nepos_df = df.loc[df['nepos'] != False]

# get count of how many titles a nepo baby appears in
nepos_num_titles = nepos_df.groupby(['cast']).size().reset_index(name='num_titles')
mean_nepos_repeats = nepos_num_titles['num_titles'].mean()

nepos_df.to_csv('nepos_only.csv', index=False)

### NOT NEPOS ###
non_nepo_df = df.loc[df['nepos'] == False]
non_nepo_num_titles = non_nepo_df.groupby(['cast']).size().reset_index(name='num_titles')
mean_non_nepo_repeats = non_nepo_num_titles['num_titles'].mean()

### OVERALL COUNTS ###
distinct_nepos = nepos_df.groupby(['cast'])
num_nepo_cast = distinct_nepos.ngroups

distinct_cast = df.groupby(['cast'])
num_cast = distinct_cast.ngroups

distinct_titles = df.groupby(['id'])
num_titles = distinct_titles.ngroups

stat_dict = {
    "total_cast": num_cast,
    "nepos_cast": num_nepo_cast,
    "num_titles": num_titles
    }

In [None]:
##########################
###     USER INPUT     ###
##########################

In [None]:
person_input = input("name: ")
person_cnxn = wiki_scrape(person_input)

if person_cnxn is False :
    print(f"{person_input} is not a nepo baby")
else :
    print(f'{person_input} is a nepo baby. Here are the connection links: {person_cnxn}')

In [None]:
url_search_base = "https://imdb-api.com/en/API/SearchTitle/k_11m5rq35/"
movie_input = input("movie or tv show: ")

data = requests.get(url_search_base + movie_input).json()
data['results']


In [None]:
movie_input = input("movie or tv show: ")
movie_choices = ia.search_movie(movie_input)

movie_choices_list = []

# only showing 5 possible options
for i in range(5) :
    
    index = [1, 2, 3, 4, 5]
    rank = index[i]
    
    movie_title = movie_choices[i]['title']
    movie_id = movie_choices[i].movieID
    
    movie = ia.get_movie(movie_id)
    
    short_cast_list = []
    try :
        cast = movie['cast']
        for i in range(3):
            short_cast_list.append(cast[i]['name'])
    
        formatted_cast_list = f"starring {short_cast_list[0]}, {short_cast_list[1]}, and {short_cast_list[2]}"
    except KeyError:
        formatted_cast_list = ""
    except IndexError:
        formatted_cast_list = ""
        
    try :
        year = movie['year']
        year = f" ({year})"
    except KeyError:
        year = ""
    print(f"{rank}: {movie_title}{year} {formatted_cast_list}")

movie_choice_input = input(f'people are not very creative with titles so there are a few titles - which number is the correct one?: ')

movie_id = movie_choices[int(movie_choice_input)-1].movieID
movie_url = imdb_base_url + "tt" + movie_id

movie_full_cast = title_specs("tt"+movie_id)

df = pd.DataFrame([movie_full_cast], columns=['title', 'cast', 'imdb_rating'])
df = df.explode('cast').reset_index(drop=True)
df.loc[:,"nepos"] = df.apply(lambda row : wiki_scrape(row['cast']), axis=1)
title_pct_nepo = pct_nepo(df, 'title')
pct_nepo_str = title_pct_nepo['pct_nepo'][0]

nepo_rows = df[df['nepos'] != False]

print(f'{movie_input} is {pct_nepo_str}% full of nepotism babies. The following people are the culprits: {nepo_rows}')
