# Celebrity Guest Stars (from Wikipedia)

Identify which episodes had Special Guest Stars. 

Unfortunately, there does not appear to be a simple list for this, so we have to use the full list of guest stars and reduce it.

In [83]:
# First 20 seasons
# https://en.wikipedia.org/wiki/List_of_The_Simpsons_guest_stars_(seasons_1%E2%80%9320)

# Seasons 21 - current
# https://en.wikipedia.org/wiki/List_of_The_Simpsons_guest_stars

from requests import get
from bs4 import BeautifulSoup
import pandas as pd

In [84]:
# Request from the server the content of the web page by using get(), and store the server’s response in the variable response
response = get('https://en.wikipedia.org/wiki/List_of_The_Simpsons_guest_stars_(seasons_1%E2%80%9320)')

# Parse the content of the request with BeautifulSoup
page_html = BeautifulSoup(response.text, 'html.parser')

In [85]:
# Extract the only sortable table (contains the list of guest-stars)
guest_table = page_html.find('table', class_='sortable')

In [86]:
rows = guest_table.find_all('tr')

In [87]:
guest_list = []
for row in rows:
    name = row.find('th')
    name = name.text.strip()
    cols=row.find_all('td')
    cols=[x.text.strip() for x in cols]
    cols.append(name)
    guest_list.append(cols)
    
del guest_list[0] # Delete header

In [88]:
guests = pd.DataFrame(guest_list, columns=['season', 'role', 'no', 'prodCode',
                                                             'epTitle', 'GuestStar'])

In [89]:
guests.head()

Unnamed: 0,season,role,no,prodCode,epTitle,GuestStar
0,1,Edna Krabappel Ms. Melon,002–102,7G02,"""Bart the Genius""",Marcia Wallace
1,1,Worker,003–103,7G03,"""Homer's Odyssey""[35]",Sam McMurray
2,1,Edna Krabappel,003–103,7G03,"""Homer's Odyssey""",Marcia Wallace
3,1,Ms. Barr,006–106,7G06,"""Moaning Lisa""",Miriam Flynn
4,1,Bleeding Gums Murphy,006–106,7G06,"""Moaning Lisa""[36]",Ron Taylor


In [99]:
guests['GuestStar'].value_counts()

Dana Gould                   3
Terry W. Greene              3
Joan Kenley                  3
Pamela Reed                  3
Eric Idle                    3
Sab Shimono                  3
John Kassir                  3
Stacy Keach                  3
George Takei                 3
Lona Williams                3
Glenn Close                  3
Gene Merlino                 3
Julia Louis-Dreyfus          3
James Earl Jones             3
Stephen Hawking              3
Gary Coleman                 3
Alex Rocco                   3
Adam West                    2
William Daniels              2
Tito Puente                  2
Betty White                  2
Sam McMurray                 2
Ron Taylor                   2
Elizabeth Taylor             2
"Weird Al" Yankovic          2
Neil Armstrong (archival)    2
Gedde Watanabe               2
NRBQ                         2
Dick Tufeld                  2
Thomas Pynchon               2
                            ..
Fyvush Finkel                1
Ted Dans

In [96]:
remove_list = ['Marcia Wallace', 'Phil Hartman', 'Frank Welker', 'Joe Mantegna', 'Maurice LaMarche',
              'Kelsey Grammer', 'Jon Lovitz', 'Jane Kaczmarek', 'Jan Hooks', 'Michael Dees', 'Albert Brooks',
              'Sally Stevens', 'Kipp Lennon', 'Michael Carrington', 'Charles Napier']

guests = guests[~guests['GuestStar'].isin(remove_list)]

In [100]:
guests.reset_index(drop=True)

Unnamed: 0,season,role,no,prodCode,epTitle,GuestStar
0,1,Worker,003–103,7G03,"""Homer's Odyssey""[35]",Sam McMurray
1,1,Ms. Barr,006–106,7G06,"""Moaning Lisa""",Miriam Flynn
2,1,Bleeding Gums Murphy,006–106,7G06,"""Moaning Lisa""[36]",Ron Taylor
3,1,Cowboy Bob,007–107,7G09,"""The Call of the Simpsons""[37]",Albert Brooks[B]
4,1,Gulliver Dark,010–110,7G10,"""Homer's Night Out""[39]",Sam McMurray
5,1,Gendarme Officer,011–111,7G13,"""The Crepes of Wrath""",Christian Coffinet
6,1,Babysitter service receptionistDoofy the Elf,013–113,7G01,"""Some Enchanted Evening""[41]",June Foray
7,1,Ms. Botz / Lucille Botzcowski,013–113,7G01,"""Some Enchanted Evening""[42]",Penny Marshall
8,1,Florist,013–113,7G01,"""Some Enchanted Evening""[41]",Paul Willson
9,2,Karl,015–202,7F02,"""Simpson and Delilah""[43]",Harvey Fierstein


In [98]:
guests['role'].value_counts()

Himself                                                                   205
Herself                                                                    35
Themselves                                                                 18
Narrator                                                                    5
Roger Meyers Jr.                                                            3
Maggie Simpson                                                              3
Mona Simpson                                                                3
Bleeding Gums Murphy                                                        3
Declan Desmond                                                              3
Play the end credits                                                        3
Telephone voice                                                             3
Gloria                                                                      3
Sings "Canyonero!"                                              

In [None]:
celeb_markers = ['Himself', 'Herself', 'Themselves', 'Narrator'] # People credited as themselves are most likely celebrity guests