# Scraping Wikipedia page for species named after fictional characters

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

In [2]:
response = requests.get("https://en.wikipedia.org/wiki/List_of_organisms_named_after_works_of_fiction")
doc = BeautifulSoup(response.text, 'html.parser')

In [3]:
doc.find('h1').text

'List of organisms named after works of fiction'

In [4]:
rows = []
current_h2 = None
current_h3 = None

for tag in doc.find_all(['h2', 'h3', 'tbody']):
    if tag.name == 'h2':
        current_h2 = tag.get_text(strip=True)
        current_h3 = None 
    elif tag.name == 'h3':
        current_h3 = tag.get_text(strip=True)
    elif tag.name == 'tbody':
        for tr in tag.find_all('tr'):
            cells = tr.find_all('td')
            row_data = {}

            if len(cells) > 0:
                try:
                    row_data['taxon'] = cells[0].find('i').get_text(strip=True)
                except:
                    try:
                        row_data['taxon'] = cells[0].get_text(strip=True)
                    except:
                        row_data['taxon'] = None


                try:
                    row_data['named_by'] = cells[0].find('small').get_text(strip=True)
                except:
                    try:
                        row_data['named_by'] = cells[0].find('span').get_text(strip=True)
                    except:
                        row_data['named_by'] = cells[0].get_text(strip=True)
            else:
                row_data['taxon'] = None
                row_data['named_by'] = None


            if len(cells) > 1:
                row_data['type'] = cells[1].get_text(strip=True)
            else:
                row_data['type'] = None


            if len(cells) > 2:
                row_data['namesake'] = cells[2].get_text(strip=True)
            else:
                row_data['namesake'] = None


            if len(cells) > 3:
                row_data['notes'] = cells[3].get_text(strip=True)
            else:
                row_data['notes'] = None


            row_data['media_type'] = current_h2
            row_data['fictional_universe'] = current_h3

            rows.append(row_data)
            print(f"Scraped: {row_data['taxon']}")

print(len(rows))

Scraped: None
Scraped: Antigone antigone
Scraped: Saguinus oedipus
Scraped: Pseudoeurycea rex
Scraped: Oedipodrilus oedipus
Scraped: Amblytylus peitho
Scraped: Atomophora astraia
Scraped: Pseudotanais gaiae
Scraped: Pseudotanais uranos
Scraped: Phyllodrepa daedali
Scraped: Phyllodrepa icari
Scraped: Hotwheels sisyphus
Scraped: None
Scraped: Lokiceratops
Scraped: Medusaceratops lokii
Scraped: Joermungandr bolti
Scraped: Jormungandr walhallaensis
Scraped: Voconia loki
Scraped: None
Scraped: Gigantactis gargantua
Scraped: Gargantuavis
Scraped: Notoetayoa gargantuai
Scraped: Epimeria gargantua
Scraped: None
Scraped: Wukongopterus
Scraped: Ectatosticta wukong
Scraped: Ectatosticta xuanzang
Scraped: Ectatosticta bajie
Scraped: Ectatosticta dapeng
Scraped: Ectatosticta rulai
Scraped: Syntelia sunwukong
Scraped: Salassa sunwukongi
Scraped: Brevistoma raksasiae
Scraped: Ambulyx wukong
Scraped: None
Scraped: Oberonia
Scraped: Sycorax
Scraped: Peneothello
Scraped: Pigrogromitus
Scraped: Queubus
S

In [5]:
df = pd.json_normalize(rows)
df

Unnamed: 0,taxon,named_by,type,namesake,notes,media_type,fictional_universe
0,,,,,,Literature,Greek mythology
1,Antigone antigone,"(Linnaeus, 1758)",Crane,Antigone of Troy,"The species was named after ""Antigone, daughte...",Literature,Greek mythology
2,Saguinus oedipus,"(Linnaeus, 1758)",New World monkey,Oedipus Rex,"""Linnaeus had a penchant for giving primates n...",Literature,Greek mythology
3,Pseudoeurycea rex,"(Dunn, 1921)",Salamander,Oedipus Rex,Species formerly namedOedipus rex,Literature,Greek mythology
4,Oedipodrilus oedipus,"Holt, 1967",Segmented worm,Oedipus Rex,,Literature,Greek mythology
...,...,...,...,...,...,...,...
714,Epicratinus raiden,"Gonçalves & Brescovit, 2024",Spider,"Raiden,Mortal Kombat","""Raiden is of ""truth and light"" and protector ...",Games,Other games
715,,,,,,Other media,
716,Hotwheels sisyphus,"Liu & Zhang, 2024",Spider,Hot Wheels,"""The generic name refers to Hot Wheels, a coll...",Other media,
717,Orsonwelles bellum,"Hormiga, 2002",Spider,The War of the Worlds(1938 radio drama),"""This species, collected below the radio tower...",Other media,


In [6]:
df.isna().sum()

taxon                  75
named_by               75
type                   77
namesake               77
notes                 112
media_type              0
fictional_universe      4
dtype: int64

In [7]:
df.to_csv("species_fiction.csv")

The above list does not contain species named after Harry Potter, LOTR or Star Wars. These are separate Wiki pages with their own tables. 
# Scraping list of species named after characters from Harry Potter

In [8]:
response_HP = requests.get("https://en.wikipedia.org/wiki/List_of_organisms_named_after_the_Harry_Potter_series")
doc_HP = BeautifulSoup(response_HP.text, 'html.parser')

In [9]:


rows_HP = []
h2_HP = None

for tag in doc_HP.find_all(['h2', 'tbody']):
    if tag.name == 'h2':
        h2_HP = tag.get_text(strip=True)
        if h2_HP.lower() in ['references', 'see also', 'external links']:
            break  
    elif tag.name == 'tbody':
        for tr in tag.find_all('tr'):
            cells = tr.find_all('td')
            row_HP = {}
            try:
                row_HP['taxon'] = cells[0].find('a').get_text()
            except:
                row_HP['taxon'] = None
            try:
                row_HP['named_by'] = cells[0].find('span').get_text()
            except:
                row_HP['named_by'] = None
            try:
                row_HP['type'] = cells[1].find('a').get_text()
            except:
                row_HP['type'] = None
            try:
                row_HP['namesake'] = cells[2].find('a').get_text()
            except:
                row_HP['namesake'] = None
            try:
                row_HP['notes'] = cells[3].get_text(strip=True)
            except:
                row_HP['notes'] = None

            # Attach h2
            row_HP['character_type'] = h2_HP

            rows_HP.append(row_HP)
            
len(rows_HP)

24

In [10]:
df_HP = pd.json_normalize(rows_HP)
df_HP.tail(15)

Unnamed: 0,taxon,named_by,type,namesake,notes,character_type
9,Aname aragog,"Harvey et al, 2012",Trapdoor spider,Aragog,"""This species is named for J. K. Rowling’s fic...",Named after magical creatures
10,Ampulex dementor,"Ohl, 2014",Cockroach wasp,Dementor,"""The new species is named after the 'dementors...",Named after magical creatures
11,Cis occamy,"Rosa-Oliveira & Lopes-Andrade, 2023",Minute tree-fungus beetle,Occamy,"""The species name is inspired by the animal Oc...",Named after magical creatures
12,Thestral,"Faúndez & Rider, 2014",Stink bug,Thestral,"""The ivory carinae and calluses on the dorsum ...",Named after magical creatures
13,Leiocanthus nagini,"Sørensen et al, 2016",Kinorhynch,Nagini,"""The species name,nagini, is the female versio...",Named after magical creatures
14,Graphorn,"Faúndez, Rider, & Carvajal, 2017",Stink bug,Graphorn,"""Graphorn [...] from the fictional creature cr...",Named after magical creatures
15,Lycosa aragogi,"Nadolny & Zamani, 2017",Wolf spider,Aragog,"""This species is named after Aragog, the famou...",Named after magical creatures
16,Attacobius demiguise,"Pereira-Filho, Saturnino & Bonaldo, 2018",Corinnid sac spider,Demiguise,"""The Demiguise is described in the Magizoology...",Named after magical creatures
17,Ochyrocera aragogue,"Brescovit, Cizauskas & Mota, 2018",Spider,Aragog,"""The specific name refers to Aragog, a spider ...",Named after magical creatures
18,Macrobiotus naginae,"Vecchi et al., 2022",Tardigrade,Nagini,"""Named after J. K. Rowling’s Harry Potter book...",Named after magical creatures


In [11]:
df_HP['media_type'] = 'Literature'
df_HP['fictional_universe'] = 'Harry Potter'

In [12]:
df_HP

Unnamed: 0,taxon,named_by,type,namesake,notes,character_type,media_type,fictional_universe
0,,,,,,Named after wizards,Literature,Harry Potter
1,Eriovixia gryffindori,"Ahmed, Khalap & Sumukha, 2016",Orb-weaver spider,Godric Gryffindor,"""This uniquely shaped spider derives its name ...",Named after wizards,Literature,Harry Potter
2,Harryplax severus,"Mendoza & Ng, 2017",Pseudozioid crab,Harry Potter,"""The new genus is named primarily in honor of ...",Named after wizards,Literature,Harry Potter
3,Lusius malfoyi,Saunders & Ward 2017,Wasp,Lucius Malfoy,"""This species is named after Lucius Malfoy, a ...",Named after wizards,Literature,Harry Potter
4,Leptanilla voldemort,,Ant,Lord Voldemort,"""The fearsome antagonists in Harry Potter and ...",Named after wizards,Literature,Harry Potter
5,Cardiomya minerva,"Carvalho de Lima, Oliveira & Absalão, 2020",Bivalve,Minerva McGonagall,"""This species is named after the character Min...",Named after wizards,Literature,Harry Potter
6,Alastor moody,"Selis, 2020",Potter wasp,Alastor Moody,"""The name of this species is dedicated to the...",Named after wizards,Literature,Harry Potter
7,Trimeresurus salazar,"Mirza et al., 2020",Pit viper,Salazar Slytherin,"""The specific epithet is a noun in apposition ...",Named after wizards,Literature,Harry Potter
8,,,,,,Named after magical creatures,Literature,Harry Potter
9,Aname aragog,"Harvey et al, 2012",Trapdoor spider,Aragog,"""This species is named for J. K. Rowling’s fic...",Named after magical creatures,Literature,Harry Potter


In [13]:
#Reading in species_fiction dataset 
species = pd.read_csv("species_fiction.csv")

In [14]:
# Adding HP dataset to species dataset
species = pd.concat([species, df_HP], ignore_index=True)

In [15]:
species.shape

(743, 9)

# Scraping list of species named after Tolkien and his works

In [16]:
response = requests.get("https://en.wikipedia.org/wiki/List_of_things_named_after_J._R._R._Tolkien_and_his_works")
doc_tolk = BeautifulSoup(response.text, 'html.parser')

In [17]:
rows_tolk = []
h3_tolk = None
h2_found = False

for tag in doc_tolk.find_all(['h2', 'h3', 'tbody']):
    if tag.name == 'h2':
        h2_text = tag.get_text(strip=True)
        if h2_text == 'Taxonomy':
            h2_found = True
        elif h2_found:
            break  

    elif h2_found and tag.name == 'h3':
        h3_tolk = tag.get_text(strip=True)

    elif h2_found and tag.name == 'tbody':
        for tr in tag.find_all('tr'):
            cells = tr.find_all('td')
            row_tolk = {}

            try:
                row_tolk['taxon'] = cells[0].find('i').get_text(strip=True)
            except:
                try:
                    row_tolk['taxon'] = cells[0].get_text(strip=True)
                except:
                    row_tolk['taxon'] = None

            try:
                row_tolk['named_by'] = cells[0].find('span').get_text(strip=True)
            except:
                row_tolk['named_by'] = None

            try:
                row_tolk['type'] = cells[1].get_text(strip=True)
            except:
                row_tolk['type'] = None

            try:
                row_tolk['namesake'] = cells[2].get_text(strip=True)
            except:
                row_tolk['namesake'] = None

            try:
                row_tolk['notes'] = cells[3].get_text(strip=True)
            except:
                row_tolk['notes'] = None

        
            row_tolk['character_type'] = h3_tolk
            row_tolk['fictional_universe'] = 'Tolkien'
            row_tolk['media_type'] = 'Literature'

            rows_tolk.append(row_tolk)

print(f"Scraped {len(rows_tolk)} Tolkien rows.")


Scraped 230 Tolkien rows.


In [18]:
df_tolk = pd.json_normalize(rows_tolk)
df_tolk.tail(15)

Unnamed: 0,taxon,named_by,type,namesake,notes,character_type,fictional_universe,media_type
215,Hyperlais orodruinella,"Korb, Gorbunov & Melyakh, 2023",Moth,Orodruin(Mount Doom),"""We name the new species after Mount Orodruin....",Objects and locations,Tolkien,Literature
216,,,,,,Elvish words,Tolkien,Literature
217,Aletodon mellon,"Van Valen, 1978",Fossil mammal,mellon,"""Sindarin (Elvish) mellon, friend, the passwor...",Elvish words,Tolkien,Literature
218,Chriacus calenancus,"Van Valen, 1978",Fossil mammal,"calen,anca","""Sindarin (Elvish) calen, green; anca, Jaws. R...",Elvish words,Tolkien,Literature
219,Litomylus alphamon,Van Valen 1978,Fossil mammal,"alph,amon","""Sindarin (Elvish) alph, swan, and amon, hill....",Elvish words,Tolkien,Literature
220,Mimatuta minuial,"Van Valen, 1978",Fossil mammal,minuial,"""Sindarin (Elvish) minuial, the time at dawn w...",Elvish words,Tolkien,Literature
221,Thangorodrim thalion,"Van Valen, 1978",Fossil mammal,thalion,"""Sindarin (Elvish) thalion, strong. Reference ...",Elvish words,Tolkien,Literature
222,Elachista aranella,"Kaila, 1999",Moth,aran-,"""E. aranellaseems to owe its name toaran-, the...",Elvish words,Tolkien,Literature
223,Helicops nentur,"Costaet al, 2016",Snake,"nen,tur","""The namenenturis formed by the (Quenya) words...",Elvish words,Tolkien,Literature
224,Hylaeus mellon,"Dathe and Proshchalykin, 2016",Bee,mellon,"""mellon (High Elvish [Sindarin]): friend""",Elvish words,Tolkien,Literature


In [19]:
# Adding Tolkien dataset to species dataset
species = pd.concat([species, df_tolk], ignore_index=True)

In [20]:
species.shape

(973, 9)

# Scraping list of species named after Star Wars

In [21]:
response = requests.get("https://en.wikipedia.org/wiki/List_of_organisms_named_after_the_Star_Wars_series")
doc_star = BeautifulSoup(response.text, 'html.parser')

In [22]:
doc_star.find('h1').text

'List of organisms named after the Star Wars series'

In [23]:
rows_star = []
h2_star = None

for tag in doc_star.find_all(['h2', 'tbody']):
    if tag.name == 'h2':
        h2_star = tag.get_text(strip=True)
        if h2_star.lower() in ['references', 'see also', 'external links']:
            break

    elif tag.name == 'tbody':
        for tr in tag.find_all('tr'):
            cells = tr.find_all('td')
            row_star = {}

            
            try:
                row_star['taxon'] = cells[0].find('i').get_text(strip=True)
            except:
                try:
                    row_star['taxon'] = cells[0].find('a').get_text(strip=True)
                except:
                    try:
                        row_star['taxon'] = cells[0].get_text(strip=True)
                    except:
                        row_star['taxon'] = None

            
            try:
                row_star['named_by'] = cells[0].find('small').get_text(strip=True)
            except:
                try:
                    row_star['named_by'] = cells[0].find('span').get_text(strip=True)
                except:
                    row_star['named_by'] = None

            
            try:
                row_star['type'] = cells[1].get_text(strip=True)
            except:
                row_star['type'] = None

            
            try:
                row_star['namesake'] = cells[2].get_text(strip=True)
            except:
                row_star['namesake'] = None

            
            try:
                row_star['notes'] = cells[3].get_text(strip=True)
            except:
                row_star['notes'] = None

            
            row_star['character_type'] = h2_star
            row_star['fictional_universe'] = 'Star Wars'
            row_star['media_type'] = 'Films'

            rows_star.append(row_star)

print(f"Scraped {len(rows_star)} Star Wars entries.")


Scraped 74 Star Wars entries.


In [24]:
df_star = pd.json_normalize(rows_star)
df_star.head(15)

Unnamed: 0,taxon,named_by,type,namesake,notes,character_type,fictional_universe,media_type
0,,,,,,Named after Darth Vader,Star Wars,Films
1,Epicratinus anakin,"Gonçalves & Brescovit, 2020",Spider,"""Anakin is a character which change[s] from th...",[4],Named after Darth Vader,Star Wars,Films
2,Epicratinus vader,"Gonçalves & Brescovit, 2020",Spider,"""Darth Vader is a character which wears a cost...",,Named after Darth Vader,Star Wars,Films
3,Polemistus vaderi,"Menke & Vincent, 1983",Wasp,"Menke stated that he ""wanted to add a little h...",[5][6],Named after Darth Vader,Star Wars,Films
4,Darthvaderum greensladeae,"Hunt, 1996",Mite,"""When I saw theSEMof the gnathosoma I immediat...",[7],Named after Darth Vader,Star Wars,Films
5,Adelomyrmex vaderi,"Fernández, 2003",Ant,"""The dark aspect of these ants evokes Darth Va...",[8],Named after Darth Vader,Star Wars,Films
6,Thricops vaderi,"Savage, 2003",True fly,,[9],Named after Darth Vader,Star Wars,Films
7,Agathidium vaderi,"Miller and Wheeler, 2005",Beetle,"Darth Vader ""shares withA. vaderia broad, shin...",[10][11],Named after Darth Vader,Star Wars,Films
8,Garthambrus darthvaderi,"McLay & S.H.Tan, 2009",Crab,"The specific epithet ""alludes to the helmet-li...",[12],Named after Darth Vader,Star Wars,Films
9,Zoosphaerium darthvaderi,"Wesener & Bespalova, 2010",Millipede,"""From theStar Warscharacter Darth Vader, whose...",[13],Named after Darth Vader,Star Wars,Films


In [25]:
# Adding Star Wars dataset to species dataset
species = pd.concat([species, df_star], ignore_index=True)

In [26]:
species.shape

(1047, 9)

In [27]:
species.to_csv("species_scraped.csv")

# Cleaning scraped data

In [28]:
species.head()

Unnamed: 0.1,Unnamed: 0,taxon,named_by,type,namesake,notes,media_type,fictional_universe,character_type
0,0.0,,,,,,Literature,Greek mythology,
1,1.0,Antigone antigone,"(Linnaeus, 1758)",Crane,Antigone of Troy,"The species was named after ""Antigone, daughte...",Literature,Greek mythology,
2,2.0,Saguinus oedipus,"(Linnaeus, 1758)",New World monkey,Oedipus Rex,"""Linnaeus had a penchant for giving primates n...",Literature,Greek mythology,
3,3.0,Pseudoeurycea rex,"(Dunn, 1921)",Salamander,Oedipus Rex,Species formerly namedOedipus rex,Literature,Greek mythology,
4,4.0,Oedipodrilus oedipus,"Holt, 1967",Segmented worm,Oedipus Rex,,Literature,Greek mythology,


In [29]:
#Dropping unnamed column
species = species.drop(columns = ['Unnamed: 0'])

In [30]:
species

Unnamed: 0,taxon,named_by,type,namesake,notes,media_type,fictional_universe,character_type
0,,,,,,Literature,Greek mythology,
1,Antigone antigone,"(Linnaeus, 1758)",Crane,Antigone of Troy,"The species was named after ""Antigone, daughte...",Literature,Greek mythology,
2,Saguinus oedipus,"(Linnaeus, 1758)",New World monkey,Oedipus Rex,"""Linnaeus had a penchant for giving primates n...",Literature,Greek mythology,
3,Pseudoeurycea rex,"(Dunn, 1921)",Salamander,Oedipus Rex,Species formerly namedOedipus rex,Literature,Greek mythology,
4,Oedipodrilus oedipus,"Holt, 1967",Segmented worm,Oedipus Rex,,Literature,Greek mythology,
...,...,...,...,...,...,...,...,...
1042,Calponia harrisonfordi,"Platnick, 1993",Spider,Harrison Ford,Named after Harrison Ford to thank him for nar...,Films,Star Wars,Named afterStar Warsactors
1043,Pheidole harrisonfordi,"E. O. Wilson, 2002",Ant,Harrison Ford,Named after Harrison Ford in honor of his work...,Films,Star Wars,Named afterStar Warsactors
1044,Tachymenoides harrisonfordi,"Lehr, Cusi, Fernandez, Vera & Catenazzi, 2023",Snake,Harrison Ford,"""We dedicate this species to Harrison Ford, ac...",Films,Star Wars,Named afterStar Warsactors
1045,Cantharis mikkelsenorum,Cantharis mikkelsenorum†,Beetle,Lars MikkelsenandMads Mikkelsen,A fossilsoldier beetlefound inBaltic amberfrom...,Films,Star Wars,Named afterStar Warsactors


In [31]:
#Finding empty rows
print(species['taxon'].isna().value_counts())

taxon
False    944
True     103
Name: count, dtype: int64


In [32]:
species[species['taxon'].isna()]

Unnamed: 0,taxon,named_by,type,namesake,notes,media_type,fictional_universe,character_type
0,,,,,,Literature,Greek mythology,
12,,,,,,Literature,Norse mythology,
18,,,,,,Literature,Gargantua and Pantagruel,
23,,,,,,Literature,Journey to the West,
34,,,,,,Literature,William Shakespeare,
...,...,...,...,...,...,...,...,...
1012,,,,,,Films,Star Wars,Named after Han Solo
1015,,,,,,Films,Star Wars,Named after Padmé Amidala
1018,,,,,,Films,Star Wars,Named after Porgs
1021,,,,,,Films,Star Wars,Named after other characters and elements


In [33]:
#Dropping empty rows
species = species.dropna(subset=['taxon']).reset_index(drop=True)

In [34]:
species.shape

(944, 8)

In [35]:
#Extracting 'year' and name from 'named_by' column
import re

In [36]:
species["year"] = species["named_by"].str.extract(r"(\d{4})", expand=False)

In [37]:
species.head()

Unnamed: 0,taxon,named_by,type,namesake,notes,media_type,fictional_universe,character_type,year
0,Antigone antigone,"(Linnaeus, 1758)",Crane,Antigone of Troy,"The species was named after ""Antigone, daughte...",Literature,Greek mythology,,1758
1,Saguinus oedipus,"(Linnaeus, 1758)",New World monkey,Oedipus Rex,"""Linnaeus had a penchant for giving primates n...",Literature,Greek mythology,,1758
2,Pseudoeurycea rex,"(Dunn, 1921)",Salamander,Oedipus Rex,Species formerly namedOedipus rex,Literature,Greek mythology,,1921
3,Oedipodrilus oedipus,"Holt, 1967",Segmented worm,Oedipus Rex,,Literature,Greek mythology,,1967
4,Amblytylus peitho,"Linnavuori, 1997",Leaf bug,Peitho,,Literature,Greek mythology,,1997


In [38]:
species['named_by_clean'] = (species['named_by'].str.replace(r',?\s*\d{4}', '', regex=True).str.strip().str.strip('()'))                             


In [39]:
species.head()

Unnamed: 0,taxon,named_by,type,namesake,notes,media_type,fictional_universe,character_type,year,named_by_clean
0,Antigone antigone,"(Linnaeus, 1758)",Crane,Antigone of Troy,"The species was named after ""Antigone, daughte...",Literature,Greek mythology,,1758,Linnaeus
1,Saguinus oedipus,"(Linnaeus, 1758)",New World monkey,Oedipus Rex,"""Linnaeus had a penchant for giving primates n...",Literature,Greek mythology,,1758,Linnaeus
2,Pseudoeurycea rex,"(Dunn, 1921)",Salamander,Oedipus Rex,Species formerly namedOedipus rex,Literature,Greek mythology,,1921,Dunn
3,Oedipodrilus oedipus,"Holt, 1967",Segmented worm,Oedipus Rex,,Literature,Greek mythology,,1967,Holt
4,Amblytylus peitho,"Linnavuori, 1997",Leaf bug,Peitho,,Literature,Greek mythology,,1997,Linnavuori


In [40]:
#Renaming some columns
species = species.rename(columns = {'named_by' : 'named_by_wiki', 'notes' : 'notes_wiki', 'year' : 'year_wiki', 'named_by_clean' : 'named_by_clean_wiki' })

In [41]:
#Reordering columns
species = species[['taxon', 'type', 'namesake', 'year_wiki', 'named_by_clean_wiki',
     'fictional_universe', 'character_type', 'media_type', 'notes_wiki', 'named_by_wiki']]

In [42]:
species.head()

Unnamed: 0,taxon,type,namesake,year_wiki,named_by_clean_wiki,fictional_universe,character_type,media_type,notes_wiki,named_by_wiki
0,Antigone antigone,Crane,Antigone of Troy,1758,Linnaeus,Greek mythology,,Literature,"The species was named after ""Antigone, daughte...","(Linnaeus, 1758)"
1,Saguinus oedipus,New World monkey,Oedipus Rex,1758,Linnaeus,Greek mythology,,Literature,"""Linnaeus had a penchant for giving primates n...","(Linnaeus, 1758)"
2,Pseudoeurycea rex,Salamander,Oedipus Rex,1921,Dunn,Greek mythology,,Literature,Species formerly namedOedipus rex,"(Dunn, 1921)"
3,Oedipodrilus oedipus,Segmented worm,Oedipus Rex,1967,Holt,Greek mythology,,Literature,,"Holt, 1967"
4,Amblytylus peitho,Leaf bug,Peitho,1997,Linnavuori,Greek mythology,,Literature,,"Linnavuori, 1997"


In [47]:
species.shape

(944, 10)

In [46]:
species.to_csv("species_cleaned_wiki.csv", index = False)