# Block 1: import libraries

In [13]:
import pandas as pd
import numpy as np

# Block 2: scrape the data from the web

In [14]:
# Parse every HTML table found on the MonsterVerse Wikipedia page into a list
# of DataFrames (requires network access).
page_tables = pd.read_html('https://en.wikipedia.org/wiki/MonsterVerse')

# The table at position 3 of that list is the one with the characters and movies.
# NOTE(review): the position is hard-coded and depends on the page's current layout.
data = page_tables[3]

# Block 3: clean up the dataframe

In [15]:
# Use the first scraped column as the row index, replacing the default
# 0, 1, 2, 3, ... index.
data = data.set_index(data.columns[0])  # first column is the index

# Replace the scraped column headers with just the movie names
# (one name per remaining column, in table order).
data.columns = ['Godzilla','Kong:Skull Island','Godzilla:King of the Monsters', 'Godzilla vs. Kong']

# Name the index "Character".
data.index.name = 'Character'

# Remove the rows whose value is identical across all four movie columns.
# Comparing every column against 'Godzilla' and requiring all of them to match
# is equivalent to chaining the three pairwise `==` checks: a missing value
# never compares equal, so a row with NaN in 'Godzilla' is never flagged.
duplicates = data.eq(data['Godzilla'], axis=0).all(axis=1)
keep_values = ~duplicates  # every row except the all-duplicate ones
data = data.loc[keep_values]

# display first few rows
data.head(7)

Unnamed: 0_level_0,Godzilla,Kong:Skull Island,Godzilla:King of the Monsters,Godzilla vs. Kong
Character,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Godzilla,T.J. StormS,Pictured with archive audio,T.J. StormS,CGI
MUTO,Matt CrossSLee RossS,,CGI,Archive footage
King Kong,,Terry NotarySToby KebbellS,Archive footage,Eric PeteyS[57]
King Ghidorah,,Pictured,Jason LilesSAlan MaxsonSRichard DortonS,Archive footage
Rodan,,Pictured,Jason LilesS,Archive footage
Ishiro Serizawa,Ken Watanabe,,Ken Watanabe,
Vivienne Graham,Sally Hawkins,,Sally Hawkins,


# Block 4: save the DataFrame to a .csv file, using tabs as the delimiter

In [16]:
# Write the table (index column included) to disk as tab-separated text.
output_path = 'monsterverse_character_films.csv'
data.to_csv(output_path, sep='\t')