I downloaded this data from [data.world](https://data.world/fivethirtyeight/comic-characters/workspace/file?filename=README.md), courtesy of fivethirtyeight.

The data is split into two files, for DC and Marvel. Each file has the following variables:

 Variable |    Definition  
:----------:|:--------------
 page_id  | The unique identifier for that characters page within the wikia 
name	  | The name of the character
urlslug	  | The unique url within the wikia that takes you to the character
ID	      | The identity status of the character (Secret Identity, Public identity, **on marvel only**: No Dual Identity)
ALIGN	  | If the character is Good, Bad or Neutral
EYE	      | Eye color of the character
HAIR	  | Hair color of the character
SEX	      | Sex of the character (e.g. Male, Female, etc.)
GSM	      | If the character is a gender or sexual minority (e.g. Homosexual characters, bisexual characters)
ALIVE	  | If the character is alive or deceased
APPEARANCES	| The number of appareances of the character in comic books (as of Sep. 2, 2014. Number will become increasingly out of date as time goes on.)
FIRST APPEARANCE | The month and year of the character's first appearance in a comic book, if available
YEAR | The year of the character's first appearance in a comic book, if available

In [1]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
marvel = pd.read_csv('./datasets/marvel-wikia-data.csv')
marvel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16376 entries, 0 to 16375
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   page_id           16376 non-null  int64  
 1   name              16376 non-null  object 
 2   urlslug           16376 non-null  object 
 3   ID                12606 non-null  object 
 4   ALIGN             13564 non-null  object 
 5   EYE               6609 non-null   object 
 6   HAIR              12112 non-null  object 
 7   SEX               15522 non-null  object 
 8   GSM               90 non-null     object 
 9   ALIVE             16373 non-null  object 
 10  APPEARANCES       15280 non-null  float64
 11  FIRST APPEARANCE  15561 non-null  object 
 12  Year              15561 non-null  float64
dtypes: float64(2), int64(1), object(10)
memory usage: 1.6+ MB


In [3]:
marvel.describe()

Unnamed: 0,page_id,APPEARANCES,Year
count,16376.0,15280.0,15561.0
mean,300232.082377,17.033377,1984.951803
std,253460.403399,96.372959,19.663571
min,1025.0,1.0,1939.0
25%,28309.5,1.0,1974.0
50%,282578.0,3.0,1990.0
75%,509077.0,8.0,2000.0
max,755278.0,4043.0,2013.0


In [4]:
marvel.columns

Index(['page_id', 'name', 'urlslug', 'ID', 'ALIGN', 'EYE', 'HAIR', 'SEX',
       'GSM', 'ALIVE', 'APPEARANCES', 'FIRST APPEARANCE', 'Year'],
      dtype='object')

In [5]:
marvel[['name', 'urlslug']]

Unnamed: 0,name,urlslug
0,Spider-Man (Peter Parker),\/Spider-Man_(Peter_Parker)
1,Captain America (Steven Rogers),\/Captain_America_(Steven_Rogers)
2,"Wolverine (James \""Logan\"" Howlett)",\/Wolverine_(James_%22Logan%22_Howlett)
3,"Iron Man (Anthony \""Tony\"" Stark)",\/Iron_Man_(Anthony_%22Tony%22_Stark)
4,Thor (Thor Odinson),\/Thor_(Thor_Odinson)
...,...,...
16371,Ru'ach (Earth-616),\/Ru%27ach_(Earth-616)
16372,Thane (Thanos' son) (Earth-616),\/Thane_(Thanos%27_son)_(Earth-616)
16373,Tinkerer (Skrull) (Earth-616),\/Tinkerer_(Skrull)_(Earth-616)
16374,TK421 (Spiderling) (Earth-616),\/TK421_(Spiderling)_(Earth-616)


In [6]:
spec_chars = ["\\","_","/","%","22", "27"]
for char in spec_chars:
    marvel['name'] = marvel['name'].str.replace(char, ' ')

  marvel['name'] = marvel['name'].str.replace(char, ' ')


In [7]:
for char in spec_chars:
    marvel['urlslug'] = marvel['urlslug'].str.replace(char, ' ')

  marvel['urlslug'] = marvel['urlslug'].str.replace(char, ' ')


In [8]:
marvel[['name','urlslug']]

Unnamed: 0,name,urlslug
0,Spider-Man (Peter Parker),Spider-Man (Peter Parker)
1,Captain America (Steven Rogers),Captain America (Steven Rogers)
2,"Wolverine (James ""Logan "" Howlett)",Wolverine (James Logan Howlett)
3,"Iron Man (Anthony ""Tony "" Stark)",Iron Man (Anthony Tony Stark)
4,Thor (Thor Odinson),Thor (Thor Odinson)
...,...,...
16371,Ru'ach (Earth-616),Ru ach (Earth-616)
16372,Thane (Thanos' son) (Earth-616),Thane (Thanos son) (Earth-616)
16373,Tinkerer (Skrull) (Earth-616),Tinkerer (Skrull) (Earth-616)
16374,TK421 (Spiderling) (Earth-616),TK421 (Spiderling) (Earth-616)


In [9]:
marvel.head()

Unnamed: 0,page_id,name,urlslug,ID,ALIGN,EYE,HAIR,SEX,GSM,ALIVE,APPEARANCES,FIRST APPEARANCE,Year
0,1678,Spider-Man (Peter Parker),Spider-Man (Peter Parker),Secret Identity,Good Characters,Hazel Eyes,Brown Hair,Male Characters,,Living Characters,4043.0,Aug-62,1962.0
1,7139,Captain America (Steven Rogers),Captain America (Steven Rogers),Public Identity,Good Characters,Blue Eyes,White Hair,Male Characters,,Living Characters,3360.0,Mar-41,1941.0
2,64786,"Wolverine (James ""Logan "" Howlett)",Wolverine (James Logan Howlett),Public Identity,Neutral Characters,Blue Eyes,Black Hair,Male Characters,,Living Characters,3061.0,Oct-74,1974.0
3,1868,"Iron Man (Anthony ""Tony "" Stark)",Iron Man (Anthony Tony Stark),Public Identity,Good Characters,Blue Eyes,Black Hair,Male Characters,,Living Characters,2961.0,Mar-63,1963.0
4,2460,Thor (Thor Odinson),Thor (Thor Odinson),No Dual Identity,Good Characters,Blue Eyes,Blond Hair,Male Characters,,Living Characters,2258.0,Nov-50,1950.0


In [10]:
marvel['ID'].values

array(['Secret Identity', 'Public Identity', 'Public Identity', ...,
       'Secret Identity', 'Secret Identity', nan], dtype=object)