In [1]:
import csv
import pandas as pd
from datetime import datetime
import calendar
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the comic-character dataset from the CSV that lives next to the
# notebook directory, then preview the first rows.
df = pd.read_csv(filepath_or_buffer="../CSV/comic_characters.csv")
df.head(5)

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe
0,0,Dave Finn,Secret,Good,Blue,Black,Male,Yes,1,"2002, July",Earth-616,Marvel
1,1,Katherine Power,Secret,Good,Blue,Red,Female,Yes,106,"1984, August",Earth-616,Marvel
2,2,John Malone,Secret,Bad,Blue,White,Male,Yes,15,"1985, January",New Earth,DC
3,3,"Samuel Bradley, Jr.",Public,Bad,Brown,Brown,Male,No,19,"2004, March",New Earth,DC
4,4,Hope Taya,Public,Bad,Brown,Black,Female,Yes,28,"1999, December",New Earth,DC


In [3]:
# Full English month name -> three-letter abbreviation used in the output.
# Built once here instead of on every call of clean_and_convert.
_MONTH_ABBREVIATIONS = {
    'January': 'Jan', 'February': 'Feb', 'March': 'Mar',
    'April': 'Apr', 'May': 'May', 'June': 'Jun',
    'July': 'Jul', 'August': 'Aug', 'September': 'Sep',
    'October': 'Oct', 'November': 'Nov', 'December': 'Dec'
}


def clean_and_convert(date_str):
    """Convert a ``'YYYY, Month'`` string into ``'Mon/YYYY'``.

    Parameters
    ----------
    date_str : str
        Text whose first comma-separated field is the year and whose
        optional second field is a full English month name
        (e.g. ``'2002, July'`` or just ``'2002'``).

    Returns
    -------
    str
        ``'<Mon>/<year>'``, e.g. ``'Jul/2002'``. The month falls back to
        ``'Jan'`` when it is missing or is not one of the twelve known
        month names (the lookup is case-sensitive).
        A value that is not a string (such as NaN or None) and a value that
        is already in ``'Mon/YYYY'`` form are returned unchanged.

    Raises
    ------
    ValueError
        If the year field cannot be parsed as an integer.
    """
    # Missing values have no .split(); hand them back rather than crash
    # in the middle of a Series.apply.
    if not isinstance(date_str, str):
        return date_str

    # Already converted (e.g. the cell was run a second time): keep as is.
    month_part, slash, year_part = date_str.partition('/')
    if (slash and month_part in _MONTH_ABBREVIATIONS.values()
            and year_part.isdigit()):
        return date_str

    # Split on the comma alone and strip each field so that spacing
    # variations such as '2002,July' are accepted too.
    parts = [field.strip() for field in date_str.split(',')]
    year = int(parts[0])

    month = 'Jan'
    if len(parts) > 1:
        month = _MONTH_ABBREVIATIONS.get(parts[1], 'Jan')

    return f'{month}/{year}'

# Run every 'First_appeared' entry through clean_and_convert.
# The column is overwritten, so the raw strings read from the CSV are no
# longer available on df after this statement.
df['First_appeared'] = df['First_appeared'].map(clean_and_convert)
df.head(5)

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe
0,0,Dave Finn,Secret,Good,Blue,Black,Male,Yes,1,Jul/2002,Earth-616,Marvel
1,1,Katherine Power,Secret,Good,Blue,Red,Female,Yes,106,Aug/1984,Earth-616,Marvel
2,2,John Malone,Secret,Bad,Blue,White,Male,Yes,15,Jan/1985,New Earth,DC
3,3,"Samuel Bradley, Jr.",Public,Bad,Brown,Brown,Male,No,19,Mar/2004,New Earth,DC
4,4,Hope Taya,Public,Bad,Brown,Black,Female,Yes,28,Dec/1999,New Earth,DC


In [4]:
# Keep only the rows whose Universe is "Marvel".
marvel_df = df.loc[df["Universe"] == "Marvel"]
marvel_df.head(5)

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe
0,0,Dave Finn,Secret,Good,Blue,Black,Male,Yes,1,Jul/2002,Earth-616,Marvel
1,1,Katherine Power,Secret,Good,Blue,Red,Female,Yes,106,Aug/1984,Earth-616,Marvel
8,8,Perkins,Public,Bad,Green,White,Female,Yes,1,Oct/1964,Earth-616,Marvel
9,9,Zuhn,Secret,Bad,White,Blond,Male,No,3,Nov/2000,Earth-616,Marvel
10,10,Azaziah,Secret,Bad,Blue,White,Male,Yes,4,Apr/1980,Earth-616,Marvel


In [5]:
# Keep only the rows whose Universe is "DC".
dc_df = df.loc[df["Universe"] == "DC"]
dc_df.head(5)

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe
2,2,John Malone,Secret,Bad,Blue,White,Male,Yes,15,Jan/1985,New Earth,DC
3,3,"Samuel Bradley, Jr.",Public,Bad,Brown,Brown,Male,No,19,Mar/2004,New Earth,DC
4,4,Hope Taya,Public,Bad,Brown,Black,Female,Yes,28,Dec/1999,New Earth,DC
5,5,Trygg,Secret,Bad,Black,Black,Male,Yes,3,Jun/1940,New Earth,DC
6,6,Boris Dmitravich Razumihin,Public,Good,Brown,Black,Male,Yes,7,Dec/1987,New Earth,DC


In [6]:
# Marvel characters whose Alignment is "Good".
marvel_good_df = marvel_df.loc[marvel_df["Alignment"] == "Good"]
marvel_good_df.head(5)

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe
0,0,Dave Finn,Secret,Good,Blue,Black,Male,Yes,1,Jul/2002,Earth-616,Marvel
1,1,Katherine Power,Secret,Good,Blue,Red,Female,Yes,106,Aug/1984,Earth-616,Marvel
16,16,Sally Avril,Secret,Good,Brown,Black,Female,No,76,Aug/1962,Earth-616,Marvel
24,24,Plazm,Secret,Good,Red,Blue,Male,No,4,Jul/2001,Earth-616,Marvel
35,35,LaHoya Scripps,Secret,Good,Blue,Blond,Female,Yes,8,Dec/1991,Earth-616,Marvel


In [7]:
# Marvel characters whose Alignment is "Neutral".
marvel_neutral_df = marvel_df.loc[marvel_df["Alignment"] == "Neutral"]
marvel_neutral_df.head(5)

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe
13,13,Caryn Wise,Non-dual,Neutral,White,Brown,Female,Yes,2,Apr/1983,Earth-616,Marvel
14,14,Otto Octavius,Secret,Neutral,Hazel,Brown,Male,No,526,Jul/1963,Earth-616,Marvel
31,31,Phantom of the Bell Tower,Secret,Neutral,Red,Bald,Male,Yes,1,Jan/1942,Earth-616,Marvel
32,32,Tamara Hashioka,Non-dual,Neutral,Black,Black,Female,Yes,22,Aug/1977,Earth-616,Marvel
46,46,Ouranos,Public,Neutral,Variable,Variable,Male,Yes,3,Jun/2008,Earth-616,Marvel


In [8]:
# Marvel characters whose Alignment is "Bad".
marvel_bad_df = marvel_df.loc[marvel_df["Alignment"] == "Bad"]
marvel_bad_df.head(5)

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe
8,8,Perkins,Public,Bad,Green,White,Female,Yes,1,Oct/1964,Earth-616,Marvel
9,9,Zuhn,Secret,Bad,White,Blond,Male,No,3,Nov/2000,Earth-616,Marvel
10,10,Azaziah,Secret,Bad,Blue,White,Male,Yes,4,Apr/1980,Earth-616,Marvel
12,12,Wild One,Public,Bad,Variable,Black,Male,Yes,3,Jul/1992,Earth-616,Marvel
15,15,Black Cougar,Non-dual,Bad,Black,Black,Male,No,1,Sep/1951,Earth-616,Marvel


In [9]:
# Bin edges for the appearance counts and one label per interval.
# pd.cut closes intervals on the right by default, so a count sitting exactly
# on an edge (e.g. 10) lands in the lower bucket ("<10"); include_lowest=True
# makes the first interval include its left edge (0) as well.
# Counts above the last edge (50000) would get no bucket (NaN).
appearances_bins = [0,10,100,250,500,750,1000,1500,2000,3000,50000]
group_names = ["<10","<100","<250","<500","<750","<1000","<1500","<2000","<3000",">3000"]

# marvel_df comes from boolean-filtering df. Build a new frame with .assign()
# rather than writing a column into that filtered slice, which is chained
# assignment and triggers pandas' SettingWithCopyWarning.
marvel_df = marvel_df.assign(**{
    "Appearances Bins": pd.cut(marvel_df["Appearances"],
                               appearances_bins, labels=group_names,
                               include_lowest=True)
})
marvel_df.head()

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe,Appearances Bins
0,0,Dave Finn,Secret,Good,Blue,Black,Male,Yes,1,Jul/2002,Earth-616,Marvel,<10
1,1,Katherine Power,Secret,Good,Blue,Red,Female,Yes,106,Aug/1984,Earth-616,Marvel,<250
8,8,Perkins,Public,Bad,Green,White,Female,Yes,1,Oct/1964,Earth-616,Marvel,<10
9,9,Zuhn,Secret,Bad,White,Blond,Male,No,3,Nov/2000,Earth-616,Marvel,<10
10,10,Azaziah,Secret,Bad,Blue,White,Male,Yes,4,Apr/1980,Earth-616,Marvel,<10


In [10]:
# DC characters whose Alignment is "Good".
dc_good_df = dc_df.loc[dc_df["Alignment"] == "Good"]
dc_good_df.head(5)

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe
6,6,Boris Dmitravich Razumihin,Public,Good,Brown,Black,Male,Yes,7,Dec/1987,New Earth,DC
7,7,Perry White,Public,Good,Blue,Brown,Male,Yes,350,Oct/1986,New Earth,DC
11,11,Vladimir Morakov,Public,Good,Black,Brown,Male,Yes,8,Dec/1989,New Earth,DC
17,17,Nina Mazursky,Secret,Good,White,No,Female,Yes,3,Aug/2011,New Earth,DC
18,18,Tubby Watts,Secret,Good,Brown,Red,Male,Yes,137,Sep/1941,New Earth,DC


In [11]:
# DC characters whose Alignment is "Neutral".
dc_neutral_df = dc_df.loc[dc_df["Alignment"] == "Neutral"]
dc_neutral_df.head(5)

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe
23,23,Fashion Thing,Public,Neutral,Blue,White,Female,Yes,14,Sep/1968,New Earth,DC
36,36,Fidel Castro,Public,Neutral,Black,White,Male,Yes,4,Jan/1988,New Earth,DC
44,44,Teekl,Public,Neutral,Red,Orange,Male,Yes,43,Mar/1973,New Earth,DC
54,54,Chantinelle,Public,Neutral,Brown,Brown,Female,Yes,9,Jul/1991,New Earth,DC
70,70,Jeb Turnbull,Public,Neutral,Brown,Black,Male,Yes,3,Aug/1974,New Earth,DC


In [12]:
# DC characters whose Alignment is "Bad".
dc_bad_df = dc_df.loc[dc_df["Alignment"] == "Bad"]
dc_bad_df.head(5)

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe
2,2,John Malone,Secret,Bad,Blue,White,Male,Yes,15,Jan/1985,New Earth,DC
3,3,"Samuel Bradley, Jr.",Public,Bad,Brown,Brown,Male,No,19,Mar/2004,New Earth,DC
4,4,Hope Taya,Public,Bad,Brown,Black,Female,Yes,28,Dec/1999,New Earth,DC
5,5,Trygg,Secret,Bad,Black,Black,Male,Yes,3,Jun/1940,New Earth,DC
26,26,Simyan,Public,Bad,Brown,Brown,Male,Yes,20,Jan/1971,New Earth,DC


In [13]:
# Bucket the DC appearance counts with the same edges and labels
# (appearances_bins / group_names) that the Marvel binning cell defines.
# dc_df comes from boolean-filtering df. Build a new frame with .assign()
# rather than writing a column into that filtered slice, which is chained
# assignment and triggers pandas' SettingWithCopyWarning.
dc_df = dc_df.assign(**{
    "Appearances Bins": pd.cut(dc_df["Appearances"],
                               appearances_bins, labels=group_names,
                               include_lowest=True)
})
dc_df.head(50)

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe,Appearances Bins
2,2,John Malone,Secret,Bad,Blue,White,Male,Yes,15,Jan/1985,New Earth,DC,<100
3,3,"Samuel Bradley, Jr.",Public,Bad,Brown,Brown,Male,No,19,Mar/2004,New Earth,DC,<100
4,4,Hope Taya,Public,Bad,Brown,Black,Female,Yes,28,Dec/1999,New Earth,DC,<100
5,5,Trygg,Secret,Bad,Black,Black,Male,Yes,3,Jun/1940,New Earth,DC,<10
6,6,Boris Dmitravich Razumihin,Public,Good,Brown,Black,Male,Yes,7,Dec/1987,New Earth,DC,<10
7,7,Perry White,Public,Good,Blue,Brown,Male,Yes,350,Oct/1986,New Earth,DC,<500
11,11,Vladimir Morakov,Public,Good,Black,Brown,Male,Yes,8,Dec/1989,New Earth,DC,<10
17,17,Nina Mazursky,Secret,Good,White,No,Female,Yes,3,Aug/2011,New Earth,DC,<10
18,18,Tubby Watts,Secret,Good,Brown,Red,Male,Yes,137,Sep/1941,New Earth,DC,<250
19,19,Growler,Non-dual,Good,Brown,Black,Male,Yes,11,Apr/1998,New Earth,DC,<100


In [14]:
# Group the DC characters by appearance bucket. Despite its name, this is a
# GroupBy object, not a sorted DataFrame.
# "Appearances Bins" is categorical; observed=False is passed explicitly so
# every bucket label gets a row regardless of the pandas version's default.
dc_bins_sorted_df = dc_df.groupby("Appearances Bins", observed=False)
# max() is evaluated column by column, so each result row combines the
# largest value of every column within the bucket; it is not one character.
dc_bins_sorted_df.max()

Unnamed: 0_level_0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe
Appearances Bins,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
<10,21143,Zzlrrrzzzm,Unknown,Neutral,Yellow,Yellow,Unknown,Yes,10,Sep/2011,Unknown,DC
<100,21142,Zviad Baazovi,Unknown,Neutral,Yellow,Yellow,Unknown,Yes,100,Sep/2010,Unknown,DC
<250,20900,Zinda Blake,Secret,Neutral,Yellow,White,Unknown,Yes,250,Sep/1996,Unknown,DC
<500,21129,Zatanna Zatara,Secret,Neutral,White,White,Male,Yes,492,Sep/1983,New Earth,DC
<750,20803,Victor Stone,Secret,Neutral,Green,Red,Male,Yes,716,Oct/1986,New Earth,DC
<1000,17636,Lois Lane,Secret,Good,Blue,Red,Male,Yes,969,Jun/1938,New Earth,DC
<1500,20999,Wonder Woman,Secret,Good,Brown,White,Male,Yes,1316,Oct/1956,Unknown,DC
<2000,6340,Green Lantern,Secret,Good,Brown,Brown,Male,Yes,1565,Oct/1959,Unknown,DC
<3000,14876,Superman,Secret,Good,Blue,Black,Male,Yes,2496,Oct/1986,Unknown,DC
>3000,8155,Batman,Secret,Good,Blue,Black,Male,Yes,3093,May/1939,Unknown,DC


In [15]:
# Group the Marvel characters by appearance bucket. Despite its name, this is
# a GroupBy object, not a sorted DataFrame.
# "Appearances Bins" is categorical; observed=False is passed explicitly so
# every bucket label gets a row regardless of the pandas version's default.
marvel_bins_sorted_df = marvel_df.groupby("Appearances Bins", observed=False)
# min() is evaluated column by column, so each result row combines the
# smallest value of every column within the bucket; it is not one character.
marvel_bins_sorted_df.min()

Unnamed: 0_level_0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe
Appearances Bins,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
<10,0,107,Known to Authorities,Bad,Amber,Auburn,Female,No,1,Apr/1940,Earth-11052,Marvel
<100,16,A Friend,Known to Authorities,Bad,Amber,Auburn,Female,No,11,Apr/1940,Earth-5012,Marvel
<250,1,Adolf Hitler,Known to Authorities,Bad,Black,Auburn,Female,No,101,Apr/1949,Earth-616,Marvel
<500,80,Abner Jenkins,Known to Authorities,Bad,Amber,Auburn,Female,No,252,Apr/1963,Earth-616,Marvel
<750,14,Alexander Summers,Non-dual,Bad,Black,Auburn,Female,No,502,Apr/1964,Earth-616,Marvel
<1000,1629,Carol Danvers,Non-dual,Good,Blue,Black,Female,Yes,752,Aug/1962,Earth-616,Marvel
<1500,4453,Charles Xavier,Non-dual,Good,Blue,Auburn,Female,No,1007,Apr/1964,Earth-616,Marvel
<2000,1022,Henry McCoy,Public,Good,Blue,Blond,Female,Yes,1512,May/1975,Earth-616,Marvel
<3000,2253,Benjamin Grimm,Non-dual,Good,Blue,Black,Male,Yes,2017,Mar/1963,Earth-616,Marvel
>3000,1114,Captain America,Public,Good,Blue,Black,Male,Yes,3061,Aug/1962,Unknown,Marvel


In [26]:
# Order the Marvel characters by eye colour, then by appearance bucket within
# each colour, both ascending.
# NOTE(review): the saved output of this cell shows an 'Eye Color Bins' column
# that no cell visible above creates, and its execution count (26) is out of
# sequence. It presumably ran after a later cell; confirm with
# Restart & Run All.
marvel_sortby_eyes_df = marvel_df.sort_values(
    ['Eyes', 'Appearances Bins'],
    ascending=True,
)
marvel_sortby_eyes_df

Unnamed: 0,Id,Name,Identity,Alignment,Eyes,Hair,Sex,Alive,Appearances,First_appeared,Planet,Universe,Appearances Bins,Eye Color Bins
492,492,Mercedes Wilson,Secret,Good,Amber,Red,Female,Yes,9,Mar/1999,Earth-616,Marvel,<10,17.0
4298,4298,Araq Mezdbadah,Secret,Good,Amber,Blond,Male,Yes,2,Nov/1992,Earth-616,Marvel,<10,17.0
4831,4831,Zebadiah Creed,Secret,Bad,Amber,Brown,Male,No,4,Aug/1993,Earth-616,Marvel,<10,17.0
5020,5020,Scythe,Non-dual,Bad,Amber,Brown,Male,Yes,2,Jul/1974,Earth-616,Marvel,<10,17.0
7856,7856,Jin Lee,Secret,Good,Amber,Black,Female,Yes,2,Oct/2012,Earth-616,Marvel,<10,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8631,8631,Dormammu,Non-dual,Bad,Yellow,No,Male,Yes,132,Nov/1964,Earth-616,Marvel,<250,7.0
10112,10112,Gamora,Non-dual,Neutral,Yellow,Black,Female,Yes,218,Jun/1975,Earth-616,Marvel,<250,7.0
19431,19431,Supreme Intelligence,Non-dual,Neutral,Yellow,Green,Male,Yes,128,Aug/1967,Earth-616,Marvel,<250,7.0
6607,6607,Raven Darkholme,Secret,Bad,Yellow,Red,Female,Yes,371,Apr/1978,Earth-616,Marvel,<500,7.0


In [19]:
# Distinct eye-colour values among the Marvel characters, in order of first
# appearance in the frame.
marvel_df.loc[:, 'Eyes'].unique()

array(['Blue', 'Green', 'White', 'Variable', 'Hazel', 'Black', 'Brown',
       'Yellow', 'Red', 'No', 'Grey', 'Gold', 'Purple', 'Orange',
       'Auburn', 'Silver', 'Magenta', 'Amber', 'One', 'Photocellular',
       'Multiple', 'Violet', 'Pink', 'Compound'], dtype=object)