In [46]:
import pandas as pd

In [47]:
#Load the dataset into Pandas and display the first 5 rows.

df = pd.read_csv('marvel_characters.csv')
df.head()

Unnamed: 0,Name,Gender,Alignment,Team,First_Appearance,Movies,Power,Strength
0,Iron Man,Male,Good,Avengers,1963,10,Technology,85
1,Captain America,Male,Good,Avengers,1941,9,Super Soldier,80
2,Thor,Male,Good,Avengers,1962,8,God of Thunder,95
3,Hulk,Male,Good,Avengers,1962,7,Super Strength,100
4,Black Widow,Female,Good,Avengers,1964,7,Martial Arts,70


In [48]:
total_characters = len(df)
total_characters

15

In [49]:
#Show the unique values in the Team column.
df['Team'].unique()

array(['Avengers', 'Asgardians', 'Black Order', 'Independent'],
      dtype=object)

In [50]:
#Count how many male and female characters are there.

df['Gender'].value_counts()

Gender
Male      12
Female     3
Name: count, dtype: int64

In [51]:
#Find the earliest and latest first appearance year.

latest_appearance_char = df[df['First_Appearance'] == df['First_Appearance']].max()
latest_appearance_char

Name                        Vision
Gender                        Male
Alignment                  Neutral
Team                   Independent
First_Appearance              1973
Movies                          10
Power               Vibranium Suit
Strength                       100
dtype: object

In [52]:
earliest_appearance_char = df[ df['First_Appearance'] == df['First_Appearance']].min()
earliest_appearance_char

Name                Black Panther
Gender                     Female
Alignment                     Bad
Team                   Asgardians
First_Appearance             1941
Movies                          3
Power                     Archery
Strength                       65
dtype: object

In [53]:
#Find the average Strength of all characters.
df['Strength'].mean()

np.float64(86.2)

In [54]:
#Group characters by Alignment and find the count of characters in each group.
group_chars = df.groupby('Alignment')['Name'].count()
group_chars

Alignment
Bad         3
Good       11
Neutral     1
Name: Name, dtype: int64

In [55]:
#Find the strongest character (highest Strength).

max_strength = df['Strength'].max()

strongest_char = df[ df['Strength'] == max_strength]
strongest_char

Unnamed: 0,Name,Gender,Alignment,Team,First_Appearance,Movies,Power,Strength
3,Hulk,Male,Good,Avengers,1962,7,Super Strength,100
11,Thanos,Male,Bad,Black Order,1973,5,Infinity Gauntlet,100


In [56]:
#Find the top 3 characters with the most movie appearances.

most_movies = df.sort_values(by = 'Movies', ascending = False).head(3)
most_movies.head(3)

Unnamed: 0,Name,Gender,Alignment,Team,First_Appearance,Movies,Power,Strength
0,Iron Man,Male,Good,Avengers,1963,10,Technology,85
1,Captain America,Male,Good,Avengers,1941,9,Super Soldier,80
2,Thor,Male,Good,Avengers,1962,8,God of Thunder,95


In [57]:
#Replace all "Bad" in Alignment with "Villain".

df['Alignment'] = df['Alignment'].replace("Bad", "Villain")

In [58]:
df

Unnamed: 0,Name,Gender,Alignment,Team,First_Appearance,Movies,Power,Strength
0,Iron Man,Male,Good,Avengers,1963,10,Technology,85
1,Captain America,Male,Good,Avengers,1941,9,Super Soldier,80
2,Thor,Male,Good,Avengers,1962,8,God of Thunder,95
3,Hulk,Male,Good,Avengers,1962,7,Super Strength,100
4,Black Widow,Female,Good,Avengers,1964,7,Martial Arts,70
5,Hawkeye,Male,Good,Avengers,1964,6,Archery,65
6,Spider-Man,Male,Good,Avengers,1962,8,Spider Powers,78
7,Doctor Strange,Male,Good,Avengers,1963,6,Mystic Arts,88
8,Black Panther,Male,Good,Avengers,1966,5,Vibranium Suit,82
9,Scarlet Witch,Female,Neutral,Avengers,1964,6,Chaos Magic,92


In [59]:
#Which team has the highest average Strength?

highest_avg = df.groupby('Team')['Strength'].mean()
highest_avg

Team
Asgardians      85.000000
Avengers        84.416667
Black Order    100.000000
Independent     95.000000
Name: Strength, dtype: float64

In [60]:
#Find the character(s) who appeared in more than 7 movies.

most_char_movies = df[df['Movies'] > 7]

print(most_char_movies[['Name', 'Movies']])


              Name  Movies
0         Iron Man      10
1  Captain America       9
2             Thor       8
6       Spider-Man       8


In [61]:
#Create a new column Decade from First_Appearance (e.g., 1960s, 1970s).

df['Decade'] = (df['First_Appearance'] // 10 * 10).astype(str) + 's'

In [62]:
df

Unnamed: 0,Name,Gender,Alignment,Team,First_Appearance,Movies,Power,Strength,Decade
0,Iron Man,Male,Good,Avengers,1963,10,Technology,85,1960s
1,Captain America,Male,Good,Avengers,1941,9,Super Soldier,80,1940s
2,Thor,Male,Good,Avengers,1962,8,God of Thunder,95,1960s
3,Hulk,Male,Good,Avengers,1962,7,Super Strength,100,1960s
4,Black Widow,Female,Good,Avengers,1964,7,Martial Arts,70,1960s
5,Hawkeye,Male,Good,Avengers,1964,6,Archery,65,1960s
6,Spider-Man,Male,Good,Avengers,1962,8,Spider Powers,78,1960s
7,Doctor Strange,Male,Good,Avengers,1963,6,Mystic Arts,88,1960s
8,Black Panther,Male,Good,Avengers,1966,5,Vibranium Suit,82,1960s
9,Scarlet Witch,Female,Neutral,Avengers,1964,6,Chaos Magic,92,1960s


In [63]:
#Group by Decade and find how many new characters were introduced per decade.

new_char_per_decade = df.groupby('Decade')['Name'].count()
new_char_per_decade

Decade
1940s     2
1960s    12
1970s     1
Name: Name, dtype: int64

In [64]:
def powerLevel(x):
    if x['Strength'] < 75:
        return "Low"
    elif x['Strength'] < 90:
        return "Medium"
    else:
        return "High"

df['Power_level'] = df.apply(powerLevel, axis=1)


In [65]:
print(df[['Name', 'Power_level']])

               Name Power_level
0          Iron Man      Medium
1   Captain America      Medium
2              Thor        High
3              Hulk        High
4       Black Widow         Low
5           Hawkeye         Low
6        Spider-Man      Medium
7    Doctor Strange      Medium
8     Black Panther      Medium
9     Scarlet Witch        High
10             Loki      Medium
11           Thanos        High
12           Ultron        High
13   Captain Marvel        High
14           Vision      Medium
