# 가장 강한 히어로는 누구인가?
# Marvel이랑 DC랑 싸우면 누가 이기는가?

In [267]:
import pandas as pd
import numpy as np

heroInfo = pd.read_csv('../data/HeroInfo.csv',)
heroInfo_df = heroInfo[['Name','Speed','Power','Gender','Publisher']]

In [268]:
heroInfo_df

Unnamed: 0,Name,Speed,Power,Gender,Publisher
0,Zoom,100,100,Male,DC Comics
1,Zatanna,25,100,Female,DC Comics
2,Yoda,35,100,Male,George Lucas
3,Ymir,30,100,Male,Marvel Comics
4,Yellowjacket II,35,35,Female,Marvel Comics
...,...,...,...,...,...
743,Abomination,55,65,Male,Marvel Comics
744,Abin Sur,55,100,Male,DC Comics
745,Abe Sapien,35,100,Male,Dark Horse Comics
746,A-Bomb,20,25,Male,Marvel Comics


In [278]:
heroInfo_df.head()

Unnamed: 0,Name,Speed,Power,Gender,Publisher
0,Zoom,100,100,Male,DC Comics
1,Zatanna,25,100,Female,DC Comics
2,Yoda,35,100,Male,George Lucas
3,Ymir,30,100,Male,Marvel Comics
4,Yellowjacket II,35,35,Female,Marvel Comics


In [279]:
heroInfo_df.count()

Name         748
Speed        578
Power        578
Gender       742
Publisher    724
dtype: int64

In [281]:
heroInfo_df.dtypes

Name         object
Speed        object
Power        object
Gender       object
Publisher    object
dtype: object

In [283]:
heroInfo_df.tail()

Unnamed: 0,Name,Speed,Power,Gender,Publisher
743,Abomination,55,65,Male,Marvel Comics
744,Abin Sur,55,100,Male,DC Comics
745,Abe Sapien,35,100,Male,Dark Horse Comics
746,A-Bomb,20,25,Male,Marvel Comics
747,3-D Man,45,25,Male,Marvel Comics


In [282]:
heroInfo_df.isnull().sum()

Name           0
Speed        170
Power        170
Gender         6
Publisher     24
dtype: int64

## 결측치 처리

In [None]:
heroInfo_df['Speed'] = heroInfo_df['Speed'].replace('-','')
heroInfo_df['Speed'] = heroInfo_df['Speed'].replace('',np.nan)

In [None]:
heroInfo_df['Power'] = heroInfo_df['Power'].replace('-','')
heroInfo_df['Power'] = heroInfo_df['Power'].replace('',np.nan)

In [191]:
heroInfo_df['Speed'].isnull().sum()

170

In [192]:
heroInfo_df['Power'].isnull().sum()

170

In [193]:
heroInfo_df = heroInfo_df.dropna()

In [194]:
heroInfo_df.isnull().sum()

Name         0
Speed        0
Power        0
Gender       0
Publisher    0
dtype: int64

# 수치형 데이터로 변환

In [None]:
heroInfo_df['Speed'] = heroInfo_df['Speed'].astype(float)
heroInfo_df['Power'] = heroInfo_df['Power'].astype(float)

In [196]:
heroInfo_df.dtypes

Name          object
Speed        float64
Power        float64
Gender        object
Publisher     object
dtype: object

In [197]:
heroInfo_ex = heroInfo[['Name','Strength']]

In [198]:
heroInfo_ex.isnull().sum()

Name        0
Strength    6
dtype: int64

In [None]:
heroInfo_ex['Strength'] = heroInfo_ex['Strength'].replace('-','')
heroInfo_ex['Strength'] = heroInfo_ex['Strength'].replace('',np.nan)

In [200]:
heroInfo_ex = heroInfo_ex.dropna()

In [201]:
heroInfo_ex.isnull().sum()

Name        0
Strength    0
dtype: int64

In [202]:
heroInfo_ex['Strength'] = heroInfo_ex['Strength'].astype(float)

In [203]:
heroInfo_df.head()

Unnamed: 0,Name,Speed,Power,Gender,Publisher
0,Zoom,100.0,100.0,Male,DC Comics
1,Zatanna,25.0,100.0,Female,DC Comics
2,Yoda,35.0,100.0,Male,George Lucas
3,Ymir,30.0,100.0,Male,Marvel Comics
4,Yellowjacket II,35.0,35.0,Female,Marvel Comics


In [204]:
heroInfo_ex.head()

Unnamed: 0,Name,Strength
0,Zoom,10.0
1,Zatanna,10.0
2,Yoda,55.0
3,Ymir,100.0
4,Yellowjacket II,10.0


## Speed, Power 프레임 + Strength 프레임 Name 기준으로 merge

In [205]:
hero_merge = pd.merge(left = heroInfo_df , right = heroInfo_ex, how = "inner", on = "Name")
hero_merge

Unnamed: 0,Name,Speed,Power,Gender,Publisher,Strength
0,Zoom,100.0,100.0,Male,DC Comics,10.0
1,Zatanna,25.0,100.0,Female,DC Comics,10.0
2,Yoda,35.0,100.0,Male,George Lucas,55.0
3,Ymir,30.0,100.0,Male,Marvel Comics,100.0
4,Yellowjacket II,35.0,35.0,Female,Marvel Comics,10.0
...,...,...,...,...,...,...
604,Abomination,55.0,65.0,Male,Marvel Comics,80.0
605,Abin Sur,55.0,100.0,Male,DC Comics,90.0
606,Abe Sapien,35.0,100.0,Male,Dark Horse Comics,30.0
607,A-Bomb,20.0,25.0,Male,Marvel Comics,100.0


# 강함 요소(Speed,Power,Strength)를 합친 Total 열 추가

In [206]:
hero_merge['Total'] = hero_merge['Speed'] + hero_merge['Power'] + hero_merge['Strength']
hero_merge

Unnamed: 0,Name,Speed,Power,Gender,Publisher,Strength,Total
0,Zoom,100.0,100.0,Male,DC Comics,10.0,210.0
1,Zatanna,25.0,100.0,Female,DC Comics,10.0,135.0
2,Yoda,35.0,100.0,Male,George Lucas,55.0,190.0
3,Ymir,30.0,100.0,Male,Marvel Comics,100.0,230.0
4,Yellowjacket II,35.0,35.0,Female,Marvel Comics,10.0,80.0
...,...,...,...,...,...,...,...
604,Abomination,55.0,65.0,Male,Marvel Comics,80.0,200.0
605,Abin Sur,55.0,100.0,Male,DC Comics,90.0,245.0
606,Abe Sapien,35.0,100.0,Male,Dark Horse Comics,30.0,165.0
607,A-Bomb,20.0,25.0,Male,Marvel Comics,100.0,145.0


# Total을 기반으로 각 히어로들 순위 측정

In [219]:
hero_merge['rank'] = hero_merge['Total'].rank(method='min',ascending=False).astype(int)

hero_merge.sort_values(by='rank')

Unnamed: 0,Name,Speed,Power,Gender,Publisher,Strength,Total,rank
105,Silver Surfer,100.0,100.0,Male,Marvel Comics,100.0,300.0,1
397,General Zod,100.0,100.0,Male,DC Comics,100.0,300.0,1
119,Sentry,100.0,100.0,Male,Marvel Comics,100.0,300.0,1
543,Beyonder,100.0,100.0,Male,Marvel Comics,100.0,300.0,1
572,Ardina,100.0,100.0,Female,Marvel Comics,100.0,300.0,1
...,...,...,...,...,...,...,...,...
39,Two-Face,15.0,10.0,Male,DC Comics,10.0,35.0,603
8,Wyatt Wingfoot,15.0,5.0,Male,Marvel Comics,10.0,30.0,606
599,Agent Bob,15.0,5.0,Male,Marvel Comics,10.0,30.0,606
75,Stormtrooper,10.0,5.0,Male,George Lucas,10.0,25.0,608


In [222]:
hero_merge[(hero_merge['rank'] == 1)&(hero_merge['Publisher']=='DC Comics')]

Unnamed: 0,Name,Speed,Power,Gender,Publisher,Strength,Total,rank
57,The Presence,100.0,100.0,Male,DC Comics,100.0,300.0,1
70,Superman,100.0,100.0,Male,DC Comics,100.0,300.0,1
71,Supergirl,100.0,100.0,Female,DC Comics,100.0,300.0,1
72,Superboy-Prime,100.0,100.0,Male,DC Comics,100.0,300.0,1
93,Spectre,100.0,100.0,Male,DC Comics,100.0,300.0,1
171,Power Girl,100.0,100.0,Female,DC Comics,100.0,300.0,1
397,General Zod,100.0,100.0,Male,DC Comics,100.0,300.0,1


- DC 코믹스에서 가장 강한 영웅들 추출

In [223]:
hero_merge[(hero_merge['rank'] == 1)&(hero_merge['Publisher']=='Marvel Comics')]

Unnamed: 0,Name,Speed,Power,Gender,Publisher,Strength,Total,rank
105,Silver Surfer,100.0,100.0,Male,Marvel Comics,100.0,300.0,1
119,Sentry,100.0,100.0,Male,Marvel Comics,100.0,300.0,1
188,One-Above-All,100.0,100.0,-,Marvel Comics,100.0,300.0,1
264,Living Tribunal,100.0,100.0,-,Marvel Comics,100.0,300.0,1
543,Beyonder,100.0,100.0,Male,Marvel Comics,100.0,300.0,1
572,Ardina,100.0,100.0,Female,Marvel Comics,100.0,300.0,1


- 마블 코믹스에서 가장 강한 영웅들 추출

In [288]:
hero_merge['Publisher'].value_counts().nlargest(3).to_frame()

Unnamed: 0,Publisher
Marvel Comics,310
DC Comics,215
Dark Horse Comics,17


- 가장 많은 영웅을 보유하고 있는건 마블 코믹스

In [236]:
group_cols = ['Publisher']
agg_cols=['Speed','Power','Strength']

In [287]:
fg = hero_merge.groupby(group_cols)[agg_cols].agg(['sum','min','max','count'])
fg

Unnamed: 0_level_0,Speed,Speed,Speed,Speed,Power,Power,Power,Power,Strength,Strength,Strength,Strength
Unnamed: 0_level_1,sum,min,max,count,sum,min,max,count,sum,min,max,count
Publisher,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
ABC Studios,100.0,10.0,55.0,4,125.0,15.0,60.0,4,70.0,10.0,40.0,4
DC Comics,10255.0,10.0,100.0,215,15850.0,10.0,100.0,215,10870.0,5.0,100.0,215
Dark Horse Comics,680.0,15.0,95.0,17,1330.0,20.0,100.0,17,650.0,10.0,85.0,17
George Lucas,390.0,10.0,50.0,13,830.0,5.0,100.0,13,350.0,10.0,55.0,13
IDW Publishing,200.0,50.0,50.0,4,240.0,60.0,60.0,4,70.0,15.0,20.0,4
Icon Comics,95.0,15.0,30.0,4,100.0,20.0,30.0,4,40.0,10.0,10.0,4
Image Comics,205.0,50.0,100.0,3,270.0,70.0,100.0,3,245.0,70.0,100.0,3
J. K. Rowling,25.0,25.0,25.0,1,100.0,100.0,100.0,1,10.0,10.0,10.0,1
J. R. R. Tolkien,35.0,35.0,35.0,1,100.0,100.0,100.0,1,85.0,85.0,85.0,1
Marvel Comics,12975.0,5.0,100.0,310,20410.0,0.0,100.0,310,14405.0,5.0,100.0,310


- 각 출판사 별을 그룹화하여 정보 추출

- DC 코믹스가 rank 1등 영웅이 한명 더 많지만 전체적인 영웅수가 마블이 훨씬 많다.-\
- 마블승

In [289]:
hero_merge.to_csv("hero_rank_service.csv",encoding='UTF-8')