# Topics Covered
- Identifying and handling missing data.
- Data transformation and normalization.
- Data filtering and deduplication.
- Standardization of categorical data.
- Outlier detection and handling.


## Exercise 1: Identifying and Handling Missing Data

In [2]:
# Melakukan import library pandas untuk memanipulasi data dan analisis data
import pandas as pd

# Menyiapkan dataset yang di isi dengan beberapa error kesalahan
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', None],
    'Age': [24, 30, None, 22, 35],
    'Salary': [48000, None, 57000, None, 60000]
}

# Menjadikan dataset sebagai dataframe pandas
df = pd.DataFrame(data)

# Melakukan pembersihan data

# Mengisi kolom kosong pada umur dimana kolom yang kosong di isi dengan rata - rata dari semua umur
df['Age'].fillna(df['Age'].mean(), inplace=True)

# Mengisi kolom kosong juga pada Gaji dimana kolom yang kosong di isi dengan nilai tengah dari gaji
df['Salary'].fillna(df['Salary'].median(), inplace=True)

# Menghapus data pada Name apabila terdapat yang kosong
df.dropna(subset=['Name'], inplace=True)

# Menampilkan data yang bersih
print('After cleaning:\n')
df

After cleaning:



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Salary'].fillna(df['Salary'].median(), inplace=True)


Unnamed: 0,Name,Age,Salary
0,Alice,24.0,48000.0
1,Bob,30.0,57000.0
2,Charlie,27.75,57000.0
3,David,22.0,57000.0


## Exercise 2: Standardizing Categorical Data

In [3]:

# Menyiapkan contoh dataset yang memiliki kesalahan pada nama kategorinya yaitu hurufnya tidak Kapital
data = {
    'Product': ['Laptop', 'Laptop', 'Desktop', 'Tablet', 'Tablet'],
    'Category': ['Electronics', 'electronics', 'Electronics', 'Gadgets', 'gadgets']
}
# Menjadikan dataset tersebut menjadi dataframe pandas
df = pd.DataFrame(data)

# Melakukan standarisasi data kategori dengan mengubahnya huruf awalnya menjadi kapital
df['Category'] = df['Category'].str.capitalize()

# Menampilkan contoh data setelah di standarisasi
print('Standardized Data:\n')
df


Standardized Data:



Unnamed: 0,Product,Category
0,Laptop,Electronics
1,Laptop,Electronics
2,Desktop,Electronics
3,Tablet,Gadgets
4,Tablet,Gadgets


# Practice Tasks
- Load a dataset of your choice and identify missing values.
- Implement data transformations to normalize numerical columns.
- Standardize categorical columns and remove duplicates.


## Baca dataset tersebut

In [4]:
df = pd.read_csv("./archive (2)/tested.csv")
df.info()
df


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  418 non-null    int64  
 1   Survived     418 non-null    int64  
 2   Pclass       418 non-null    int64  
 3   Name         418 non-null    object 
 4   Sex          418 non-null    object 
 5   Age          332 non-null    float64
 6   SibSp        418 non-null    int64  
 7   Parch        418 non-null    int64  
 8   Ticket       418 non-null    object 
 9   Fare         417 non-null    float64
 10  Cabin        91 non-null     object 
 11  Embarked     418 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 39.3+ KB


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,0,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,1,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S
2,894,0,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,0,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,1,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
413,1305,0,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,0,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,0,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


## Menhapus setiap nama alias yang dimiliki penumpang

In [5]:
# MEnghapus setiap nama alias karena sepertinya itu tidak diperlukan
df['Name'] = df['Name'].str.split(pat=',').str[1]

## Lihat apakah ada duplikasi pada data kategori

In [6]:
# Lihat apakah ada duplikasi pada nama
print(df[df['Name'].duplicated() != False])
# Lihat apakah ada duplikasi pada PassengerId
print(df[df['PassengerId'].duplicated() != False])

# Menghapus data yang duplicate tapi tetap mempertahankan data yang pertama
df.drop_duplicates(subset=['Name'], keep='first', inplace=True)


     PassengerId  Survived  Pclass                  Name     Sex   Age  SibSp  \
51           943         0       2             Mr. Franz    male  27.0      0   
106          998         0       3            Mr. Daniel    male  21.0      0   
121         1013         0       3              Mr. John    male   NaN      1   
124         1016         0       3              Mr. John    male   NaN      0   
145         1037         0       3            Mr. Julius    male  31.0      3   
148         1040         0       1      Mr. John Bertram    male   NaN      0   
160         1052         1       3           Miss. Julia  female   NaN      0   
183         1075         0       3           Mr. Patrick    male   NaN      0   
187         1079         0       3            Mr. Joseph    male  17.0      2   
189         1081         0       2             Mr. James    male  40.0      0   
193         1085         0       2              Mr. John    male  61.0      0   
206         1098         1  

## Normalisasi pada data numerical

In [7]:
# Mengisi data yang kosong pada umur dengan menghitung rata - rata umur
df['Age'].fillna(value=df['Age'].mean(), inplace=True)

# Setelah itu mengubah tipe data umur menjadi int
df['Age'] = df['Age'].astype(int)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(value=df['Age'].mean(), inplace=True)


## Menghapus beberapa baris dan beberapa kolom

In [8]:
# Karena pada 
df[df['Fare'].isnull()]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
152,1044,0,3,Mr. Thomas,male,60,0,0,3701,,,S


In [9]:
# Karena hanya 1 baris saja yang kosong pada Fare maka untuk mempermudah hanya menghapus baris tersebut
df.dropna(subset=['Fare'], inplace=True)
# Menhapus kolom cabin karena datanya banyak yang kosong jadi sudah tidak layak pakai
df.drop(columns=['Cabin'], inplace=True)


In [10]:
## Menampilkan Dataset yang sudah dibersihkan
df.info()
df

<class 'pandas.core.frame.DataFrame'>
Index: 380 entries, 0 to 417
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  380 non-null    int64  
 1   Survived     380 non-null    int64  
 2   Pclass       380 non-null    int64  
 3   Name         380 non-null    object 
 4   Sex          380 non-null    object 
 5   Age          380 non-null    int64  
 6   SibSp        380 non-null    int64  
 7   Parch        380 non-null    int64  
 8   Ticket       380 non-null    object 
 9   Fare         380 non-null    float64
 10  Embarked     380 non-null    object 
dtypes: float64(1), int64(6), object(4)
memory usage: 35.6+ KB


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,892,0,3,Mr. James,male,34,0,0,330911,7.8292,Q
1,893,1,3,Mrs. James (Ellen Needs),female,47,1,0,363272,7.0000,S
2,894,0,2,Mr. Thomas Francis,male,62,0,0,240276,9.6875,Q
3,895,0,3,Mr. Albert,male,27,0,0,315154,8.6625,S
4,896,1,3,Mrs. Alexander (Helga E Lindqvist),female,22,1,1,3101298,12.2875,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,0,3,Mr. Woolf,male,30,0,0,A.5. 3236,8.0500,S
414,1306,1,1,Dona. Fermina,female,39,0,0,PC 17758,108.9000,C
415,1307,0,3,Mr. Simon Sivertsen,male,38,0,0,SOTON/O.Q. 3101262,7.2500,S
416,1308,0,3,Mr. Frederick,male,30,0,0,359309,8.0500,S


# Homework for Students
- **Pertemuan 12**: Clean a real-world dataset (from Kaggle or another source), perform normalization, handle outliers, and prepare the data for analysis.

## Inisiasi dataset
- Dataframe 1 = https://www.kaggle.com/datasets/sadkoktaybicici/mobile-legends-bang-bang-mlbb-heros-dataset
- Dataframe 2 = https://www.opendatabay.com/data/ai-ml/9c27ee82-a4db-4260-a82f-8a980ed3f280

In [11]:
# Import Dataset
df1 = pd.read_csv('./DatasetMLBB/mlbb_heroes.csv').rename(columns={"hero_name":"Name"})
df2 = pd.read_csv('./DatasetMLBB/Mlbb_Heroes.csv')
print(df1.info())
print(df2.info())

# Melakukan join pada untuk kedua dataset
df = pd.merge(df1, df2, how="inner", on="Name")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 19 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name                  103 non-null    object 
 1   role                  103 non-null    object 
 2   defense_overall       103 non-null    float64
 3   offense_overall       103 non-null    float64
 4   skill_effect_overall  103 non-null    float64
 5   difficulty_overall    103 non-null    float64
 6   movement_spd          103 non-null    int64  
 7   magic_defense         103 non-null    int64  
 8   mana                  103 non-null    int64  
 9   hp_regen              103 non-null    int64  
 10  physical_atk          103 non-null    int64  
 11  physical_defense      103 non-null    int64  
 12  hp                    103 non-null    int64  
 13  attack_speed          103 non-null    float64
 14  mana_regen            103 non-null    float64
 15  win_rate              1

In [12]:
# Melihat info dari dataset yang nantinya akan digunakan
print(df.info())
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94 entries, 0 to 93
Data columns (total 36 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Name                  94 non-null     object 
 1   role                  94 non-null     object 
 2   defense_overall       94 non-null     float64
 3   offense_overall       94 non-null     float64
 4   skill_effect_overall  94 non-null     float64
 5   difficulty_overall    94 non-null     float64
 6   movement_spd          94 non-null     int64  
 7   magic_defense         94 non-null     int64  
 8   mana                  94 non-null     int64  
 9   hp_regen              94 non-null     int64  
 10  physical_atk          94 non-null     int64  
 11  physical_defense      94 non-null     int64  
 12  hp                    94 non-null     int64  
 13  attack_speed          94 non-null     float64
 14  mana_regen            94 non-null     float64
 15  win_rate              94 

Unnamed: 0,Name,role,defense_overall,offense_overall,skill_effect_overall,difficulty_overall,movement_spd,magic_defense,mana,hp_regen,...,Hp_Regen,Mana,Mana_Regen,Phy_Damage,Mag_Damage,Phy_Defence,Mag_Defence,Mov_Speed,Esport_Wins,Esport_Loss
0,Terizla,fighter,7.0,8.0,6.0,6.0,255,10,430,54,...,10.8,430,21.0,129,0,19,15,255,103,105
1,Martis,fighter,6.0,8.0,5.0,5.0,260,10,0,35,...,7.0,0,0.0,128,0,25,15,260,95,110
2,Grock,tank,8.0,5.0,6.0,4.0,260,10,430,42,...,8.4,430,12.0,135,0,21,15,260,601,634
3,Carmilla,support,5.0,5.0,9.0,5.0,255,10,430,39,...,7.8,430,16.0,126,0,25,15,255,32,47
4,Irithel,marksman,5.0,5.0,8.0,7.0,260,10,438,35,...,7.0,0,0.0,118,0,17,15,260,114,109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,Esmeralda,mage,7.0,6.0,6.0,4.0,240,10,502,36,...,7.2,502,20.0,114,0,21,15,240,1357,1529
90,Benedetta,assassin,5.0,6.0,7.0,7.0,260,10,460,33,...,6.6,460,16.0,113,0,18,15,255,477,471
91,Brody,marksman,6.0,9.0,4.0,7.0,253,10,435,36,...,7.2,435,15.0,105,0,20,15,253,470,476
92,Natalia,assassin,6.0,9.0,4.0,6.0,250,10,486,35,...,7.0,486,16.0,121,0,18,15,260,173,173


## Menghapus beberapa kolom yang menampilkan informasi yang sama

In [13]:
df.drop(columns=['role', 'defense_overall', 'offense_overall','movement_spd',
       'magic_defense', 'mana', 'hp_regen', 'physical_atk', 'physical_defense',
       'hp', 'mana_regen'], inplace=True)
df

Unnamed: 0,Name,skill_effect_overall,difficulty_overall,attack_speed,win_rate,pick_rate,ban_rate,release_year,Title,Voice_Line,...,Hp_Regen,Mana,Mana_Regen,Phy_Damage,Mag_Damage,Phy_Defence,Mag_Defence,Mov_Speed,Esport_Wins,Esport_Loss
0,Terizla,6.0,6.0,0.80,47.00,0.15,0.02,2019,Executioner,Primary Role,...,10.8,430,21.0,129,0,19,15,255,103,105
1,Martis,5.0,5.0,0.86,45.00,0.15,0.02,2018,Ashura King,Primary Role,...,7.0,0,0.0,128,0,25,15,260,95,110
2,Grock,6.0,4.0,0.81,47.69,0.14,0.02,2017,Fortress Titan,Primary Role,...,8.4,430,12.0,135,0,21,15,260,601,634
3,Carmilla,9.0,5.0,0.91,53.76,0.08,0.02,2020,Shadow of Twilight,True love is like blood,...,7.8,430,16.0,126,0,25,15,255,32,47
4,Irithel,8.0,7.0,0.82,51.52,0.38,0.03,2017,Jungle Heart,Primary Role,...,7.0,0,0.0,118,0,17,15,260,114,109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,Esmeralda,6.0,4.0,0.80,49.14,1.25,42.29,2019,Astrologer,Secondary Role,...,7.2,502,20.0,114,0,21,15,240,1357,1529
90,Benedetta,7.0,7.0,0.88,52.38,0.72,49.46,2020,Shadow Ranger,"When the body suffers, the spirit blooms.",...,6.6,460,16.0,113,0,18,15,255,477,471
91,Brody,4.0,7.0,0.96,51.57,0.71,60.05,2020,The Lone Star,Pain is the proof the I'm still alive.,...,7.2,435,15.0,105,0,20,15,253,470,476
92,Natalia,4.0,6.0,0.89,47.76,0.41,61.29,2016,Bright Claw,Primary Role,...,7.0,486,16.0,121,0,18,15,260,173,173


## Menghapus beberapa kolom yang tidak relevan untuk analisis dan tidak relevan dari kedua data yang digabungkan

In [14]:
df.drop(columns=['win_rate', 'pick_rate', 'ban_rate', 'release_year', 'Secondary_Role', 'Title',
       'Voice_Line'], inplace=True)
df

Unnamed: 0,Name,skill_effect_overall,difficulty_overall,attack_speed,Release_Date,Primary_Role,Lane,Hp,Hp_Regen,Mana,Mana_Regen,Phy_Damage,Mag_Damage,Phy_Defence,Mag_Defence,Mov_Speed,Esport_Wins,Esport_Loss
0,Terizla,6.0,6.0,0.80,2019-06-04,Fighter,EXP Lane,2728,10.8,430,21.0,129,0,19,15,255,103,105
1,Martis,5.0,5.0,0.86,2018,Fighter,EXP Lane,2738,7.0,0,0.0,128,0,25,15,260,95,110
2,Grock,6.0,4.0,0.81,2017,Tank,Roamer,2819,8.4,430,12.0,135,0,21,15,260,601,634
3,Carmilla,9.0,5.0,0.91,2020-01-17,Support,Roamer,2528,7.8,430,16.0,126,0,25,15,255,32,47
4,Irithel,8.0,7.0,0.82,2017-08-15,Marksman,Gold Lane,2540,7.0,0,0.0,118,0,17,15,260,114,109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,Esmeralda,6.0,4.0,0.80,2019-04-02,Mage,EXP Lane,2573,7.2,502,20.0,114,0,21,15,240,1357,1529
90,Benedetta,7.0,7.0,0.88,2020-11-07,Assassin,EXP Lane,2569,6.6,460,16.0,113,0,18,15,255,477,471
91,Brody,4.0,7.0,0.96,2020-10-16,Marksman,Gold Lane,2490,7.2,435,15.0,105,0,20,15,253,470,476
92,Natalia,4.0,6.0,0.89,2016-12,Assassin,Jungler,2639,7.0,486,16.0,121,0,18,15,260,173,173


## Menambahkan Fitur baru

In [15]:
# Melihat setiap hero berapa kali digunakan pada perlombaan Esport
df['Total_Match'] = df['Esport_Wins'] + df['Esport_Loss']

# Melihat winrate setiap hero dari setiap match
df['Winrate'] = (df['Esport_Wins'] / df['Total_Match']) * 100
# Bulatkan persentase tersebut
df['Winrate'] = df['Winrate'].round(decimals=1)
df

Unnamed: 0,Name,skill_effect_overall,difficulty_overall,attack_speed,Release_Date,Primary_Role,Lane,Hp,Hp_Regen,Mana,Mana_Regen,Phy_Damage,Mag_Damage,Phy_Defence,Mag_Defence,Mov_Speed,Esport_Wins,Esport_Loss,Total_Match,Winrate
0,Terizla,6.0,6.0,0.80,2019-06-04,Fighter,EXP Lane,2728,10.8,430,21.0,129,0,19,15,255,103,105,208,49.5
1,Martis,5.0,5.0,0.86,2018,Fighter,EXP Lane,2738,7.0,0,0.0,128,0,25,15,260,95,110,205,46.3
2,Grock,6.0,4.0,0.81,2017,Tank,Roamer,2819,8.4,430,12.0,135,0,21,15,260,601,634,1235,48.7
3,Carmilla,9.0,5.0,0.91,2020-01-17,Support,Roamer,2528,7.8,430,16.0,126,0,25,15,255,32,47,79,40.5
4,Irithel,8.0,7.0,0.82,2017-08-15,Marksman,Gold Lane,2540,7.0,0,0.0,118,0,17,15,260,114,109,223,51.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,Esmeralda,6.0,4.0,0.80,2019-04-02,Mage,EXP Lane,2573,7.2,502,20.0,114,0,21,15,240,1357,1529,2886,47.0
90,Benedetta,7.0,7.0,0.88,2020-11-07,Assassin,EXP Lane,2569,6.6,460,16.0,113,0,18,15,255,477,471,948,50.3
91,Brody,4.0,7.0,0.96,2020-10-16,Marksman,Gold Lane,2490,7.2,435,15.0,105,0,20,15,253,470,476,946,49.7
92,Natalia,4.0,6.0,0.89,2016-12,Assassin,Jungler,2639,7.0,486,16.0,121,0,18,15,260,173,173,346,50.0


## Mengganti Release_Date dengan Season hero tersebut release

In [16]:
import math
# Mengubah tipe data release_date menjadi tanggal terlebih dahulu
df['Release_Date'] = pd.to_datetime(df['Release_Date'], errors='coerce', format='mixed')


try:
    # Menghitung selisih bulan dari season tersebut
    bulan = (df['Release_Date'].dt.year - 2016) * 12 + (df['Release_Date'].dt.month - 7)

    # Menghitung season
    df['Season'] = bulan // 3 + 1

except:
    df['Season'] = pd.NA

# Saat ditampilkan masih nanyak yang NaN seasonnya oleh sebab itu data yang NaN akan diisi dengan nilai tengah season di tahun tersebut
print(df['Season'])

# Tambahkan kolom tahun
df['Year'] = df['Release_Date'].dt.year
# Hitung nilai tengah season setiap tahun
nilaiTengah = df.groupby('Year')['Season'].median()

# Gunakan DataFrame.map untuk mmengganti kolom yang kosong dengan nilai tengah tahun
df['Season'] = df['Season'].fillna(df['Year'].map(nilaiTengah))

# Menampilkan data yang sudah dibersihkan
df

0     12
1      7
2      3
3     15
4      5
      ..
89    12
90    18
91    18
92     2
93    14
Name: Season, Length: 94, dtype: int32


Unnamed: 0,Name,skill_effect_overall,difficulty_overall,attack_speed,Release_Date,Primary_Role,Lane,Hp,Hp_Regen,Mana,...,Mag_Damage,Phy_Defence,Mag_Defence,Mov_Speed,Esport_Wins,Esport_Loss,Total_Match,Winrate,Season,Year
0,Terizla,6.0,6.0,0.80,2019-06-04,Fighter,EXP Lane,2728,10.8,430,...,0,19,15,255,103,105,208,49.5,12,2019
1,Martis,5.0,5.0,0.86,2018-01-01,Fighter,EXP Lane,2738,7.0,0,...,0,25,15,260,95,110,205,46.3,7,2018
2,Grock,6.0,4.0,0.81,2017-01-01,Tank,Roamer,2819,8.4,430,...,0,21,15,260,601,634,1235,48.7,3,2017
3,Carmilla,9.0,5.0,0.91,2020-01-17,Support,Roamer,2528,7.8,430,...,0,25,15,255,32,47,79,40.5,15,2020
4,Irithel,8.0,7.0,0.82,2017-08-15,Marksman,Gold Lane,2540,7.0,0,...,0,17,15,260,114,109,223,51.1,5,2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,Esmeralda,6.0,4.0,0.80,2019-04-02,Mage,EXP Lane,2573,7.2,502,...,0,21,15,240,1357,1529,2886,47.0,12,2019
90,Benedetta,7.0,7.0,0.88,2020-11-07,Assassin,EXP Lane,2569,6.6,460,...,0,18,15,255,477,471,948,50.3,18,2020
91,Brody,4.0,7.0,0.96,2020-10-16,Marksman,Gold Lane,2490,7.2,435,...,0,20,15,253,470,476,946,49.7,18,2020
92,Natalia,4.0,6.0,0.89,2016-12-01,Assassin,Jungler,2639,7.0,486,...,0,18,15,260,173,173,346,50.0,2,2016


## Menyempurnakan dan membuang kolom yang tadi digunakan untuk membantu perhitungan

In [17]:
df.drop(columns=['Release_Date', 'Year', 'Mag_Damage'], inplace=True)

In [18]:
## Data yang sudah dibersihkan dan siap di analisis
df

Unnamed: 0,Name,skill_effect_overall,difficulty_overall,attack_speed,Primary_Role,Lane,Hp,Hp_Regen,Mana,Mana_Regen,Phy_Damage,Phy_Defence,Mag_Defence,Mov_Speed,Esport_Wins,Esport_Loss,Total_Match,Winrate,Season
0,Terizla,6.0,6.0,0.80,Fighter,EXP Lane,2728,10.8,430,21.0,129,19,15,255,103,105,208,49.5,12
1,Martis,5.0,5.0,0.86,Fighter,EXP Lane,2738,7.0,0,0.0,128,25,15,260,95,110,205,46.3,7
2,Grock,6.0,4.0,0.81,Tank,Roamer,2819,8.4,430,12.0,135,21,15,260,601,634,1235,48.7,3
3,Carmilla,9.0,5.0,0.91,Support,Roamer,2528,7.8,430,16.0,126,25,15,255,32,47,79,40.5,15
4,Irithel,8.0,7.0,0.82,Marksman,Gold Lane,2540,7.0,0,0.0,118,17,15,260,114,109,223,51.1,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,Esmeralda,6.0,4.0,0.80,Mage,EXP Lane,2573,7.2,502,20.0,114,21,15,240,1357,1529,2886,47.0,12
90,Benedetta,7.0,7.0,0.88,Assassin,EXP Lane,2569,6.6,460,16.0,113,18,15,255,477,471,948,50.3,18
91,Brody,4.0,7.0,0.96,Marksman,Gold Lane,2490,7.2,435,15.0,105,20,15,253,470,476,946,49.7,18
92,Natalia,4.0,6.0,0.89,Assassin,Jungler,2639,7.0,486,16.0,121,18,15,260,173,173,346,50.0,2
