**1. Download Dataset**

In [1]:
# Menginstal paket kaggle dari PyPI menggunakan pip (alat manajemen paket untuk Python)
!pip install -q kaggle

Upload the kaggle.json API credentials file

In [2]:
# Create the .kaggle directory in the user's home directory (if it does not exist yet)
!mkdir -p ~/.kaggle
# Copy kaggle.json from the current working directory into ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [3]:
# Set the file's access permissions (chmod = "change mode");
# mode 600 grants read/write to the owner only
!chmod 600 ~/.kaggle/kaggle.json

Processing Download

In [4]:
# Download the genshin-impact-characters-stats dataset from Kaggle
!kaggle datasets download -d genshinplayer/genshin-impact-characters-stats

Dataset URL: https://www.kaggle.com/datasets/genshinplayer/genshin-impact-characters-stats
License(s): ODbL-1.0
Downloading genshin-impact-characters-stats.zip to /content
  0% 0.00/6.40k [00:00<?, ?B/s]
100% 6.40k/6.40k [00:00<00:00, 13.6MB/s]


In [5]:
# Membuat folder baru di content
!mkdir /content/genshindataset
# Mengimpor modul zipfile untuk bekerja dengan file arsip ZIP
import zipfile
# Mengekstrak semua file dari arsip ZIP ke direktori /content/
with zipfile.ZipFile('genshin-impact-characters-stats.zip', 'r') as zip_ref:zip_ref.extractall('/content/genshindataset')

**2. Import Library**

In [6]:
# NumPy: efficient numerical operations
import numpy as np
# pandas: manipulation and analysis of tabular data
import pandas as pd
# seaborn: data visualisation
import seaborn as sns
# Matplotlib: data visualisation
import matplotlib.pyplot as plt
# warnings: control over warning messages
import warnings
# Silence all warnings for cleaner cell output
# (note: this also hides deprecation and convergence warnings)
warnings.filterwarnings('ignore')

# Model-evaluation helpers
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
# Helper that splits data into training and test sets
from sklearn.model_selection import train_test_split
# RandomForestClassifier from scikit-learn
from sklearn.ensemble import RandomForestClassifier
# KNeighborsClassifier from scikit-learn
from sklearn.neighbors import KNeighborsClassifier
# MLPClassifier from scikit-learn
from sklearn.neural_network import MLPClassifier

In [7]:
pwd

'/content'

**3. Load & Display Dataset**

In [8]:
# Read the extracted Genshin Impact character-stats CSV into a pandas DataFrame,
# then preview the first, last and a few random records.
# NOTE(review): the previews show an "Unnamed: 0" column, which looks like a row index
# that was saved into the CSV — confirm whether it should be kept as a feature.

df = pd.read_csv("/content/genshindataset/Genshin_Impact_All_Character_Stat.csv")

print("1. Dua record pertama:\n")
display(df.head(2))

print("\n\n\n2. Dua record terakhir:\n")
display(df.tail(2))

print("\n\n\n3. Empat record acak:\n")
# Fixed seed so the same four rows are shown on every Restart & Run All
display(df.sample(4, random_state=42))

1. Dua record pertama:



Unnamed: 0,Character,Lv,Rarity,Element,Weapon,Main role,Ascension,Base HP,Base ATK,Base DEF
0,Amber,1,4,Pyro,Bow,Sub DPS,ATK,793,19,50
1,Amber,20,4,Pyro,Bow,Sub DPS,ATK,2038,48,129





2. Dua record terakhir:



Unnamed: 0,Character,Lv,Rarity,Element,Weapon,Main role,Ascension,Base HP,Base ATK,Base DEF
572,Kokomi,80,5,Hydro,Catalyst,Healer,Hydro DMG,12524,218,611
573,Kokomi,90,5,Hydro,Catalyst,Healer,Hydro DMG,13471,234,657





3. Empat record acak:



Unnamed: 0,Character,Lv,Rarity,Element,Weapon,Main role,Ascension,Base HP,Base ATK,Base DEF
568,Kokomi,60,5,Hydro,Catalyst,Healer,Hydro DMG,9377,163,457
371,Ganyu,60,5,Cryo,Bow,DPS,CRIT DMG,6355,217,409
536,Aloy,40,5,Cryo,Bow,DPS,Cryo DMG,4899,105,304
113,Kaeya,20,4,Cryo,Sword,Support,Energy Recharge,2506,48,171


**3.1 The shape of the dataset**

In [None]:
# Check the shape (rows, columns) before dropping duplicates
df.shape

**3.2 List types of all columns**

In [None]:
# Show the dtype pandas inferred for each column
df.dtypes

**3.3 Info of the dataset**

In [None]:
# Print the per-column non-null counts and dtypes, plus overall memory usage
df.info()

**3.4 Summary of the dataset**

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

**Check the null value**

**4. Data Cleaning**

**4.1 Drop the duplicates**

In [None]:
# Shape (rows, columns) before the duplicate rows are dropped, for later comparison
df.shape

In [None]:
# "Unnamed: 0" looks like a row index saved into the CSV (0, 1, ... in the previews).
# If it is unique per row, comparing it would make drop_duplicates() a no-op, so rows
# are compared on all remaining columns. The first occurrence of each duplicate is kept.
dedup_columns = [col for col in df.columns if col != "Unnamed: 0"]
df = df.drop_duplicates(subset=dedup_columns)

**4.2 Check the NULL value**

In [None]:
# Count the missing (null) values in each column
df.isnull().sum()

In [None]:
# List the column labels of the DataFrame
df.columns

**Check the number of zero values in the dataset**

In [None]:
# Report, for each column of interest, how many rows hold the value 0.
# Each pair is (label printed in the message, DataFrame column name).
zero_check_targets = [
    ("Character", "Character"),
    ("Level", "Lv"),
    ("Rarity", "Rarity"),
    ("Element", "Element"),
    ("Weapon", "Weapon"),
    ("Main role", "Main role"),
    ("Ascension", "Ascension"),
    ("Base HP", "Base HP"),
    ("Base ATK", "Base ATK"),
    ("Base DEF", "Base DEF"),
]

for label, column in zero_check_targets:
    # Keep only the rows whose value in this column equals 0, then count them
    zero_rows = df[df[column] == 0]
    print(f"- No of zero value in {label}: ", zero_rows.shape[0])

**Replace zero values with the column mean**

In [None]:
# NOTE(review): disabled template code. Glucose, BloodPressure, SkinThickness, Insulin
# and BMI are not among the columns shown for this dataset, so enabling it as-is would
# raise KeyError. Adapt the column names first if zero values ever need imputing.
# Pattern: replace every 0 in a column with that column's mean, then re-count the zeros.
# df["Glucose"] = df["Glucose"].replace(0, df["Glucose"].mean())
# print("- No of zero value in Glucose: ", df[df["Glucose"] == 0].shape[0])
# df["BloodPressure"] = df["BloodPressure"].replace(0, df["BloodPressure"].mean())
# print("- No of zero value in BloodPressure: ", df[df["BloodPressure"] == 0].shape[0])
# df["SkinThickness"] = df["SkinThickness"].replace(0, df["SkinThickness"].mean())
# print("- No of zero value in SkinThickness: ", df[df["SkinThickness"] == 0].shape[0])
# df["Insulin"] = df["Insulin"].replace(0, df["Insulin"].mean())
# print("- No of zero value in Insulin: ", df[df["Insulin"] == 0].shape[0])
# df["BMI"] = df["BMI"].replace(0, df["BMI"].mean())
# print("- No of zero value in BMI: ", df[df["BMI"] == 0].shape[0])

In [None]:
# Summary statistics of the numeric columns after cleaning.
# No trailing semicolon: it would suppress the cell output and hide the table.
df.describe()

**Histogram of each feature**

In [None]:
# Draw one histogram per numeric column of df
df.hist(
    figsize=(10, 10),  # overall figure size in inches (width, height)
    bins=10,           # number of bins used for every histogram
)
# Render the figure
plt.show()