<a href="https://colab.research.google.com/github/diaznugraha03/UAS_MachineLearning/blob/main/Game.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Step 1: read the sales data from a semicolon-separated CSV file
# NOTE(review): the path is relative and looks like a mounted Google Drive
# folder; no mount step is visible in this notebook -- confirm it runs on a
# fresh kernel.
file_path = 'drive/MyDrive/dataset/vgsales.csv'
df = pd.read_csv(file_path, sep=';')
df

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
16593,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
16594,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
16595,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.00,0.00,0.00,0.00,0.01
16596,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


In [7]:
# Step 2: data preprocessing
# Drop rows whose target column 'Global_Sales' is missing, then fill missing
# 'Year' values with the column median. Both steps are chained through
# `.assign`, so the filled column is written to a fresh frame instead of being
# assigned onto the result of `dropna` (which can raise SettingWithCopyWarning).
df = (
    df.dropna(subset=['Global_Sales'])
    .assign(Year=lambda frame: frame['Year'].fillna(frame['Year'].median()))
)
# NOTE(review): this median is computed over all remaining rows, before the
# train/test split, and it leaves no NaN in 'Year' for the 'year_imputer' step
# of the ColumnTransformer to fill -- confirm whether imputing only inside the
# pipeline would be preferable.


In [8]:
# Encode the categorical 'Genre' labels as integer codes for the target.
# The codes are stored in their own Series (sharing df's index) instead of
# overwriting df['Genre'], so re-running this cell always fits the encoder on
# the original labels and label_encoder_genre.classes_ keeps the genre names
# needed to map predicted codes back to genres.
label_encoder_genre = LabelEncoder()

# Select the features and the target
X = df[['Global_Sales', 'Year', 'Platform']]
y = pd.Series(
    label_encoder_genre.fit_transform(df['Genre']),
    index=df.index,
    name='Genre',
)

In [9]:
# Preprocessing applied to the feature columns:
#   - 'Platform' is one-hot encoded. handle_unknown='ignore' makes a platform
#     that was not seen during fit encode as an all-zero row instead of raising
#     at transform time (possible when a rare platform lands only in the test
#     split).
#   - 'Year' has missing values replaced by the median learned during fit.
#   - every remaining column is passed through unchanged.
column_transformer = ColumnTransformer(
    transformers=[
        ('platform', OneHotEncoder(handle_unknown='ignore'), ['Platform']),
        ('year_imputer', SimpleImputer(strategy='median'), ['Year']),
    ],
    remainder='passthrough',
)

In [12]:
# Step 3: build and train a linear-regression model using a pipeline

# Split the data into training and test sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Preprocessing step followed by the linear-regression estimator
pipeline = Pipeline([
    ('preprocessor', column_transformer),
    ('regressor', LinearRegression()),
])

# Train the model
# NOTE(review): y holds label-encoded 'Genre' codes, so the regression treats
# the genre categories as ordered numeric values -- confirm that a regressor
# (rather than a classifier) is intended here.
pipeline.fit(X_train, y_train)

# Predict on the test set
y_pred = pipeline.predict(X_test)

In [14]:
# Step 4: evaluate the model on the test set
# Both metrics compare the test targets with the pipeline's predictions.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')


Mean Squared Error: 13.594059118639901
R^2 Score: 0.041674312380018064


In [15]:
# Prediction results: actual targets next to the model's predictions
predictions = pd.DataFrame({'Actual': y_test}).assign(Predicted=y_pred)
# Show the first five rows as plain text
print(predictions.head(5))

       Actual  Predicted
14770       0   3.807932
13815       2   5.410814
4044        3   3.555220
11271       1   4.280586
16226      10   5.978648
