# Tugas Besar ML - Supervised Learning

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import plotly.express as px
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [2]:
df = pd.read_csv('factbook.csv')
df.head()

Unnamed: 0,Country,Area,Birth rate,Current account balance,Death rate,Electricity consumption,Electricity production,Exports,GDP,GDP per capita,...,Life expectancy at birth,Military expenditures,Natural gas consumption,Oil consumption,Population,Public debt,Railways,Reserves of foreign exchange & gold,Total fertility rate,Unemployment rate
0,Albania,28748,15.08,504000000,5.12,6760000000,5680000000,552400000,17460000000,4900,...,77.24,1.49,30000000.0,7500,3563112,,447.0,1206000000.0,2.04,14.8
1,Algeria,2381740,17.13,11900000000,4.6,23610000000,25760000000,32160000000,212300000000,6600,...,73.0,3.2,22320000000.0,209000,32531853,37.4,3973.0,43550000000.0,1.92,25.4
2,Angola,1246700,44.64,37880000,25.9,1587000000,1707000000,12760000000,23170000000,2100,...,36.61,10.6,530000000.0,31000,11190786,,2761.0,800000000.0,6.27,
3,Argentina,2766890,16.9,5473000000,7.56,81650000000,81390000000,33780000000,483500000000,12400,...,75.91,1.3,31100000000.0,486000,39537943,118.0,34091.0,19470000000.0,2.19,14.8
4,Armenia,29800,11.76,240400000,8.16,5797000000,6492000000,850000000,13650000000,4600,...,71.55,6.5,1400000000.0,5700,2982904,,845.0,555000000.0,1.32,30.0


In [3]:
ATRIBUT = ['Exports','Imports','Industrial production growth rate','Investment','Unemployment rate']
OUTPUT = ['GDP']

In [4]:
data = df[ATRIBUT+OUTPUT]
data = data.dropna()

In [5]:
for i in ATRIBUT:
    data[i] = (data[i]-data[i].min())/(data[i].max()-data[i].min())
data.head()

Unnamed: 0,Exports,Imports,Industrial production growth rate,Investment,Unemployment rate,GDP
0,0.000563,0.001126,0.28836,0.182137,0.197674,17460000000
1,0.035948,0.010054,0.365079,0.318739,0.351744,212300000000
3,0.037762,0.014669,0.52381,0.180385,0.197674,483500000000
4,0.000896,0.0006,0.603175,0.206655,0.418605,13650000000
5,0.097219,0.066201,0.256614,0.302977,0.056686,611700000000


In [6]:
fig = px.box(data[ATRIBUT])
fig.show()

In [7]:

for i in ATRIBUT:
    mean = data[i].mean()
    std = data[i].std()
    # delete outlier
    z = (data[i] - mean)/std
    data = data.drop(z[z > 3].index)
    data = data.drop(z[z < -3].index)

for i in ATRIBUT:
    data[i] = (data[i]-data[i].min())/(data[i].max()-data[i].min())

data.head()

Unnamed: 0,Exports,Imports,Industrial production growth rate,Investment,Unemployment rate,GDP
0,0.001594,0.006029,0.3,0.311377,0.274227,17460000000
1,0.101761,0.05385,0.407407,0.54491,0.492784,212300000000
3,0.106895,0.07857,0.62963,0.308383,0.274227,483500000000
4,0.002537,0.003212,0.740741,0.353293,0.587629,13650000000
5,0.275204,0.354592,0.255556,0.517964,0.074227,611700000000


In [8]:
# Scatter plot
fig = go.Figure(data=go.Scatter(x=data['Exports'], y=data['GDP'], mode='markers'))

# Mengatur layout
fig.update_layout(title='Scatter Plot Exports vs GDP', xaxis_title='Exports', yaxis_title='GDP')

# Menampilkan plot
fig.show()

In [9]:
# Scatter plot
fig = go.Figure(data=go.Scatter(x=data['Imports'], y=data['GDP'], mode='markers'))

# Mengatur layout
fig.update_layout(title='Scatter Plot Imports vs GDP', xaxis_title='Imports', yaxis_title='GDP')

# Menampilkan plot
fig.show()

In [10]:
# Scatter plot
fig = go.Figure(data=go.Scatter(x=data['Industrial production growth rate'], y=data['GDP'], mode='markers'))

# Mengatur layout
fig.update_layout(title='Scatter Plot Industrial production growth rate vs GDP', xaxis_title='Industrial production growth rate', yaxis_title='GDP')

# Menampilkan plot
fig.show()

In [11]:
# Scatter plot
fig = go.Figure(data=go.Scatter(x=data['Investment'], y=data['GDP'], mode='markers'))

# Mengatur layout
fig.update_layout(title='Scatter Plot Investment vs GDP', xaxis_title='Investment', yaxis_title='GDP')

# Menampilkan plot
fig.show()

In [12]:
# Scatter plot
fig = go.Figure(data=go.Scatter(x=data['Unemployment rate'], y=data['GDP'], mode='markers'))

# Mengatur layout
fig.update_layout(title='Scatter Plot Unemployment rate vs GDP', xaxis_title='Unemployment rate', yaxis_title='GDP')

# Menampilkan plot
fig.show()

In [13]:
# Pembagian data latih dan data test
X_train, X_test, y_train, y_test = train_test_split(data[ATRIBUT], data[OUTPUT], test_size=0.2, random_state=42)

In [14]:
mlp = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    max_iter=1000
    )

In [15]:
# Melatih model menggunakan data latih
mlp.fit(X_train, y_train)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


Stochastic Optimizer: Maximum iterations (1000) reached and the optimization hasn't converged yet.



In [16]:
# Evaluasi model
train_score = mlp.score(X_train, y_train)
test_score = mlp.score(X_test, y_test)
y_pred = mlp.predict(X_test)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

In [17]:
# Menampilkan hasil evaluasi
print("Training Score:", train_score)
print("Test Score:", test_score)
print("R^2 Score:", r2)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)

Training Score: -0.2971407117952114
Test Score: -0.6919554170478774
R^2 Score: -0.6919554170478774
Mean Squared Error: 6.083736759544662e+22
Mean Absolute Error: 157735633967.42554


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=235d0884-4c38-4643-a5a3-913675ab1d49' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>