In [90]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
import warnings


In [91]:
df = pd.read_excel('coba1.xlsx')
df

Unnamed: 0,GDP,Exports,Imports,Industrial production growth rate,Investment,Unemployment rate
0,3745000000000,538800000000,401800000000,6.6,24,4.7
1,2362000000000,893300000000,716700000000,2.2,17.6,10.6
2,310200000000,113000000000,36210000000,2.8,17.2,25
3,1408000000000,162500000000,92910000000,6.4,19.1,8.3
4,251900000000,130700000000,121100000000,4.7,20.4,3.4
...,...,...,...,...,...,...
144,1609000000000,336400000000,329300000000,0.7,19.3,8.6
145,937600000000,172500000000,222000000000,3,25.4,10.4
146,1782000000000,347200000000,439400000000,0.9,16.2,4.8
147,611700000000,86890000000,98100000000,1.9,25.3,5.1


In [92]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 6 columns):
 #   Column                             Non-Null Count  Dtype 
---  ------                             --------------  ----- 
 0   GDP                                149 non-null    int64 
 1   Exports                            149 non-null    int64 
 2   Imports                            149 non-null    int64 
 3   Industrial production growth rate  149 non-null    object
 4   Investment                         149 non-null    object
 5   Unemployment rate                  149 non-null    object
dtypes: int64(3), object(3)
memory usage: 7.1+ KB


In [93]:
print('nilai unik Industrial production growth rate : ')
print(df['Industrial production growth rate'].unique())

print('\nnilai unik Investment : ')
print(df['Investment'].unique())

print('\nnilai unik Unemployment rate : ')
print(df['Unemployment rate'].unique())

nilai unik Industrial production growth rate : 
[6.6 2.2 2.8 6.4 4.7 5.2 17.1 2 10.1 5.5 12.2 0.8 1 12.3 -5 6 10.2 3.5 ' '
 11.1 10.5 8.5 1.7 4 12 1.8 10 7.4 16.5 5 -1.2 7.8 2.5 7.2 13.1 7 6.2 3
 4.4 8 5.7 0 7.7 6.5 4.5 1.6 22 3.8 10.6 3.9 8.2 1.4 18 9.7 3.7 3.4 15.5
 4.6 7.1 17 4.2 8.3 6.9 -7.8 15 3.2 8.4 2.6 6.7 5.4 14 3.1 8.8 30 5.6 -0.3
 -2 0.7 4.1 5.1 2.7 16 3.3 5.9 9.6 1.1 0.9 1.9]

nilai unik Investment : 
[24 17.6 17.2 19.1 20.4 17.5 46 19.4 28.7 15.8 18 19.9 22.7 12.9 8 26.2
 21.7 18.3 9.9 27.4 19.8 16.6 22.5 20.8 22.9 23.8 18.8 17 13.5 23.9 31.3
 16.4 16.3 22.4 18.9 18.6 12.8 ' ' 16.1 25.5 24.7 10.4 25.8 20.6 24.1 19.6
 23.5 11.6 21.8 9.6 29 19.7 24.5 13.6 25.3 17.8 31.5 18.1 34.5 24.9 22
 10.7 23.6 19.2 39.5 47 39.6 33.6 26.3 17.1 19.3 41.4 10.2 11.2 20 26.4
 14.7 21 20.9 16.2 11.3 25 29.1 18.4 20.1 50.8 17.9 18.5 16 32 28 28.2
 26.1 14.9 21.9 28.6 36.6 26 16.7 65.1 14.4 22.6 23.3 22.8 27 22.3 17.3
 25.4 15.7]

nilai unik Unemployment rate : 
[4.7 10.6 25 8.3 3.4 4.3 9.8 7 3

In [94]:
# Menghapus string kosong dengan Nan pada atribut  'Industrial production growth rate', 'Investment', dan 'Unemployment rate'
df['Industrial production growth rate'] = df['Industrial production growth rate'].replace(' ', np.nan)
df['Investment'] = df['Investment'].replace(' ', np.nan)
df['Unemployment rate'] = df['Unemployment rate'].replace(' ', np.nan)

In [95]:
# Menghapus baris yang memiliki Nan value pada atribut  'Industrial production growth rate', 'Investment', dan 'Unemployment rate'
df.dropna(subset=['Industrial production growth rate'], inplace=True)
df.dropna(subset=['Investment'], inplace=True)
df.dropna(subset=['Unemployment rate'], inplace=True)

In [96]:
# Mengubah data types menjadi float
df['Industrial production growth rate'] = df['Industrial production growth rate'].astype(float)
df['Investment'] = df['Investment'].astype(float)
df['Unemployment rate'] = df['Unemployment rate'].astype(float)


In [97]:
# Memilih atribut yang akan digunakan
selected_features = ['Exports', 'Imports', 'Industrial production growth rate', 'Investment', 'Unemployment rate', 'GDP']

# Mengambil subset dataset dengan atribut yang dipilih
df = df[selected_features]

# Normalisasi data
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
df[selected_features[:-1]] = scaler_X.fit_transform(df[selected_features[:-1]])
df['GDP'] = scaler_y.fit_transform(df[['GDP']])

# Pisahkan atribut dan target
X = df[selected_features[:-1]]
y = df['GDP']

In [130]:
# Inisialisasi model MLP
model = MLPRegressor(hidden_layer_sizes=(200, 100, 50), activation='tanh', solver='lbfgs', max_iter=2000, random_state=42)

# Inisialisasi evaluasi model
mse_scores = []
mae_scores = []
rmse_scores = []

# K-Fold Cross Validation
k = 5  # Jumlah fold
kf = KFold(n_splits=k, shuffle=True, random_state=42)

fold = 1
for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    # Latih model dengan data latih
    model.fit(X_train, y_train)

    # Prediksi nilai GDP menggunakan data uji
    y_pred = model.predict(X_test)

    # Evaluasi model
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    
    mse_scores.append(mse)
    mae_scores.append(mae)
    rmse_scores.append(rmse)

    fold += 1

In [138]:
# Normalisasi hasil evaluasi
mse_scaled = (np.mean(mse_scores) - np.min(mse_scores)) / (np.max(mse_scores) - np.min(mse_scores))
mae_scaled = (np.mean(mae_scores) - np.min(mae_scores)) / (np.max(mae_scores) - np.min(mae_scores))
rmse_scaled = np.sqrt(mse_scaled)

print(f"Scaled Mean Squared Error (MSE): {mse_scaled:.2f}")
print(f"Scaled Mean Absolute Error (MAE): {mae_scaled:.2f}")
print(f"Scaled Root Mean Squared Error (RMSE): {rmse_scaled:.2f}")

Scaled Mean Squared Error (MSE): 0.42
Scaled Mean Absolute Error (MAE): 0.49
Scaled Root Mean Squared Error (RMSE): 0.65


In [131]:
# Menerima input dari pengguna
user_input = []
for feature in selected_features[:-1]:
    value = float(input(f"Masukkan nilai {feature}: "))
    user_input.append(value)

# Lakukan normalisasi pada input pengguna
normalized_input = scaler_X.transform([user_input])

# Prediksi nilai GDP berdasarkan input pengguna
predicted_GDPs = []
for _ in range(k):  # Lakukan prediksi pada setiap fold
    predicted_GDP = model.predict(normalized_input)
    predicted_GDPs.append(predicted_GDP)

# Mengubah bentuk array menjadi 2D
predicted_GDPs = np.array(predicted_GDPs).reshape(-1, 1)

# Hitung rata-rata prediksi GDP
mean_predicted_GDP = scaler_y.inverse_transform(predicted_GDPs)

print(f"Rata-rata Prediksi GDP: {np.mean(mean_predicted_GDP):.2f}")

Rata-rata Prediksi GDP: 3004734981228.00


