### Pra-Proses Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [4]:
df = pd.read_excel("rumah_tangga.xlsx")
df

Unnamed: 0,No,Daerah,Kelompok Umur,Tahun,Belum Kawin,Kawin,Cerai Hidup,Cerai Mati,Jenis Kelamin
0,1,Perkotaan,10-24,2009,91.63,4.11,3.30,0.97,Perempuan
1,2,Perkotaan,25-44,2009,21.24,20.57,30.22,27.97,Perempuan
2,3,Perkotaan,45-59,2009,3.23,6.52,17.38,72.88,Perempuan
3,4,Perkotaan,60+,2009,1.35,2.08,5.04,91.52,Perempuan
4,5,Pedesaan,10-24,2009,42.37,37.71,15.46,4.46,Perempuan
...,...,...,...,...,...,...,...,...,...
363,364,Pedesaan,60+,2024,0.20,92.08,0.59,7.13,Laki-Laki
364,365,Perkotaan + Pedesaan,10-24,2024,38.60,60.75,0.46,0.19,Laki-Laki
365,366,Perkotaan + Pedesaan,25-44,2024,1.94,96.58,1.10,0.38,Laki-Laki
366,367,Perkotaan + Pedesaan,45-59,2024,0.60,95.77,1.22,2.41,Laki-Laki


In [5]:
df.head()

Unnamed: 0,No,Daerah,Kelompok Umur,Tahun,Belum Kawin,Kawin,Cerai Hidup,Cerai Mati,Jenis Kelamin
0,1,Perkotaan,10-24,2009,91.63,4.11,3.3,0.97,Perempuan
1,2,Perkotaan,25-44,2009,21.24,20.57,30.22,27.97,Perempuan
2,3,Perkotaan,45-59,2009,3.23,6.52,17.38,72.88,Perempuan
3,4,Perkotaan,60+,2009,1.35,2.08,5.04,91.52,Perempuan
4,5,Pedesaan,10-24,2009,42.37,37.71,15.46,4.46,Perempuan


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 368 entries, 0 to 367
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   No             368 non-null    int64  
 1   Daerah         368 non-null    object 
 2   Kelompok Umur  368 non-null    object 
 3   Tahun          368 non-null    int64  
 4   Belum Kawin    368 non-null    float64
 5   Kawin          368 non-null    float64
 6   Cerai Hidup    368 non-null    float64
 7   Cerai Mati     368 non-null    float64
 8   Jenis Kelamin  368 non-null    object 
dtypes: float64(4), int64(2), object(3)
memory usage: 26.0+ KB


In [13]:
df.isnull().sum()

No               0
Daerah           0
Kelompok Umur    0
Tahun            0
Belum Kawin      0
Kawin            0
Cerai Hidup      0
Cerai Mati       0
Jenis Kelamin    0
dtype: int64

In [7]:
# Hapus baris kosong
df.dropna(inplace=True)

In [15]:
# Filter jenis kelamin Perempuan
df_perempuan = df[df['Jenis Kelamin'] == 'Perempuan'].copy()

In [16]:
# Konversi menjadi numerik
for col in ['Belum Kawin', 'Kawin', 'Cerai Hidup', 'Cerai Mati']:
    df_perempuan[col] = pd.to_numeric(df_perempuan[col], errors='coerce')

In [17]:
# Encoder kategori
df_perempuan = pd.get_dummies(df_perempuan, columns=['Daerah', 'Kelompok Umur'])

In [18]:
# Normalisasi
scaler = MinMaxScaler()
cols_to_scale = ['Tahun', 'Belum Kawin', 'Kawin', 'Cerai Hidup', 'Cerai Mati']
df_scaled = df_perempuan.copy()
df_scaled[cols_to_scale] = scaler.fit_transform(df_perempuan[cols_to_scale])

In [20]:
# Fungsi Time Series mengambil 5 tahun ke belakang
def create_dataset(data, look_back=5):
    X, Y = [], []
    for i in range(len(data)-look_back-1):
        X.append(data[i:(i+look_back)])
        Y.append(data[i + look_back])
    return np.array(X), np.array(Y)

features = df_scaled.drop(columns=['Tahun']).columns.tolist()
target = ['Belum Kawin', 'Kawin', 'Cerai Hidup', 'Cerai Mati']

X_data = df_scaled[features].values
y_data = df_scaled[target].values

X, y = create_dataset(X_data, look_back=5)

### Modelling

In [21]:
# import library
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *

In [22]:
# Model 1 : CNN
model_cnn = Sequential()
model_cnn.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X.shape[1], X.shape[2])))
model_cnn.add(Flatten())
model_cnn.add(Dense(y.shape[1]))
model_cnn.compile(optimizer='adam', loss='mse')
history_cnn = model_cnn.fit(X, y, epochs=100, batch_size=8, validation_split=0.2, verbose=0)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: Invalid dtype: object

In [None]:
# Model 2 :LSTM
model_lstm = Sequential()
model_lstm.add(LSTM(100, return_sequences=False, input_shape=(X.shape[1], X.shape[2])))
model_lstm.add(Dense(y.shape[1]))
model_lstm.compile(optimizer='adam', loss='mse')
history_lstm = model_lstm.fit(X, y, epochs=100, batch_size=8, validation_split=0.2, verbose=0)

  super().__init__(**kwargs)


ValueError: Invalid dtype: object

In [None]:
# Model 3 : CNN-LSTM
model_cnn_lstm = Sequential()
model_cnn_lstm.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X.shape[1], X.shape[2])))
model_cnn_lstm.add(LSTM(100, return_sequences=False))
model_cnn_lstm.add(Dense(y.shape[1]))
model_cnn_lstm.compile(optimizer='adam', loss='mse')
history_cnn_lstm = model_cnn_lstm.fit(X, y, epochs=100, batch_size=8, validation_split=0.2, verbose=0)

ValueError: Invalid dtype: object

### Evaluasi Model

In [None]:
# import library
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

In [None]:
# evaluasi setiap model
def evaluate_model(model, name):
    predicted = model.predict(X)

    rmse = np.sqrt(mean_squared_error(y, predicted))
    mae = mean_absolute_error(y, predicted)
    r2 = r2_score(y, predicted)

    print(f"\n[{name}]")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAE : {mae:.4f}")
    print(f"R²  : {r2:.4f}")
    return rmse, mae, r2

results = {}
results['CNN'] = evaluate_model(model_cnn, "CNN")
results['LSTM'] = evaluate_model(model_lstm, "LSTM")
results['CNN-LSTM'] = evaluate_model(model_cnn_lstm, "CNN-LSTM")

In [None]:
# prediksi tahun 2025
last_5_years = X[-1:]  # Input terakhir dari data

predicted_cnn = model_cnn.predict(last_5_years)
predicted_lstm = model_lstm.predict(last_5_years)
predicted_cnn_lstm = model_cnn_lstm.predict(last_5_years)

In [None]:
# inverse scaling
pred_cnn_unscaled = scaler.inverse_transform(np.hstack((last_5_years[:, -1, 0].reshape(-1, 1), predicted_cnn)))[:, 1:]
pred_lstm_unscaled = scaler.inverse_transform(np.hstack((last_5_years[:, -1, 0].reshape(-1, 1), predicted_lstm)))[:, 1:]
pred_cnn_lstm_unscaled = scaler.inverse_transform(np.hstack((last_5_years[:, -1, 0].reshape(-1, 1), predicted_cnn_lstm)))[:, 1:]

In [None]:
# visualisasi prediksi
labels = ['Belum Kawin', 'Kawin', 'Cerai hidup', 'Cerai mati']

plt.figure(figsize=(14, 6))

plt.subplot(1, 3, 1)
x_indexes = np.arange(len(labels))
plt.bar(x_indexes, pred_cnn_unscaled[0], width=0.6, label='Prediksi CNN')
plt.xticks(x_indexes, labels)
plt.title('CNN Prediction')
plt.grid(True)

plt.subplot(1, 3, 2)
plt.bar(x_indexes, pred_lstm_unscaled[0], width=0.6, label='Prediksi LSTM')
plt.xticks(x_indexes, labels)
plt.title('LSTM Prediction')
plt.grid(True)

plt.subplot(1, 3, 3)
plt.bar(x_indexes, pred_cnn_lstm_unscaled[0], width=0.6, label='Prediksi CNN-LSTM')
plt.xticks(x_indexes, labels)
plt.title('CNN-LSTM Prediction')
plt.grid(True)

plt.tight_layout()
plt.show()

### Deploy


In [None]:
from tensorflow.keras.models import save_model


# menyimpan model terbaik
save_model(model_cnn_lstm, "model_cnn_lstm_2025.h5")