# Random Forest untuk Prediksi Time Series

- Muat data dan pra-pemrosesan
- Bentuk fitur lag dan rolling
- Split train/valid/test secara time-aware
- Latih RandomForest, evaluasi (MAE, RMSE, MAPE) dan plot
- Hyperparameter tuning (TimeSeriesSplit + RandomizedSearchCV)
- Walk-forward evaluation
- Feature importance dan simpan model

In [11]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit, RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Notebook-wide presentation settings: matplotlib style sheet and the
# maximum number of columns pandas shows when rendering a frame.
plt.style.use("seaborn-v0_8")
pd.set_option("display.max_columns", 100)

In [10]:

# Name of the original Excel workbook
file_excel_asli = "Data Penelitian.xlsx"

# Read both sheets with a single call so the workbook is opened and parsed
# only once. When sheet_name is a list, read_excel returns a dict keyed by
# sheet name.
_sheets = pd.read_excel(
    file_excel_asli,
    sheet_name=["Luas Panen (Ha)", "Hasil Produksi (Ton)"],
)

# Sheet 'Luas Panen (Ha)'
df_luas = _sheets["Luas Panen (Ha)"]

# Sheet 'Hasil Produksi (Ton)'
df_produksi = _sheets["Hasil Produksi (Ton)"]

In [16]:
# Bare expression: the notebook renders df_luas as this cell's output.
df_luas

Unnamed: 0,Kecamatan,2013,2014,2015,2016,2017,2018,2019
0,Ayah,"2.495,00","2.476,00","2.496,00","2.438,90","2.409,40","2.441,80","1.902,00"
1,Buayan,"2.170,00","2.172,00","2.171,00","2.169,10","2.216,90","2.211,20","2.020,40"
2,Puring,"4.823,00","4.754,00","4.801,00","4.918,80","4.839,80","4.781,00","3.346,40"
3,Petanahan,"3.824,00","3.824,00","3.824,00","3.840,20","3.823,50","3.823,60","3.806,10"
4,Klirong,"2.568,00","2.568,00","2.568,00","2.568,00","2.609,10","2.650,20","2.539,70"
5,Buluspesantren,"3.954,00","3.990,00","4.000,00","4.015,30","4.085,70","4.085,70","4.024,20"
6,Ambal,"5.358,00","4.563,00","5.035,00","5.011,70","4.997,30","5.202,40","4.795,70"
7,Mirit,"4.015,00","3.977,00","3.978,00","3.962,50","3.970,30","3.961,50","3.822,70"
8,Bonorowo,"2.572,00","2.466,00","2.571,00","2.536,70","2.571,90","2.535,80","2.535,60"
9,Prembun,"1.867,00","1.869,00","1.990,00","1.867,80","1.867,80","1.867,80","1.867,70"


In [4]:
# Bare expression: the notebook renders df_luas as this cell's output.
df_luas

Unnamed: 0,Kecamatan,2013,2014,2015,2016,2017,2018,2019
0,Ayah,"2.495,00","2.476,00","2.496,00","2.438,90","2.409,40","2.441,80","1.902,00"
1,Buayan,"2.170,00","2.172,00","2.171,00","2.169,10","2.216,90","2.211,20","2.020,40"
2,Puring,"4.823,00","4.754,00","4.801,00","4.918,80","4.839,80","4.781,00","3.346,40"
3,Petanahan,"3.824,00","3.824,00","3.824,00","3.840,20","3.823,50","3.823,60","3.806,10"
4,Klirong,"2.568,00","2.568,00","2.568,00","2.568,00","2.609,10","2.650,20","2.539,70"
5,Buluspesantren,"3.954,00","3.990,00","4.000,00","4.015,30","4.085,70","4.085,70","4.024,20"
6,Ambal,"5.358,00","4.563,00","5.035,00","5.011,70","4.997,30","5.202,40","4.795,70"
7,Mirit,"4.015,00","3.977,00","3.978,00","3.962,50","3.970,30","3.961,50","3.822,70"
8,Bonorowo,"2.572,00","2.466,00","2.571,00","2.536,70","2.571,90","2.535,80","2.535,60"
9,Prembun,"1.867,00","1.869,00","1.990,00","1.867,80","1.867,80","1.867,80","1.867,70"


In [12]:
# Clean locale-formatted number columns and convert them to a numeric dtype
def clean_numeric_columns(df):
    """Return a copy of *df* with every column except 'Kecamatan' made numeric.

    Text cells are parsed as Indonesian-formatted numbers such as
    "2.495,00": dots are dropped as thousands separators and the comma
    becomes the decimal point. Cells that already hold a number are passed
    through untouched, so calling this on an already-cleaned frame (for
    example when the notebook cell is re-run) no longer turns 2495.0 into
    24950.0. Values that cannot be parsed become NaN.

    Args:
        df: Input frame; it is not modified.

    Returns:
        A new DataFrame with the converted columns.
    """
    def _normalize(value):
        # Real numbers must not go through the text path: str(2495.0) is
        # "2495.0", and stripping the dot would multiply the value by ten.
        # bool is excluded so it is still treated as text and coerced to NaN.
        if isinstance(value, (int, float, np.number)) and not isinstance(value, bool):
            return value
        # Drop thousands dots first, then turn the decimal comma into a dot.
        return str(value).replace('.', '').replace(',', '.')

    df_clean = df.copy()
    for col in df_clean.columns:
        if col == 'Kecamatan':
            continue
        # errors='coerce' maps anything unparseable to NaN instead of raising.
        df_clean[col] = pd.to_numeric(df_clean[col].map(_normalize), errors='coerce')
    return df_clean

# Convert both frames, rebinding the notebook-level names to the cleaned copies
df_luas = clean_numeric_columns(df_luas)
df_produksi = clean_numeric_columns(df_produksi)

# Report the resulting dtypes and a short sample; each heading and its
# payload are printed on consecutive lines.
print("--- Tipe Data df_luas Setelah Konversi ---", df_luas.dtypes, sep="\n")
print("\n--- Tipe Data df_produksi Setelah Konversi ---", df_produksi.dtypes, sep="\n")
print("\n--- Contoh Data df_luas ---", df_luas.head(), sep="\n")


--- Tipe Data df_luas Setelah Konversi ---
Kecamatan     object
2013         float64
2014         float64
2015         float64
2016         float64
2017         float64
2018         float64
2019         float64
dtype: object

--- Tipe Data df_produksi Setelah Konversi ---
Kecamatan     object
2013         float64
2014         float64
2015         float64
2016         float64
2017         float64
2018         float64
2019         float64
dtype: object

--- Contoh Data df_luas ---
   Kecamatan    2013    2014    2015    2016    2017    2018    2019
0       Ayah  2495.0  2476.0  2496.0  2438.9  2409.4  2441.8  1902.0
1     Buayan  2170.0  2172.0  2171.0  2169.1  2216.9  2211.2  2020.4
2     Puring  4823.0  4754.0  4801.0  4918.8  4839.8  4781.0  3346.4
3  Petanahan  3824.0  3824.0  3824.0  3840.2  3823.5  3823.6  3806.1
4    Klirong  2568.0  2568.0  2568.0  2568.0  2609.1  2650.2  2539.7


In [14]:
# Bare expression: the notebook renders df_luas as this cell's output.
df_luas

Unnamed: 0,Kecamatan,2013,2014,2015,2016,2017,2018,2019
0,Ayah,2495.0,2476.0,2496.0,2438.9,2409.4,2441.8,1902.0
1,Buayan,2170.0,2172.0,2171.0,2169.1,2216.9,2211.2,2020.4
2,Puring,4823.0,4754.0,4801.0,4918.8,4839.8,4781.0,3346.4
3,Petanahan,3824.0,3824.0,3824.0,3840.2,3823.5,3823.6,3806.1
4,Klirong,2568.0,2568.0,2568.0,2568.0,2609.1,2650.2,2539.7
5,Buluspesantren,3954.0,3990.0,4000.0,4015.3,4085.7,4085.7,4024.2
6,Ambal,5358.0,4563.0,5035.0,5011.7,4997.3,5202.4,4795.7
7,Mirit,4015.0,3977.0,3978.0,3962.5,3970.3,3961.5,3822.7
8,Bonorowo,2572.0,2466.0,2571.0,2536.7,2571.9,2535.8,2535.6
9,Prembun,1867.0,1869.0,1990.0,1867.8,1867.8,1867.8,1867.7


In [15]:
# Bare expression: the notebook renders df_produksi as this cell's output.
df_produksi

Unnamed: 0,Kecamatan,2013,2014,2015,2016,2017,2018,2019
0,Ayah,13489.6,13040.05,15492.85,14793.87,13408.31,13918.26,10426.71
1,Buayan,11732.44,11135.29,13200.25,13157.32,12337.05,12603.84,13134.72
2,Puring,26076.28,26826.17,31557.32,29836.44,26933.49,27251.7,16966.36
3,Petanahan,20675.04,22186.72,25718.73,23293.87,21277.78,21794.52,20043.91
4,Klirong,13884.28,14442.43,16815.26,15576.96,14519.64,15106.14,15580.4
5,Buluspesantren,21377.9,22915.24,26669.11,24355.99,22736.92,23288.49,30302.03
6,Ambal,28968.84,27819.63,34583.41,30399.95,27809.97,29653.68,31186.49
7,Mirit,21707.71,20717.86,24400.43,24035.72,22094.72,22580.55,27008.03
8,Bonorowo,13905.91,14124.39,17341.58,15387.1,14312.62,14454.06,18351.34
9,Prembun,10094.22,9106.34,11748.31,11329.69,10394.31,10646.46,13560.04


In [17]:
# Bare expression: the notebook renders df_produksi as this cell's output.
df_produksi

Unnamed: 0,Kecamatan,2013,2014,2015,2016,2017,2018,2019
0,Ayah,"13.489,60","13.040,05","15.492,85","14.793,87","13.408,31","13.918,26","10.426,71"
1,Buayan,"11.732,44","11.135,29","13.200,25","13.157,32","12.337,05","12.603,84","13.134,72"
2,Puring,"26.076,28","26.826,17","31.557,32","29.836,44","26.933,49","27.251,70","16.966,36"
3,Petanahan,"20.675,04","22.186,72","25.718,73","23.293,87","21.277,78","21.794,52","20.043,91"
4,Klirong,"13.884,28","14.442,43","16.815,26","15.576,96","14.519,64","15.106,14","15.580,40"
5,Buluspesantren,"21.377,90","22.915,24","26.669,11","24.355,99","22.736,92","23.288,49","30.302,03"
6,Ambal,"28.968,84","27.819,63","34.583,41","30.399,95","27.809,97","29.653,68","31.186,49"
7,Mirit,"21.707,71","20.717,86","24.400,43","24.035,72","22.094,72","22.580,55","27.008,03"
8,Bonorowo,"13.905,91","14.124,39","17.341,58","15.387,10","14.312,62","14.454,06","18.351,34"
9,Prembun,"10.094,22","9.106,34","11.748,31","11.329,69","10.394,31","10.646,46","13.560,04"


In [15]:
try:
    # Load the two CSV exports through load_and_melt (defined outside this
    # cell); the second argument is the label passed for each dataset.
    df_luas = load_and_melt(
        'Data Penelitian.xlsx - Luas Panen (Ha).csv',
        'Luas Panen (Ha)',
    )
    df_produksi = load_and_melt(
        'Data Penelitian.xlsx - Hasil Produksi.csv',
        'Hasil Produksi',
    )

    # Join the two frames on district ('Kecamatan') and year ('Tahun')
    df_data = pd.merge(left=df_luas, right=df_produksi, on=['Kecamatan', 'Tahun'])

    # Drop rows that contain missing values (NaN)
    df_data = df_data.dropna()

    # Show the first five rows for inspection
    print("--- Data Gabungan (5 Baris Pertama) ---", df_data.head(), sep="\n")

    # Show column dtypes and non-null counts
    print("\n--- Info Tipe Data ---")
    df_data.info()

except FileNotFoundError:
    # One of the CSV paths above could not be opened
    print("Error: Pastikan kedua file CSV ada di direktori yang sama.")
except Exception as e:
    # Any other failure is reported with its message instead of a traceback
    print(f"Terjadi error: {e}")

Error: Pastikan kedua file CSV ada di direktori yang sama.
