In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor # Model Training
from sklearn.model_selection import train_test_split # Splitting Data
from sklearn.metrics import mean_squared_error #Model Evaluation
import pickle

In [2]:
# Load the wide-format price table: one row per year ('Tahun') with one
# column per month, as shown in the printed output.
csv_path = 'data_harga_telur.csv'
data = pd.read_csv(csv_path)
print(data)

   No.    Lokasi       Komoditas  Tahun  Januari  Februari  Maret  April  \
0    1  Nasional  Telur Ayam Ras   2014    20457     21038  19063  18365   
1    2  Nasional  Telur Ayam Ras   2015    22814     22266  20675  19943   
2    3  Nasional  Telur Ayam Ras   2016    25537     24505  23290  22448   
3    4  Nasional  Telur Ayam Ras   2017    23584     22530  21940  22743   
4    5  Nasional  Telur Ayam Ras   2018    25649     24739  24489  23857   
5    6  Nasional  Telur Ayam Ras   2019    26239     25061  24158  24403   
6    7  Nasional  Telur Ayam Ras   2020    25370     25292  25813  26495   
7    8  Nasional  Telur Ayam Ras   2021    26417     25772  25191  25708   
8    9  Nasional  Telur Ayam Ras   2022    28287     24546  25342  26409   
9   10  Nasional  Telur Ayam Ras   2023    30570     29361  29445  30194   

     Mei   Juni   Juli  Agustus  September  Oktober  November  Desember  
0  19017  20513  21056    20743      20450    20000     19789     20684  
1  21274  22710

In [3]:
# Reshape from wide (one column per month) to long format: every
# (Tahun, Bulan) pair becomes its own row with the price in 'Harga'.
id_vars = ['Tahun']  # identifier column repeated on every melted row
value_vars = [       # month columns that get unpivoted into rows
    'Januari', 'Februari', 'Maret', 'April', 'Mei', 'Juni',
    'Juli', 'Agustus', 'September', 'Oktober', 'November', 'Desember',
]
melted_data = pd.melt(
    data,
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='Bulan',
    value_name='Harga',
)
print(melted_data)

     Tahun     Bulan  Harga
0     2014   Januari  20457
1     2015   Januari  22814
2     2016   Januari  25537
3     2017   Januari  23584
4     2018   Januari  25649
..     ...       ...    ...
115   2019  Desember  25346
116   2020  Desember  27892
117   2021  Desember  26818
118   2022  Desember  31096
119   2023  Desember  29317

[120 rows x 3 columns]


In [4]:
# Indonesian month names in calendar order; a name's 1-based position in
# this list is the month number it maps to.
month_names = [
    'Januari', 'Februari', 'Maret', 'April', 'Mei', 'Juni',
    'Juli', 'Agustus', 'September', 'Oktober', 'November', 'Desember',
]
month_mapping = {name: number for number, name in enumerate(month_names, start=1)}

In [5]:
# Convert month names in 'Bulan' to month numbers using month_mapping.
#
# The column is overwritten in place, so this cell must be safe to re-run:
# a plain .map(month_mapping) on an already-converted column would look up
# integers in a dict keyed by names and turn every value into NaN.
# Values that are not keys of month_mapping are therefore passed through
# unchanged, and the final cast to int64 raises if anything non-numeric
# (e.g. a misspelled month name) is left, instead of silently yielding NaN.
melted_data['Bulan'] = (
    melted_data['Bulan']
    .map(lambda bulan: month_mapping.get(bulan, bulan))
    .astype('int64')
)

In [6]:
# Show the long-format table again to confirm that 'Bulan' now holds
# month numbers instead of month names.
print(melted_data)

     Tahun  Bulan  Harga
0     2014      1  20457
1     2015      1  22814
2     2016      1  25537
3     2017      1  23584
4     2018      1  25649
..     ...    ...    ...
115   2019     12  25346
116   2020     12  27892
117   2021     12  26818
118   2022     12  31096
119   2023     12  29317

[120 rows x 3 columns]


In [7]:
# Split the long table into model inputs and target:
# features are the year and month number, the target is the price.
feature_columns = ['Tahun', 'Bulan']
target_column = 'Harga'
X = melted_data[feature_columns]
y = melted_data[target_column]

In [8]:
# Hold out 20% of the rows for evaluation; random_state pins the split so
# it is the same on every run.
# NOTE(review): train_test_split shuffles rows by default, so the test rows
# are not chronologically later than the training rows - confirm that a
# random (rather than time-ordered) hold-out is what is wanted here.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [9]:
# Model training: a random forest with 100 trees. random_state is fixed so
# that refitting on the same data gives the same model.
rf_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestRegressor(**rf_params)
# Fit on the training portion only; the test rows stay unseen for evaluation.
model.fit(X_train, y_train)

In [10]:
# Predict prices for the held-out test rows; y_pred is compared against
# y_test when the model is evaluated.
y_pred = model.predict(X_test)

In [11]:
# Evaluate the model: mean squared error between the true test prices and
# the predictions (the value is in squared units of 'Harga').
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 914982.2895166666


In [12]:
# Persist the fitted model to disk with pickle so it can be reloaded
# without retraining. Only unpickle this file from a trusted location:
# loading a pickle can execute arbitrary code.
with open('rf_model.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)