In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
import pickle



In [2]:
# ============================================
# 1️⃣ LOAD & PREPARE DATA
# ============================================
# Read the raw soil measurements from the CSV that sits next to the notebook.
df = pd.read_csv("soil_dataset.csv")

# Columns used as model inputs; the regression target is 'Soil_Score'.
features = [
    "N", "P", "K",
    "ph", "EC", "S",
    "Cu", "Fe", "Mn", "Zn", "B",
]
X, y = df[features], df["Soil_Score"]

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [3]:
# ============================================
# 4️⃣ FEATURE STANDARDIZATION
# ============================================
# Fit the scaler on the training split only, then apply the same fitted
# transform to both splits so the test rows never influence the statistics.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [4]:
# ============================================
# 5️⃣ TRAIN REGRESSOR
# ============================================
# Hyper-parameters collected in one place so they are easy to inspect or tweak.
rf_params = {
    "n_estimators": 300,   # number of trees in the forest
    "max_depth": 12,       # cap on the depth of each tree
    "random_state": 42,    # fixed seed for reproducible training
    "n_jobs": -1,          # -1 asks scikit-learn to use all available cores
}
model = RandomForestRegressor(**rf_params)

# Train on the scaled training features; fit() returns the estimator itself,
# so leaving it as the last expression keeps the notebook's cell display.
model.fit(X_train_scaled, y_train)

In [5]:
# ============================================
# 6️⃣ MODEL EVALUATION
# ============================================
# Score the held-out split that the model has not seen during training.
y_pred = model.predict(X_test_scaled)

r2 = r2_score(y_test, y_pred)
# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# One print call, one line per metric.
print(
    "✅ Soil Quality Regression Model Trained",
    f"R² Score: {r2:.4f}",
    f"RMSE: {rmse:.4f}",
    sep="\n",
)

✅ Soil Quality Regression Model Trained
R² Score: 0.9132
RMSE: 3.7781


In [6]:
# ============================================
# 7️⃣ SAVE MODEL & SCALER
# ============================================
# Persist both fitted objects: the scaler is needed at inference time to
# transform new inputs the same way the training data was transformed.
for pkl_name, fitted_obj in (
    ("soil_quality_model.pkl", model),
    ("soil_scaler.pkl", scaler),
):
    with open(pkl_name, "wb") as f:
        pickle.dump(fitted_obj, f)

# Confirmation message listing the files that were written.
print(
    "\nModel dan scaler berhasil disimpan:",
    "- soil_quality_model.pkl",
    "- soil_scaler.pkl",
    sep="\n",
)


Model dan scaler berhasil disimpan:
- soil_quality_model.pkl
- soil_scaler.pkl


In [7]:
# ============================================
# 8️⃣ EXAMPLE: PREDICTING NEW SAMPLES
# ============================================
# Take the first three held-out rows as stand-ins for new observations.
sample = X_test.head(3)

# Inputs go through the already-fitted scaler before reaching the model.
sample_scaled = scaler.transform(sample)
preds = model.predict(sample_scaled)

# Build one line per prediction (1-based sample number), then print the
# heading and all lines together.
report_lines = [f"Sampel {i+1}: {val:.2f}" for i, val in enumerate(preds)]
print("\nContoh Prediksi Soil_Score:", *report_lines, sep="\n")


Contoh Prediksi Soil_Score:
Sampel 1: 67.44
Sampel 2: 40.83
Sampel 3: 69.34
