In [9]:
import pandas as pd
import numpy as np

# Total number of synthetic rows, split as evenly as possible across three classes.
n_total = 5000
n_baik = n_total // 3
# Halve what is left; any odd leftover row goes to the last class.
n_cukup, _odd_row = divmod(n_total - n_baik, 2)
n_buruk = n_cukup + _odd_row

# Fix NumPy's global generator so every draw below is repeatable.
np.random.seed(42)

def generate_data(n, range_dict):
    """Draw ``n`` random values for every column described in ``range_dict``.

    Parameters
    ----------
    n : int
        Number of values to draw per column.
    range_dict : dict
        Maps a column name to a ``(low, high, kind)`` tuple. When ``kind``
        contains the substring ``'float'`` the column is sampled with
        ``np.random.uniform(low, high)`` and rounded to two decimals;
        otherwise it is sampled with ``np.random.randint(low, high + 1)``,
        i.e. integers with ``high`` included.

    Returns
    -------
    dict
        Column name -> NumPy array of length ``n``, in the iteration order
        of ``range_dict``.
    """
    columns = {}
    for name, (lo, hi, kind) in range_dict.items():
        if 'float' in kind:
            samples = np.round(np.random.uniform(lo, hi, size=n), 2)
        else:
            # randint excludes its upper bound, hence the +1
            samples = np.random.randint(lo, hi + 1, size=n)
        columns[name] = samples
    return columns

# Every entry below is (low, high, kind); kind is 'int' or 'float'.

# Safe ranges for the "Baik" class
baik_ranges = {
    "Nitrogen (mg/kg)": (234, 893, 'int'),
    "Fosfor (mg/kg)": (116, 393, 'int'),
    "Kalium (mg/kg)": (1132, 3772, 'int'),
    "pH": (5.5, 6.5, 'float'),
    "Kelembapan (%)": (50, 70, 'float'),
    "Suhu (°C)": (20, 35, 'float'),
    "EC (dS/m)": (0.1, 2.0, 'float'),
    "Salinitas (ppt)": (128, 339, 'float'),
    "TDS (ppm)": (1000, 4000, 'float'),  # does not influence the classification
}

# General ranges that are not good enough (meant to miss at least one "Baik" condition)
cukup_ranges = {
    "Nitrogen (mg/kg)": (200, 1600, 'int'),
    "Fosfor (mg/kg)": (100, 2000, 'int'),
    "Kalium (mg/kg)": (100, 2000, 'int'),
    "pH": (4.5, 8.0, 'float'),  # can still count as "Cukup"
    "Kelembapan (%)": (30, 75, 'float'),  # "Cukup"
    "Suhu (°C)": (15, 45, 'float'),
    "EC (dS/m)": (0.05, 10, 'float'),
    "Salinitas (ppt)": (50, 1000, 'float'),
    "TDS (ppm)": (500, 6000, 'float'),
}

# "Buruk" ranges: identical to the "Cukup" ranges except for pH and moisture.
# Overriding existing keys keeps their original position, so column order is unchanged.
buruk_ranges = {
    **cukup_ranges,
    "pH": (4.5, 9.0, 'float'),  # also covers the bad (extreme) values
    "Kelembapan (%)": (10, 29.9, 'float'),  # not sufficient
}

# Build one frame per class. The generator is consumed left to right, so the
# random draws happen in the same Baik -> Cukup -> Buruk order.
df_baik, df_cukup, df_buruk = (
    pd.DataFrame(generate_data(count, ranges))
    for count, ranges in (
        (n_baik, baik_ranges),
        (n_cukup, cukup_ranges),
        (n_buruk, buruk_ranges),
    )
)

# Stack the three frames under a fresh 0..N-1 index
df = pd.concat((df_baik, df_cukup, df_buruk), ignore_index=True)

# Classification function
def classify_condition(row):
    """Label one row of sensor readings as "Baik", "Cukup" or "Buruk".

    ``row`` is anything indexable by column name (e.g. a DataFrame row).
    A row is "Baik" when all eight readings listed below sit inside their
    inclusive bounds. Otherwise it is "Cukup" when pH is within 4.5-8 and
    moisture within 30-75, and "Buruk" in every remaining case.
    """
    # (column, lower bound, upper bound), checked in this order, bounds inclusive
    good_bounds = (
        ("Kelembapan (%)", 50, 70),
        ("Suhu (°C)", 20, 35),
        ("EC (dS/m)", 0.1, 2.0),
        ("pH", 5.5, 6.5),
        ("Nitrogen (mg/kg)", 234, 893),
        ("Fosfor (mg/kg)", 116, 393),
        ("Kalium (mg/kg)", 1132, 3772),
        ("Salinitas (ppt)", 128, 339),
    )
    if all(lo <= row[col] <= hi for col, lo, hi in good_bounds):
        return "Baik"

    ph_fair = 4.5 <= row["pH"] <= 8
    if ph_fair and 30 <= row["Kelembapan (%)"] <= 75:
        return "Cukup"
    return "Buruk"

# Recommendation function
def recommend_action(row):
    """Build the "<fertiliser advice>, <water advice>" string for one row.

    Fertiliser advice depends on "Nitrogen (mg/kg)": below 500 -> "banyak",
    below 893 -> "sedang", anything else -> "dikit". Water advice depends on
    "Kelembapan (%)": below 50 -> "tambahkan air", otherwise "cukup air".
    """
    nitrogen = row["Nitrogen (mg/kg)"]
    # Ascending upper limits; the first limit the reading falls under wins
    tiers = (
        (500, "tambahkan pupuk banyak"),
        (893, "tambahkan pupuk sedang"),
    )
    pupuk = next(
        (advice for limit, advice in tiers if nitrogen < limit),
        "tambahkan pupuk dikit",
    )

    if row["Kelembapan (%)"] < 50:
        air = "tambahkan air"
    else:
        air = "cukup air"
    return ", ".join((pupuk, air))

# Apply both row-wise rules, one new column per rule, in this order
for column, rule in (
    ("Kondisi Tanah", classify_condition),
    ("Rekomendasi Tindakan", recommend_action),
):
    df[column] = df.apply(rule, axis=1)

# Check the final class distribution
kondisi_counts = df["Kondisi Tanah"].value_counts()
print(kondisi_counts)


Kondisi Tanah
Cukup    1667
Buruk    1667
Baik     1666
Name: count, dtype: int64


In [4]:
import zipfile

import h5py

model_path = "model_ann_last.keras"

# A model saved in the native `.keras` format is a zip archive, not an HDF5
# file, so probing it with h5py alone reports a valid model as invalid
# ("file signature not found"). Check for the zip container first and only
# fall back to the HDF5 probe for legacy files.
if zipfile.is_zipfile(model_path):
    print("Valid Keras zip archive (.keras) file.")
else:
    try:
        # Opening read-only is enough to validate the HDF5 signature;
        # the handle itself is not needed.
        with h5py.File(model_path, "r"):
            print("Valid Keras HDF5 file.")
    except Exception as e:
        # Also reached when the file does not exist (is_zipfile returns False then).
        print("❌ Bukan file model HDF5 / .keras:", e)


❌ Bukan file model HDF5 / .keras: Unable to synchronously open file (file signature not found)


In [11]:
# Preview the first five rows of the generated frame, including the two derived label columns.
df.head()

Unnamed: 0,Nitrogen (mg/kg),Fosfor (mg/kg),Kalium (mg/kg),pH,Kelembapan (%),Suhu (°C),EC (dS/m),Salinitas (ppt),TDS (ppm),Kondisi Tanah,Rekomendasi Tindakan
0,336,197,3533,6.35,64.64,32.11,0.34,139.65,1192.41,Baik,"tambahkan pupuk banyak, cukup air"
1,669,326,2904,6.46,53.48,21.64,1.87,329.43,1678.81,Baik,"tambahkan pupuk sedang, cukup air"
2,504,265,3092,6.07,57.22,25.96,1.25,155.13,3378.71,Baik,"tambahkan pupuk sedang, cukup air"
3,340,152,2735,6.01,62.75,25.05,1.35,275.16,3413.19,Baik,"tambahkan pupuk banyak, cukup air"
4,305,174,1840,5.59,51.41,28.54,0.86,234.81,1621.25,Baik,"tambahkan pupuk banyak, cukup air"


In [13]:
import random


def _inject_label_noise(frame, column, labels, n_rows):
    """Replace the value of ``column`` in ``n_rows`` random rows of ``frame``.

    Rows are picked without replacement using NumPy's global generator; each
    picked row gets a label drawn (with the stdlib ``random`` module) from
    ``labels`` excluding its current value, so the label always changes.
    ``frame`` is modified in place.

    Returns the array of row index labels that were modified.
    Raises ValueError when ``labels`` holds fewer than two distinct values,
    because no different label exists to switch to.
    """
    if len(set(labels)) < 2:
        raise ValueError(
            f"Column {column!r} needs at least two distinct labels to add noise"
        )
    picked = np.random.choice(frame.index, size=n_rows, replace=False)
    for idx in picked:
        current = frame.at[idx, column]
        alternatives = [label for label in labels if label != current]
        frame.at[idx, column] = random.choice(alternatives)
    return picked


# Seed the stdlib generator too: np.random.seed() does not affect `random`,
# so without this the replacement labels differ on every run.
random.seed(42)

# Work on a copy so the original frame stays untouched
df_noisy = df.copy()

# Share of rows whose label gets corrupted
noise_pct = 0.05
n_noise = int(noise_pct * len(df_noisy))

# --- NOISE for the "Rekomendasi Tindakan" column ---
unique_rekom = df_noisy["Rekomendasi Tindakan"].unique()
noise_indices_rekom = _inject_label_noise(
    df_noisy, "Rekomendasi Tindakan", unique_rekom, n_noise
)

# --- NOISE for the "Kondisi Tanah" column ---
unique_kondisi = df_noisy["Kondisi Tanah"].unique()
noise_indices_kondisi = _inject_label_noise(
    df_noisy, "Kondisi Tanah", unique_kondisi, n_noise
)

# Check the resulting distributions
print("Distribusi label setelah noise:")
print(df_noisy["Kondisi Tanah"].value_counts())
print(df_noisy["Rekomendasi Tindakan"].value_counts())


Distribusi label setelah noise:
Kondisi Tanah
Buruk    1685
Baik     1663
Cukup    1652
Name: count, dtype: int64
Rekomendasi Tindakan
tambahkan pupuk dikit, tambahkan air     1215
tambahkan pupuk sedang, cukup air        1204
tambahkan pupuk banyak, cukup air         892
tambahkan pupuk sedang, tambahkan air     668
tambahkan pupuk banyak, tambahkan air     556
tambahkan pupuk dikit, cukup air          465
Name: count, dtype: int64


In [14]:
# Stack the clean frame and its noisy copy row-wise under a fresh 0..N-1 index.
df_combined = pd.concat(objs=(df, df_noisy), axis=0, ignore_index=True)

In [15]:
# Write the combined frame to disk without the row index column.
df_combined.to_csv(path_or_buf="Data_Sensor_NPK_Dummy_Last.csv", index=False)