In [48]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings

# Suppress every FutureWarning so library deprecation notices do not clutter cell output.
warnings.simplefilter(action='ignore', category=FutureWarning)

### Read Dataset

In [49]:
# Relative path to the Excel workbook that the following cells read their sheets from.
data_path = "../data/data_hortikultura.xlsx"

In [50]:
# Load the 'Data_Kecocokan_Lahan' sheet; the first row of the sheet supplies the column names.
df_lahan = pd.read_excel(data_path, sheet_name='Data_Kecocokan_Lahan', header=0)
# Preview the first rows of the raw land data.
df_lahan.head()

Unnamed: 0,Kecamatan,Desa/Kelurahan,Luas Lahan,Iklim,Suhu,Curah Hujan,Kelembapan Udara,Jenis Tanah,Tekstur Tanah,Ph Tanah,Kemiringan Lahan,Tinggi Tempat
0,Alok,Kota Uneng,255,Subtropik,14-33,622-1425,45-80,mediteran,"lempung berpasir, bedebu","6 - 7,5",0 - < 4,0-75
1,Alok,Nangalimang,203,Subtropik,14-33,622-1425,45-80,mediteran,"lempung berpasir, bedebu","6 - 7,5",0 - < 4,25-75
2,Alok Barat,Wuring,1650,Subtropik,14-33,622-1425,45-80,mediteran,"liat berpasir ,lempung bedebu","4,2- 6",26-45,0-500
3,Alok Barat,Wolomarang,700,Subtropik,14-33,622-1425,45-80,mediteran,"liat berpasir, lempung bedebu","4,2- 6",26-45,0-500
4,Nita,Tilang,1545,Subtropik,23-33,1173-2277,74-86,"regosol, latosol, grumosol","liat, Liat berpasir,Lempung berpasir dan lempu...","4,2- 6",15-39,0-1000


In [51]:
# Load the 'Data_Profil_Ideal_Kecocokan_Lhn' sheet; the first row of the sheet supplies the column names.
df_profil_lahan = pd.read_excel(data_path, sheet_name='Data_Profil_Ideal_Kecocokan_Lhn', header=0)
# Preview the first rows of the per-crop profile table.
df_profil_lahan.head()

Unnamed: 0,Jenis Tanaman,Waktu Panen,Suhu S1,Suhu S2,Suhu S3,Suhu SN,Curah Hujan S1,Curah Hujan S2,Curah Hujan S3,Curah Hujan SN,...,Tingkat Kemasaman Tanah/Ph S3,Tingkat Kemasaman Tanah/Ph SN,Kemiringan Tanah S1,Kemiringan Tanah S2,Kemiringan Tanah S3,Kemiringan Tanah SN,Tinggi Tempat/Topografi S1,Tinggi Tempat/Topografi S2,Tinggi Tempat/Topografi S3,Tinggi Tempat/Topografi SN
0,Bawang Merah,2-3 bln,10-25,25-30,30-35,>35,350-600,600-800,800-1600,>1600,...,>8,-,<8,8-16,16-30,>30,700-1000,,,
1,Cabai Merah,2 mg,18-26,26-27,27-28,>28,600-1200,1200-1400,>1400,-,...,>8,-,<8,8-16,16-30,>30,900-1800,,,
2,Cabai Rawit,2 mg,18-26,26-27,27-28,>28,600-1200,1200-1400,>1400,-,...,>8,-,<8,8-16,16-30,>30,900-1800,,,
3,Tomat,2 mg,18-26,26-30,30-35,>35,400-700,700-800,>800,-,...,>8,-,<8,8-16,16-30,>30,350-750,,,
4,Wortel,2-3 bln,16-18,18-20,20-23,>23,>80,65-80,50-65,<50,...,>8,-,<8,8-16,16-30,>30,1000-1200,,,


### Preprocess Data

In [52]:
def split_string(input_string):
    """Split a free-text enumeration into a list of clean, lowercase items.

    Newlines are treated as spaces and the text is lowercased. Items are then
    separated on the Indonesian conjunction " dan " ("and") and on commas.
    Surrounding whitespace is stripped from every item, and fragments that end
    up empty (for example after a trailing or doubled comma) are dropped.

    Args:
        input_string: Text such as "liat, Liat berpasir dan lempung bedebu".

    Returns:
        list[str]: The items, in the order they appear in the text.
    """
    # Treat embedded line breaks as ordinary spaces and normalise the case.
    cleaned_string = input_string.replace('\n', ' ').lower()

    result_list = []
    # Split on " dan " first, then on commas inside each resulting piece.
    for part in cleaned_string.split(" dan "):
        for subpart in part.split(","):
            subpart = subpart.strip()
            # A trailing or repeated separator yields an empty fragment;
            # it carries no information, so leave it out of the result.
            if subpart:
                result_list.append(subpart)

    return result_list

In [53]:
# Sanity check: run split_string on the "Tekstur Tanah" value of the row labelled 4.
split_string(df_lahan.loc[4, 'Tekstur Tanah'])

['liat', 'liat berpasir', 'lempung berpasir', 'lempung bedebu']

In [67]:
def replace_with_mid_value(value):
    """Return the midpoint of a "min-max" range as a float.

    The bounds may be written with a decimal comma ("6 - 7,5") or a decimal
    point ("6 - 7.5"), and may be padded with spaces. A value that is not a
    string (for example a cell that is already numeric) is not parsed; it is
    simply converted to float and returned.

    Args:
        value: A range string such as "14-33", or a number.

    Returns:
        float: (min + max) / 2 for a range string, or float(value) otherwise.

    Raises:
        ValueError: If a string does not consist of exactly two numeric
            bounds separated by a single "-".
        TypeError: If a non-string value cannot be converted to float.
    """
    # Numeric cells have nothing to parse; passing them through also lets the
    # conversion be applied to a column that was already converted.
    if not isinstance(value, str):
        return float(value)

    # Normalise decimal commas, then always parse as float so that bounds
    # written with a decimal point work the same way as integer bounds.
    bounds = value.replace(',', '.').split("-")
    if len(bounds) != 2:
        raise ValueError(f"expected a 'min-max' range, got {value!r}")

    min_val, max_val = map(float, bounds)
    return (min_val + max_val) / 2

In [69]:
def replace_kemiringan(value):
    """Convert a "Kemiringan Lahan" (land slope) cell to a single number.

    Cells that are not strings (such as the numeric value 60) are returned
    unchanged. The open-ended class "> 61", with or without spaces, is
    treated as the range "61 - 100". For every other string the "<" and ">"
    signs are removed and the remaining "min-max" range is reduced to its
    midpoint by replace_with_mid_value.

    Args:
        value: A slope cell, e.g. "0 - < 4", "26-45", "> 61" or 60.

    Returns:
        The unchanged input for non-string cells, otherwise the midpoint
        returned by replace_with_mid_value.
    """
    # Numeric cells carry no range to parse; hand them back as they are.
    if not isinstance(value, str):
        return value

    # Compare without spaces so that ">61", "> 61" and " > 61 " all match.
    if value.replace(' ', '') == '>61':
        value = '61 - 100'

    # Drop comparison signs so only the two numeric bounds remain.
    value = value.replace('<', '').replace('>', '')

    return replace_with_mid_value(value)

**Temperature Range**

In [56]:
# Replace each range string in the "Suhu" (temperature) column with its midpoint.
# The column is overwritten in place.
df_lahan["Suhu"] = df_lahan["Suhu"].apply(replace_with_mid_value)

df_lahan.head()

Unnamed: 0,Kecamatan,Desa/Kelurahan,Luas Lahan,Iklim,Suhu,Curah Hujan,Kelembapan Udara,Jenis Tanah,Tekstur Tanah,Ph Tanah,Kemiringan Lahan,Tinggi Tempat
0,Alok,Kota Uneng,255,Subtropik,23.5,622-1425,45-80,mediteran,"lempung berpasir, bedebu","6 - 7,5",0 - < 4,0-75
1,Alok,Nangalimang,203,Subtropik,23.5,622-1425,45-80,mediteran,"lempung berpasir, bedebu","6 - 7,5",0 - < 4,25-75
2,Alok Barat,Wuring,1650,Subtropik,23.5,622-1425,45-80,mediteran,"liat berpasir ,lempung bedebu","4,2- 6",26-45,0-500
3,Alok Barat,Wolomarang,700,Subtropik,23.5,622-1425,45-80,mediteran,"liat berpasir, lempung bedebu","4,2- 6",26-45,0-500
4,Nita,Tilang,1545,Subtropik,28.0,1173-2277,74-86,"regosol, latosol, grumosol","liat, Liat berpasir,Lempung berpasir dan lempu...","4,2- 6",15-39,0-1000


**Rainfall Range**

In [57]:
# Replace each range string in the "Curah Hujan" (rainfall) column with its midpoint.
# The column is overwritten in place.
df_lahan["Curah Hujan"] = df_lahan["Curah Hujan"].apply(replace_with_mid_value)

df_lahan.head()

Unnamed: 0,Kecamatan,Desa/Kelurahan,Luas Lahan,Iklim,Suhu,Curah Hujan,Kelembapan Udara,Jenis Tanah,Tekstur Tanah,Ph Tanah,Kemiringan Lahan,Tinggi Tempat
0,Alok,Kota Uneng,255,Subtropik,23.5,1023.5,45-80,mediteran,"lempung berpasir, bedebu","6 - 7,5",0 - < 4,0-75
1,Alok,Nangalimang,203,Subtropik,23.5,1023.5,45-80,mediteran,"lempung berpasir, bedebu","6 - 7,5",0 - < 4,25-75
2,Alok Barat,Wuring,1650,Subtropik,23.5,1023.5,45-80,mediteran,"liat berpasir ,lempung bedebu","4,2- 6",26-45,0-500
3,Alok Barat,Wolomarang,700,Subtropik,23.5,1023.5,45-80,mediteran,"liat berpasir, lempung bedebu","4,2- 6",26-45,0-500
4,Nita,Tilang,1545,Subtropik,28.0,1725.0,74-86,"regosol, latosol, grumosol","liat, Liat berpasir,Lempung berpasir dan lempu...","4,2- 6",15-39,0-1000


**Humidity Range**

In [58]:
# Replace each range string in the "Kelembapan Udara" (air humidity) column with its midpoint.
# The column is overwritten in place.
df_lahan["Kelembapan Udara"] = df_lahan["Kelembapan Udara"].apply(replace_with_mid_value)

df_lahan.head()

Unnamed: 0,Kecamatan,Desa/Kelurahan,Luas Lahan,Iklim,Suhu,Curah Hujan,Kelembapan Udara,Jenis Tanah,Tekstur Tanah,Ph Tanah,Kemiringan Lahan,Tinggi Tempat
0,Alok,Kota Uneng,255,Subtropik,23.5,1023.5,62.5,mediteran,"lempung berpasir, bedebu","6 - 7,5",0 - < 4,0-75
1,Alok,Nangalimang,203,Subtropik,23.5,1023.5,62.5,mediteran,"lempung berpasir, bedebu","6 - 7,5",0 - < 4,25-75
2,Alok Barat,Wuring,1650,Subtropik,23.5,1023.5,62.5,mediteran,"liat berpasir ,lempung bedebu","4,2- 6",26-45,0-500
3,Alok Barat,Wolomarang,700,Subtropik,23.5,1023.5,62.5,mediteran,"liat berpasir, lempung bedebu","4,2- 6",26-45,0-500
4,Nita,Tilang,1545,Subtropik,28.0,1725.0,80.0,"regosol, latosol, grumosol","liat, Liat berpasir,Lempung berpasir dan lempu...","4,2- 6",15-39,0-1000


**Jenis Tanah**

In [60]:
# Turn each "Jenis Tanah" (soil type) text into a list of lowercase items with split_string.
# The result is stored back in the same column.
df_lahan['Jenis Tanah'] = df_lahan['Jenis Tanah'].apply(split_string)
df_lahan.head()

Unnamed: 0,Kecamatan,Desa/Kelurahan,Luas Lahan,Iklim,Suhu,Curah Hujan,Kelembapan Udara,Jenis Tanah,Tekstur Tanah,Ph Tanah,Kemiringan Lahan,Tinggi Tempat
0,Alok,Kota Uneng,255,Subtropik,23.5,1023.5,62.5,[mediteran],"lempung berpasir, bedebu","6 - 7,5",0 - < 4,0-75
1,Alok,Nangalimang,203,Subtropik,23.5,1023.5,62.5,[mediteran],"lempung berpasir, bedebu","6 - 7,5",0 - < 4,25-75
2,Alok Barat,Wuring,1650,Subtropik,23.5,1023.5,62.5,[mediteran],"liat berpasir ,lempung bedebu","4,2- 6",26-45,0-500
3,Alok Barat,Wolomarang,700,Subtropik,23.5,1023.5,62.5,[mediteran],"liat berpasir, lempung bedebu","4,2- 6",26-45,0-500
4,Nita,Tilang,1545,Subtropik,28.0,1725.0,80.0,"[regosol, latosol, grumosol]","liat, Liat berpasir,Lempung berpasir dan lempu...","4,2- 6",15-39,0-1000


**Tekstur Tanah**

In [61]:
# Turn each "Tekstur Tanah" (soil texture) text into a list of lowercase items with split_string.
# The result is stored back in the same column.
df_lahan['Tekstur Tanah'] = df_lahan['Tekstur Tanah'].apply(split_string)
df_lahan.head()

Unnamed: 0,Kecamatan,Desa/Kelurahan,Luas Lahan,Iklim,Suhu,Curah Hujan,Kelembapan Udara,Jenis Tanah,Tekstur Tanah,Ph Tanah,Kemiringan Lahan,Tinggi Tempat
0,Alok,Kota Uneng,255,Subtropik,23.5,1023.5,62.5,[mediteran],"[lempung berpasir, bedebu]","6 - 7,5",0 - < 4,0-75
1,Alok,Nangalimang,203,Subtropik,23.5,1023.5,62.5,[mediteran],"[lempung berpasir, bedebu]","6 - 7,5",0 - < 4,25-75
2,Alok Barat,Wuring,1650,Subtropik,23.5,1023.5,62.5,[mediteran],"[liat berpasir, lempung bedebu]","4,2- 6",26-45,0-500
3,Alok Barat,Wolomarang,700,Subtropik,23.5,1023.5,62.5,[mediteran],"[liat berpasir, lempung bedebu]","4,2- 6",26-45,0-500
4,Nita,Tilang,1545,Subtropik,28.0,1725.0,80.0,"[regosol, latosol, grumosol]","[liat, liat berpasir, lempung berpasir, lempun...","4,2- 6",15-39,0-1000


**pH Range**

In [68]:
# Replace each range string in the "Ph Tanah" (soil pH) column with its midpoint.
# The column is overwritten in place.
df_lahan["Ph Tanah"] = df_lahan["Ph Tanah"].apply(replace_with_mid_value)

df_lahan.head()

Unnamed: 0,Kecamatan,Desa/Kelurahan,Luas Lahan,Iklim,Suhu,Curah Hujan,Kelembapan Udara,Jenis Tanah,Tekstur Tanah,Ph Tanah,Kemiringan Lahan,Tinggi Tempat
0,Alok,Kota Uneng,255,Subtropik,23.5,1023.5,62.5,[mediteran],"[lempung berpasir, bedebu]",6.75,0 - < 4,0-75
1,Alok,Nangalimang,203,Subtropik,23.5,1023.5,62.5,[mediteran],"[lempung berpasir, bedebu]",6.75,0 - < 4,25-75
2,Alok Barat,Wuring,1650,Subtropik,23.5,1023.5,62.5,[mediteran],"[liat berpasir, lempung bedebu]",5.1,26-45,0-500
3,Alok Barat,Wolomarang,700,Subtropik,23.5,1023.5,62.5,[mediteran],"[liat berpasir, lempung bedebu]",5.1,26-45,0-500
4,Nita,Tilang,1545,Subtropik,28.0,1725.0,80.0,"[regosol, latosol, grumosol]","[liat, liat berpasir, lempung berpasir, lempun...",5.1,15-39,0-1000


**Kemiringan Lahan**

In [70]:
# Convert each "Kemiringan Lahan" (land slope) cell to a single number with replace_kemiringan.
# The column is overwritten in place.
df_lahan["Kemiringan Lahan"] = df_lahan["Kemiringan Lahan"].apply(replace_kemiringan)

df_lahan.head()

Unnamed: 0,Kecamatan,Desa/Kelurahan,Luas Lahan,Iklim,Suhu,Curah Hujan,Kelembapan Udara,Jenis Tanah,Tekstur Tanah,Ph Tanah,Kemiringan Lahan,Tinggi Tempat
0,Alok,Kota Uneng,255,Subtropik,23.5,1023.5,62.5,[mediteran],"[lempung berpasir, bedebu]",6.75,2.0,0-75
1,Alok,Nangalimang,203,Subtropik,23.5,1023.5,62.5,[mediteran],"[lempung berpasir, bedebu]",6.75,2.0,25-75
2,Alok Barat,Wuring,1650,Subtropik,23.5,1023.5,62.5,[mediteran],"[liat berpasir, lempung bedebu]",5.1,35.5,0-500
3,Alok Barat,Wolomarang,700,Subtropik,23.5,1023.5,62.5,[mediteran],"[liat berpasir, lempung bedebu]",5.1,35.5,0-500
4,Nita,Tilang,1545,Subtropik,28.0,1725.0,80.0,"[regosol, latosol, grumosol]","[liat, liat berpasir, lempung berpasir, lempun...",5.1,27.0,0-1000


**Tinggi Tempat Range**

In [71]:
# Replace each range string in the "Tinggi Tempat" (site elevation) column with its midpoint.
# The column is overwritten in place.
df_lahan["Tinggi Tempat"] = df_lahan["Tinggi Tempat"].apply(replace_with_mid_value)

df_lahan.head()

Unnamed: 0,Kecamatan,Desa/Kelurahan,Luas Lahan,Iklim,Suhu,Curah Hujan,Kelembapan Udara,Jenis Tanah,Tekstur Tanah,Ph Tanah,Kemiringan Lahan,Tinggi Tempat
0,Alok,Kota Uneng,255,Subtropik,23.5,1023.5,62.5,[mediteran],"[lempung berpasir, bedebu]",6.75,2.0,37.5
1,Alok,Nangalimang,203,Subtropik,23.5,1023.5,62.5,[mediteran],"[lempung berpasir, bedebu]",6.75,2.0,50.0
2,Alok Barat,Wuring,1650,Subtropik,23.5,1023.5,62.5,[mediteran],"[liat berpasir, lempung bedebu]",5.1,35.5,250.0
3,Alok Barat,Wolomarang,700,Subtropik,23.5,1023.5,62.5,[mediteran],"[liat berpasir, lempung bedebu]",5.1,35.5,250.0
4,Nita,Tilang,1545,Subtropik,28.0,1725.0,80.0,"[regosol, latosol, grumosol]","[liat, liat berpasir, lempung berpasir, lempun...",5.1,27.0,500.0


### Export Preprocessed Data

In [72]:
# Final look at the preprocessed frame before it is written to disk.
df_lahan.head()

Unnamed: 0,Kecamatan,Desa/Kelurahan,Luas Lahan,Iklim,Suhu,Curah Hujan,Kelembapan Udara,Jenis Tanah,Tekstur Tanah,Ph Tanah,Kemiringan Lahan,Tinggi Tempat
0,Alok,Kota Uneng,255,Subtropik,23.5,1023.5,62.5,[mediteran],"[lempung berpasir, bedebu]",6.75,2.0,37.5
1,Alok,Nangalimang,203,Subtropik,23.5,1023.5,62.5,[mediteran],"[lempung berpasir, bedebu]",6.75,2.0,50.0
2,Alok Barat,Wuring,1650,Subtropik,23.5,1023.5,62.5,[mediteran],"[liat berpasir, lempung bedebu]",5.1,35.5,250.0
3,Alok Barat,Wolomarang,700,Subtropik,23.5,1023.5,62.5,[mediteran],"[liat berpasir, lempung bedebu]",5.1,35.5,250.0
4,Nita,Tilang,1545,Subtropik,28.0,1725.0,80.0,"[regosol, latosol, grumosol]","[liat, liat berpasir, lempung berpasir, lempun...",5.1,27.0,500.0


In [75]:
# Write the preprocessed frame, minus the "Kecamatan" column, to CSV using to_csv's default options.
# NOTE(review): "Jenis Tanah" and "Tekstur Tanah" hold Python lists at this point; they are
# presumably written as their text representation - confirm that downstream readers parse them back.
df_lahan.drop("Kecamatan", axis=1).to_csv("../data/df_lahan_preprocessed.csv")

### Interpolation