<a href="https://colab.research.google.com/github/byy-bayu/GEO-AI-Machine-Learning-for-Paddy-Phenology-Classification/blob/main/Data_Processing_Sentinel_1_phenology.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Processing Sentinel-1: Ekstraksi Nilai Piksel Sentinel-1 dari Asset GEE

**Kode ini dirancang untuk:**



*  Mengambil citra Sentinel-1 dari koleksi aset GEE
*  Memuat titik sampel yang digunakan sebagai area ekstraksi data
*  Menyesuaikan tipe data citra (dari UnsignedInt16 ke Float, lalu dibagi 1000) dan mengekstrak nilai piksel pada setiap titik sampel
*  Menyimpan hasil ekstraksi ke dalam CSV dan Shapefile sebagai input training data pada machine learning








### Import Library

In [None]:
import ee
import geemap
import geopandas as gpd
import pandas as pd
from google.colab import drive

### Autentikasi Cloud dan Project GEE




In [None]:
# Authenticate this session to Earth Engine.
ee.Authenticate()

# Initialize the Earth Engine library against the project named below.
# NOTE(review): the project id is hard-coded; other users must replace it with their own.
ee.Initialize(project='ee-bayuardianto104')

In [None]:
# NOTE(review): `drive` is already imported in the import cell at the top of the
# notebook, so this repeated import is redundant (but harmless).
from google.colab import drive
# Mount Google Drive at /content/drive; the shapefile and CSV paths used later live under it.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Load Dataset Citra dan Titik Sampel

In [None]:
# Load the Sentinel-1 ImageCollection from the Earth Engine asset
imageCollection = ee.ImageCollection('projects/ee-bayuardianto104/assets/Dataset_S1_ARD_Indramayu_2024')

# Load the sample points (shapefile) from Google Drive
shapefile_path = '/content/drive/MyDrive/Research/shp/LBS/training_phenology_data_2024.shp'
gdf = gpd.read_file(shapefile_path)

# The x/y of every point is later handed to ee.Geometry.Point without a projection,
# which Earth Engine interprets as EPSG:4326 lon/lat. Reproject when the shapefile
# declares a different CRS so the points land where they were surveyed.
# (A shapefile without CRS information is left untouched.)
if gdf.crs is not None and gdf.crs.to_epsg() != 4326:
    gdf = gdf.to_crs(epsg=4326)

In [None]:
# Preview the first rows with the notebook's rich display instead of printing the whole frame
gdf.head()

    A_Foto A_Tgl_Kunj A_Tgl_Tana  A_Hst         A_Jenis_La     A_Varietas  \
0   Foto 1  29-Jan-24  17-Jan-24     12      Sawah Irigasi       Ciherang   
1   Foto 1  29-Jan-24  17-Jan-24     12      Sawah Irigasi       Ciherang   
2   Foto 1  26-Jan-24  11-Jan-24     15      Sawah Irigasi       Ciherang   
3   Foto 1  26-Jan-24  11-Jan-24     15      Sawah Irigasi       Ciherang   
4   Foto 1  23-Jan-24  11-Jan-24     12      Sawah Irigasi       Ciherang   
5   Foto 1  24-Jun-24  12-Jun-24     12      Sawah Irigasi  Inpari 32 HDB   
6   Foto 1  24-Jun-24  10-Jun-24     14      Sawah Irigasi  Inpari 32 HDB   
7   Foto 1  23-Jul-24  15-Jul-24      8      Sawah Irigasi  Inpari 32 HDB   
8   Foto 1  19-Jul-24  12-Jul-24      7      Sawah Irigasi  Inpari 32 HDB   
9   Foto 1  19-Jul-24   5-Jul-24     14      Sawah Irigasi  Inpari 32 HDB   
10  Foto 1  22-May-24  28-Mar-24     55      Sawah Irigasi      Lain-lain   
11  Foto 1  22-May-24  28-Mar-24     55      Sawah Irigasi      Lain-lain   

### Fungsi Ekstraksi Nilai Piksel per Titik





In [None]:
# Convert a GeoDataFrame into an Earth Engine FeatureCollection
def gdf_to_ee_feature_collection(gdf):
    """Build an ee.FeatureCollection from the rows of a GeoDataFrame.

    Each row becomes one ee.Feature: its geometry is converted to an Earth
    Engine geometry and every non-geometry column is attached as a property.

    Args:
        gdf: GeoDataFrame (or a slice of one) whose ``geometry`` column holds
            Point or MultiPoint geometries.

    Returns:
        ee.FeatureCollection with one feature per row, in row order.

    Raises:
        ValueError: if a row's geometry is neither Point nor MultiPoint.
    """
    features = []
    for _, row in gdf.iterrows():
        geom = row.geometry
        if geom.geom_type == 'Point':
            ee_geom = ee.Geometry.Point(geom.x, geom.y)
        elif geom.geom_type == 'MultiPoint':
            # A shapely MultiPoint exposes its member points through `.geoms`;
            # it has no `.coords` sequence, so build the coordinate list per part.
            ee_geom = ee.Geometry.MultiPoint([[part.x, part.y] for part in geom.geoms])
        else:
            raise ValueError("Geometry type not supported. Only Point and MultiPoint are supported.")
        feature = ee.Feature(ee_geom, row.drop('geometry').to_dict())
        features.append(feature)
    return ee.FeatureCollection(features)

In [None]:
# Cast an image to float and rescale its pixel values
def convert_to_float_and_scale(image):
    """Return ``image`` cast to float and divided by 1000, with key metadata copied.

    Args:
        image: image taken from the source collection.

    Returns:
        The result of ``copyProperties`` on the rescaled image, carrying the
        ``system:time_start`` and ``date`` properties of ``image``.
    """
    kept_properties = ['system:time_start', 'date']
    rescaled = image.toFloat().divide(1000)
    # Copy the selected metadata from the source image onto the rescaled result.
    return rescaled.copyProperties(image, kept_properties)

In [None]:
# Apply the float conversion to the whole ImageCollection.
# The collection is rebuilt from the raw asset instead of re-mapping the
# `imageCollection` variable, so running this cell more than once never
# divides the pixel values by 1000 a second time.
imageCollection = ee.ImageCollection(
    'projects/ee-bayuardianto104/assets/Dataset_S1_ARD_Indramayu_2024'
).map(convert_to_float_and_scale)

In [None]:
# Buffer every feature of a FeatureCollection
def buffer_points(feature_collection, radius):
    """Return ``feature_collection`` mapped so that each geometry is buffered.

    Args:
        feature_collection: collection whose features are buffered.
        radius: distance passed to ``geometry().buffer`` (the notebook defines
            it in meters).

    Returns:
        The mapped collection; each feature is rebuilt from its buffered
        geometry and its original property dictionary.
    """
    def with_buffered_geometry(feat):
        # Rebuild the feature: enlarged geometry plus the unchanged properties.
        enlarged = feat.geometry().buffer(radius)
        return ee.Feature(enlarged, feat.toDictionary())

    return feature_collection.map(with_buffered_geometry)

In [None]:
# Buffer radius in meters; the extraction loop passes this value to buffer_points().
# NOTE(review): a radius of 0 presumably leaves the points effectively unbuffered — confirm this is intended.
radius = 0

In [None]:
# Sample every image of a collection at the given features
def extract_values_from_image_collection(image_collection, buffered_features, scale=10):
    """Sample each image at ``buffered_features`` and tag the samples with the image date.

    Args:
        image_collection: collection whose images are sampled one by one.
        buffered_features: features passed to ``sampleRegions`` as the regions.
        scale: value forwarded to ``sampleRegions`` as ``scale`` (default 10).

    Returns:
        The flattened result of mapping the per-image sampling over the
        collection; every sample carries a ``date`` property formatted as
        ``YYYY-MM-dd``.
    """
    def extract_values(image):
        date_label = image.date().format('YYYY-MM-dd')

        def stamp_date(feature):
            return feature.set('date', date_label)

        # geometries=True keeps the geometry on every sampled feature.
        sampled = image.sampleRegions(
            collection=buffered_features,
            scale=scale,
            geometries=True,
        )
        return sampled.map(stamp_date)

    per_image_samples = image_collection.map(extract_values)
    return per_image_samples.flatten()

In [None]:
# Split the sample points into small groups
def split_gdf(gdf, chunk_size=50):
    """Cut ``gdf`` into consecutive slices of at most ``chunk_size`` rows.

    Args:
        gdf: any sliceable object with a length (here a GeoDataFrame).
        chunk_size: maximum number of rows per slice (default 50).

    Returns:
        List of slices in original order; the last one may be shorter.
    """
    chunks = []
    for start in range(0, len(gdf), chunk_size):
        stop = start + chunk_size
        chunks.append(gdf[start:stop])
    return chunks

In [None]:
# Run the extraction for each group of sample points
all_results = []
chunk_size = 50  # number of sample points per group
gdf_chunks = split_gdf(gdf, chunk_size)

for i, chunk in enumerate(gdf_chunks):
    print(f"Memproses kelompok titik sampel {i + 1} dari {len(gdf_chunks)}...")
    ee_feature_collection = gdf_to_ee_feature_collection(chunk)
    # Honour the configured buffer radius. Previously the buffered collection was
    # built but the unbuffered points were sampled, so `radius` had no effect.
    # With radius <= 0 the raw points are sampled directly.
    # NOTE(review): with a positive radius sampleRegions is expected to return the
    # pixels intersecting each buffered region, not one averaged value per point — confirm.
    if radius > 0:
        sample_regions = buffer_points(ee_feature_collection, radius)
    else:
        sample_regions = ee_feature_collection
    extracted_features = extract_values_from_image_collection(imageCollection, sample_regions, scale=10)
    # getInfo() pulls the sampled features of this chunk to the client as a dict.
    extracted_values = extracted_features.getInfo()
    features = extracted_values['features']
    data = [feat['properties'] for feat in features]
    all_results.extend(data)

Memproses kelompok titik sampel 1 dari 1...


In [None]:
    # Intentionally empty: the extraction loop in the previous cell already appends
    # every chunk's properties to `all_results`. The statements that used to live here
    # appended the last chunk a second time, duplicating its rows in the DataFrame and CSV.

In [None]:
# Build a pandas DataFrame from the collected property dictionaries (one row per dictionary)
df = pd.DataFrame(all_results)

In [None]:
# Write the DataFrame to a CSV file on the mounted Google Drive (row index is not written)
output_csv_path = '/content/drive/MyDrive/Research/Dataset training data.csv'  # adjust to the desired output path
df.to_csv(output_csv_path, index=False)

In [None]:
# Report where the CSV file was written
print(f"File CSV telah disimpan di: {output_csv_path}")

# Report the size of the DataFrame
print(f"Jumlah baris (records): {len(df)}")
print(f"Dimensi DataFrame (baris, kolom): {df.shape}")

# Column names and dtypes. DataFrame.info() writes its report itself and returns
# None, so it is called directly; wrapping it in print() emitted a stray "None".
print("\nInformasi kolom:")
df.info()

File CSV telah disimpan di: /content/drive/MyDrive/Research/Dataset training data.csv
Jumlah baris (records): 2656
Dimensi DataFrame (baris, kolom): (2656, 19)

Informasi kolom:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2656 entries, 0 to 2655
Data columns (total 19 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   API         2656 non-null   float64
 1   A_Foto      2656 non-null   object 
 2   A_Hst       2656 non-null   int64  
 3   A_Jenis_La  2656 non-null   object 
 4   A_Kecamata  2656 non-null   object 
 5   A_Lat       2656 non-null   float64
 6   A_Lng       2656 non-null   float64
 7   A_Tgl_Kunj  2656 non-null   object 
 8   A_Tgl_Tana  2656 non-null   object 
 9   A_Varietas  2656 non-null   object 
 10  Bulan       2656 non-null   object 
 11  NDPI        2656 non-null   float64
 12  RPI         2656 non-null   float64
 13  RVI         2656 non-null   float64
 14  VH_int      2656 non-null   float64
 15  VV_int      2

### Fungsi Ekstraksi Nilai Piksel per Titik tanpa buffer





In [None]:
# Convert a GeoDataFrame into an Earth Engine FeatureCollection
# NOTE(review): this function is also defined in an earlier cell of this notebook;
# this later definition replaces it when the cell runs.
def gdf_to_ee_feature_collection(gdf):
    """Build an ee.FeatureCollection from the rows of a GeoDataFrame.

    Each row becomes one ee.Feature: its geometry is converted to an Earth
    Engine geometry and every non-geometry column is attached as a property.

    Args:
        gdf: GeoDataFrame (or a slice of one) whose ``geometry`` column holds
            Point or MultiPoint geometries.

    Returns:
        ee.FeatureCollection with one feature per row, in row order.

    Raises:
        ValueError: if a row's geometry is neither Point nor MultiPoint.
    """
    features = []
    for _, row in gdf.iterrows():
        geom = row.geometry
        if geom.geom_type == 'Point':
            ee_geom = ee.Geometry.Point(geom.x, geom.y)
        elif geom.geom_type == 'MultiPoint':
            # A shapely MultiPoint exposes its member points through `.geoms`;
            # it has no `.coords` sequence, so build the coordinate list per part.
            ee_geom = ee.Geometry.MultiPoint([[part.x, part.y] for part in geom.geoms])
        else:
            raise ValueError("Geometry type not supported. Only Point and MultiPoint are supported.")
        feature = ee.Feature(ee_geom, row.drop('geometry').to_dict())
        features.append(feature)
    return ee.FeatureCollection(features)

In [None]:
# Cast an image to float and rescale its pixel values
def convert_to_float_and_scale(image):
    """Return ``image`` cast to float and divided by 1000, with key metadata copied.

    Args:
        image: image taken from the source collection.

    Returns:
        The result of ``copyProperties`` on the rescaled image, carrying the
        ``system:time_start`` and ``date`` properties of ``image``.
    """
    kept_properties = ['system:time_start', 'date']
    rescaled = image.toFloat().divide(1000)
    # Copy the selected metadata from the source image onto the rescaled result.
    return rescaled.copyProperties(image, kept_properties)

In [None]:
# Apply the float conversion to the whole ImageCollection.
# An earlier cell of this notebook already maps convert_to_float_and_scale over
# `imageCollection`; mapping that variable again here would divide the pixel values
# by 1000 a second time. Rebuild the collection from the raw asset so the scaling is
# applied exactly once no matter which cells ran before.
imageCollection = ee.ImageCollection(
    'projects/ee-bayuardianto104/assets/Dataset_S1_ARD_Indramayu_2024'
).map(convert_to_float_and_scale)

In [None]:
# Sample every image of a collection at the sample points (no buffer)
def extract_values_from_image_collection(image_collection, point_features, scale=10):
    """Sample each image at ``point_features`` and tag the samples with the image date.

    Args:
        image_collection: collection whose images are sampled one by one.
        point_features: features passed to ``sampleRegions`` as the regions.
        scale: value forwarded to ``sampleRegions`` as ``scale`` (default 10).

    Returns:
        The flattened result of mapping the per-image sampling over the
        collection; every sample carries a ``date`` property formatted as
        ``YYYY-MM-dd``.
    """
    def extract_values(image):
        date_label = image.date().format('YYYY-MM-dd')

        def stamp_date(feature):
            return feature.set('date', date_label)

        # geometries=True keeps the point geometry on every sampled feature.
        sampled = image.sampleRegions(
            collection=point_features,
            scale=scale,
            geometries=True,
        )
        return sampled.map(stamp_date)

    per_image_samples = image_collection.map(extract_values)
    return per_image_samples.flatten()

In [None]:
# Split the sample points into small groups to avoid timeouts
def split_gdf(gdf, chunk_size=50):
    """Cut ``gdf`` into consecutive slices of at most ``chunk_size`` rows.

    Args:
        gdf: any sliceable object with a length (here a GeoDataFrame).
        chunk_size: maximum number of rows per slice (default 50).

    Returns:
        List of slices in original order; the last one may be shorter.
    """
    chunks = []
    for start in range(0, len(gdf), chunk_size):
        stop = start + chunk_size
        chunks.append(gdf[start:stop])
    return chunks

In [None]:
# Run the extraction for every group of sample points
def extract_all_points(gdf, image_collection, chunk_size=50, scale=10):
    """Sample ``image_collection`` at every point of ``gdf``, one chunk at a time.

    The points are split into chunks, each chunk is converted to an Earth
    Engine FeatureCollection, sampled against the image collection and pulled
    to the client with ``getInfo()``.

    Args:
        gdf: GeoDataFrame holding the sample points.
        image_collection: collection to sample.
        chunk_size: number of points per request (default 50).
        scale: value forwarded to the sampling helper (default 10).

    Returns:
        List with the ``properties`` dictionary of every sampled feature.
        Chunks whose retrieval fails are skipped (best effort) and reported
        on stdout, so the list may be incomplete.
    """
    # The notebook never imports tqdm at module level, which made this function
    # fail with NameError. Import it here and fall back to plain iteration when
    # the package is unavailable.
    try:
        from tqdm.auto import tqdm
    except ImportError:
        def tqdm(iterable, **_kwargs):
            return iterable

    all_results = []
    failed_chunks = []
    gdf_chunks = split_gdf(gdf, chunk_size)

    print(f"Total {len(gdf)} titik sampel akan diproses dalam {len(gdf_chunks)} kelompok")

    for i, chunk in enumerate(tqdm(gdf_chunks, desc="Memproses kelompok titik")):
        # Convert this chunk of the GeoDataFrame to an EE FeatureCollection
        ee_feature_collection = gdf_to_ee_feature_collection(chunk)

        # Sample directly at the points (no buffer)
        extracted_features = extract_values_from_image_collection(
            image_collection,
            ee_feature_collection,
            scale=scale
        )

        # Fetch the sampled features; a failing chunk is logged and skipped
        # so the remaining chunks are still processed.
        try:
            extracted_values = extracted_features.getInfo()
            features = extracted_values['features']
            all_results.extend(feat['properties'] for feat in features)
        except Exception as e:
            print(f"Error pada kelompok {i+1}: {e}")
            failed_chunks.append(i + 1)

    # Make an incomplete result visible instead of leaving it buried in the log.
    if failed_chunks:
        print(f"Peringatan: {len(failed_chunks)} kelompok gagal diekstrak: {failed_chunks}")

    return all_results

In [None]:
    # Intentionally empty: no cell in this section produces a module-level `features`
    # (inside extract_all_points it is a local variable), so the statements that used to
    # live here re-appended a stale batch of rows to `all_results` and duplicated
    # records in the exported CSV.
    # NOTE(review): extract_all_points() is defined above but never called in the visible
    # notebook — confirm whether `all_results` should be reassigned from it before the
    # DataFrame below is built.

In [None]:
# Build a pandas DataFrame from the collected property dictionaries (one row per dictionary)
# NOTE(review): no cell in this section assigns `all_results`; it still holds whatever
# earlier cells left in it — confirm that is intended.
df = pd.DataFrame(all_results)

In [None]:
# Write the DataFrame to a CSV file on the mounted Google Drive (row index is not written)
output_csv_path = '/content/drive/MyDrive/Research/Dataset training dataaaaa.csv'  # adjust to the desired output path
df.to_csv(output_csv_path, index=False)

In [None]:
# Report where the CSV file was written
print(f"File CSV telah disimpan di: {output_csv_path}")

# Report the size of the DataFrame
print(f"Jumlah baris (records): {len(df)}")
print(f"Dimensi DataFrame (baris, kolom): {df.shape}")

# Column names and dtypes. DataFrame.info() writes its report itself and returns
# None, so it is called directly; wrapping it in print() emitted a stray "None".
print("\nInformasi kolom:")
df.info()

File CSV telah disimpan di: /content/drive/MyDrive/Research/Dataset training dataaaaa.csv
Jumlah baris (records): 3984
Dimensi DataFrame (baris, kolom): (3984, 19)

Informasi kolom:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3984 entries, 0 to 3983
Data columns (total 19 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   API         3984 non-null   float64
 1   A_Foto      3984 non-null   object 
 2   A_Hst       3984 non-null   int64  
 3   A_Jenis_La  3984 non-null   object 
 4   A_Kecamata  3984 non-null   object 
 5   A_Lat       3984 non-null   float64
 6   A_Lng       3984 non-null   float64
 7   A_Tgl_Kunj  3984 non-null   object 
 8   A_Tgl_Tana  3984 non-null   object 
 9   A_Varietas  3984 non-null   object 
 10  Bulan       3984 non-null   object 
 11  NDPI        3984 non-null   float64
 12  RPI         3984 non-null   float64
 13  RVI         3984 non-null   float64
 14  VH_int      3984 non-null   float64
 15  VV_int   