In [1]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from tslearn.metrics import dtw
from tslearn.utils import to_time_series
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

In [2]:
# ---------------------------------------------------------------------------
# Run configuration and raw-data loading
# ---------------------------------------------------------------------------
# Root folder holding the exported per-cluster time-series CSV files.
folder_name = 'cluster_timeseries/'

collected_data = ["2013-2024"]

# Experiment log. dtw_knn_3 used cibeber instead of warungkondang.
'''
cibeber1: 2021 - 2024 instead of 2020-2024
cibeber2: 20 clusters, snic size: 50
cibeber3: change clustering model, snic size: 20
cibeber4: new imagecol, with Sentinel 7
campaka
evi_campaka
'''

# Related Earth Engine script:
# https://code.earthengine.google.com/3e9149cd235c6fe731fa6cd55beeab55

tahun = "2013-2024"        # year range, written as "<start>-<end>"
evi = True
filename = "warungkondang"

# `prefix` carries an "evi_" marker in front of the file name when `evi` is set.
if evi:
    prefix = "evi_" + filename
else:
    prefix = filename

# The CSV is named "<filename>_<start>_<end>.csv", so the dash in the year
# range is swapped for an underscore.
tahun_filename = tahun.replace("-", "_")

sawah_df = pd.read_csv(
    folder_name + filename + "_" + tahun_filename + ".csv"
).drop(columns="system:index")

# Keep the geometry next to its cluster id, then remove it from the table
# that is passed on to the preprocessing step.
cluster_geo = sawah_df[['cluster_id', '.geo']]
sawah_df = sawah_df.drop(columns=".geo")

In [3]:
def df_preprocessing(df):
    """Rename image columns to their date token, sort columns and fill gaps.

    A column name is matched against three substrings and the date token is
    cut out of the underscore-separated name:

    * contains ``"T48MYT"``: first 8 characters of field 3
    * contains ``"LC08"``:   field 5
    * contains ``"LE07"``:   field 3

    Columns matching none of these (e.g. ``cluster_id``) keep their original
    name regardless of where they sit in the frame. The previous
    implementation relied on exactly one such column being the last one.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are strings.

    Returns
    -------
    pandas.DataFrame
        A new frame with renamed columns, duplicate column names removed
        (first occurrence kept), columns sorted by name, and missing values
        filled along each row (forward fill, then backward fill).
    """

    def _date_token(col):
        # Return the date token for an image column, or None when the column
        # is not one of the recognised image columns.
        if "T48MYT" in col:
            return col.split("_")[3][:8]
        if "LC08" in col:
            return col.split("_")[5]
        if "LE07" in col:
            return col.split("_")[3]
        return None

    # Build the rename map from matching columns only, so that unmatched
    # columns are never renamed and no positional alignment is needed.
    old_new_col = {}
    for col in df.columns:
        token = _date_token(col)
        if token is not None:
            old_new_col[col] = token
    df = df.rename(columns=old_new_col)

    # Drop columns whose name is now a duplicate, keeping the first one.
    df = df.loc[:, ~df.columns.duplicated(keep='first')]

    # Sort the columns by name; for the date tokens shown in this notebook's
    # output (YYYYMMDD) this is date order.
    df = df.reindex(sorted(df.columns), axis=1)

    # Fill gaps along each row. NOTE: this runs across every column,
    # including non-date columns such as cluster_id.
    df = df.ffill(axis=1)
    df = df.bfill(axis=1)

    return df

In [4]:
# Pass the loaded table through df_preprocessing and preview the first rows.
# NOTE: sawah_df is rebound to the result, so re-running this cell feeds the
# already processed frame into df_preprocessing a second time.
sawah_df = df_preprocessing(sawah_df)
sawah_df.head()

Unnamed: 0,20130105,20130326,20130427,20130614,20130622,20130708,20130716,20130801,20130809,20130817,...,20241026,20241030,20241104,20241109,20241111,20241114,20241119,20241124,20241127,cluster_id
0,0.549511,0.728884,0.747286,0.606444,0.606444,0.670584,0.670584,0.676006,0.676006,0.749514,...,0.768446,0.786566,0.804012,0.764654,0.614504,0.614504,0.609691,0.609691,0.609691,2021139000.0
1,0.482693,0.671296,0.694802,0.515856,0.515856,0.458986,0.466797,0.611624,0.611624,0.742112,...,0.79775,0.811016,0.822604,0.804288,0.568341,0.568341,0.719291,0.719291,0.719291,590686200.0
2,0.457751,0.668246,0.746782,0.537287,0.537287,0.543191,0.543191,0.531227,0.531227,0.626002,...,0.651592,0.6227,0.6793,0.617885,0.413549,0.413549,0.409447,0.409447,0.409447,-579235900.0
3,0.359847,0.722421,0.73404,0.447232,0.447232,0.299024,0.299024,0.477607,0.477607,0.683935,...,0.811882,0.83174,0.837943,0.83174,0.83174,0.83174,0.83174,0.83174,0.83174,-83207280.0
4,0.750847,0.750847,0.694219,0.562726,0.562726,0.593512,0.582101,0.654644,0.654644,0.735366,...,0.809628,0.801148,0.825323,0.801148,0.801148,0.801148,0.801148,0.801148,0.801148,34695210.0


In [5]:
# Load the KNN model that comes from the warungkondang data
def dtw_score(x, y):
    """Return the DTW distance between ``x`` and ``y`` under a Sakoe-Chiba band.

    Both inputs are converted with ``tslearn.utils.to_time_series`` and then
    compared with ``tslearn.metrics.dtw`` using
    ``global_constraint="sakoe_chiba"`` and ``sakoe_chiba_radius=2``.

    The unconstrained and Itakura-constrained distances that used to be
    computed here were never returned, so they are no longer evaluated.

    NOTE(review): the function is defined just before the pickled model is
    loaded, so the model presumably refers to it by this name; keep the name
    and signature stable. TODO confirm.

    Parameters
    ----------
    x, y : array-like
        Two series in any format accepted by ``to_time_series``.

    Returns
    -------
    float
        The Sakoe-Chiba-constrained DTW distance.
    """
    x_formatted = to_time_series(x)
    y_formatted = to_time_series(y)
    return dtw(
        x_formatted,
        y_formatted,
        global_constraint="sakoe_chiba",
        sakoe_chiba_radius=2,
    )
    
def adjust_df(df, features_path=None):
    """Align ``df`` to the column layout stored alongside the saved model.

    The pickled feature list is loaded, ``'cluster_id'`` is appended to it,
    and ``df`` is reindexed to exactly those columns in that order. Columns
    missing from ``df`` are created as NaN by the reindex, and columns not
    in the list are dropped. Gaps are then filled along each row (forward
    fill, then backward fill).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to align.
    features_path : str, optional
        Path of the pickled feature list. Defaults to
        ``folder_name + 'saved_models/' + 'model_features.pkl'``. The
        unpickled object must provide ``.tolist()``.

    Returns
    -------
    pandas.DataFrame
        Reindexed and gap-filled frame with ``'cluster_id'`` as last column.
    """
    if features_path is None:
        features_path = folder_name + 'saved_models/' + 'model_features.pkl'

    # SECURITY: pickle.load can execute arbitrary code contained in the file;
    # only load feature files from a trusted source.
    # The context manager closes the file handle, which the previous
    # pickle.load(open(...)) form left open.
    with open(features_path, 'rb') as features_file:
        model_features = pickle.load(features_file)

    new_features = model_features.tolist()
    new_features.append('cluster_id')

    adjusted_df = df.reindex(new_features, axis=1)
    # NOTE: the fill runs across every column, including cluster_id.
    adjusted_df = adjusted_df.ffill(axis=1)
    adjusted_df = adjusted_df.bfill(axis=1)
    return adjusted_df
    
# Load the saved classifier.
# SECURITY: pickle.load can execute arbitrary code contained in the file; only
# load model files from a trusted source.
# The context manager closes the file handle, which the previous
# pickle.load(open(...)) form left open.
with open(folder_name + 'saved_models/' + 'ts_classification.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Align the table to the column layout stored with the model.
sawah_df = adjust_df(sawah_df)

In [6]:
# Predict a label for every row of the larger dataset.
# The last column is excluded from the model input; adjust_df puts
# 'cluster_id' last, so the slice relies on that column order.
predict_all = model.predict(sawah_df.iloc[:, :-1])

In [7]:
# Attach the predicted label to a copy of the table, leaving sawah_df untouched.
unlabeled_df = sawah_df.copy()
unlabeled_df['label'] = predict_all

# Keep only the rows predicted as 'pandanwangi'. The earlier
# `final_df = unlabeled_df.copy()` was overwritten immediately and has been
# removed as dead work.
final_df = unlabeled_df[unlabeled_df['label'] == 'pandanwangi']

In [8]:
# Write the filtered rows to <folder_name>/classification_result/<filename>.csv.
# NOTE(review): the file name is built from `filename`, not from `prefix`
# — confirm this is intended.
output_folder = 'classification_result/'
output_filename = filename + ".csv"

final_df.to_csv("".join((folder_name, output_folder, output_filename)))