In [39]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from tslearn.metrics import dtw
from tslearn.utils import to_time_series
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

In [40]:
folder_name = 'cluster_timeseries/'

collected_data = ["2020-2024"]

# dtw_knn_3 -> cibeber, instead of warungkondang
# The bare string below is only a run log of the exported datasets; it is not used by the code.
'''
cibeber1: 2021 - 2024 instead of 2020-2024
cibeber2: 20 clusters, snic size: 50
cibeber3: change clustering model, snic size: 20
cibeber4: new imagecol, with Sentinel 7
campaka
'''
# NOTE(review): "Sentinel 7" presumably means Landsat 7 (df_preprocessing parses "LE07" columns) - confirm.

# Earth Engine script link:
# https://code.earthengine.google.com/3e9149cd235c6fe731fa6cd55beeab55

# Dataset to classify and the year range ("tahun" = year) that is part of its file name.
filename = "cibeber4"
tahun = "2020-2024"

# The CSV on disk uses "_" instead of "-" in the year range, e.g. "2020_2024".
tahun_filename = tahun.replace("-", "_")

# Read the export and discard the "system:index" column.
sawah_df = pd.read_csv(f"{folder_name}{filename}_{tahun_filename}.csv").drop(columns="system:index")

# Keep the cluster id -> geometry pairs aside, then remove the geometry from the working frame.
cluster_geo = sawah_df[['cluster_id', '.geo']]
sawah_df = sawah_df.drop(columns=".geo")

In [41]:
def df_preprocessing(df):
    """Rename scene columns to their date token, de-duplicate, sort and gap-fill them.

    A column is treated as a scene column when its name contains one of the
    markers "T48MYT", "LC08" or "LE07" (presumably Sentinel-2 tile T48MYT,
    Landsat 8 and Landsat 7 scene ids - confirm against the export script).
    Its new name is one "_"-separated token of the old name (for "T48MYT"
    only the first 8 characters of that token).

    Columns that match no marker (e.g. ``cluster_id``) keep their original
    name wherever they sit in the frame. Previously every non-matching column
    was renamed to an empty string and only the *last* column was protected,
    so a different column order silently lost the identifier column.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per cluster; scene columns hold the time-series values.

    Returns
    -------
    pandas.DataFrame
        A new frame whose columns are sorted by name, with at most one column
        per name, and with missing values filled along each row (forward
        first, then backward for leading gaps).

    Raises
    ------
    IndexError
        If a column contains a marker but has fewer "_"-separated tokens than
        the marker's rule expects.
    """
    # (marker substring, index of the date token after splitting on "_",
    #  number of leading characters to keep; None keeps the whole token).
    # The first matching rule wins, in this order.
    date_token_rules = (
        ("T48MYT", 3, 8),
        ("LC08", 5, None),
        ("LE07", 3, None),
    )

    # Build the rename map only for columns that match a marker.
    old_new_col = {}
    for col in df.columns.tolist():
        for marker, token_index, keep in date_token_rules:
            if marker in col:
                old_new_col[col] = col.split("_")[token_index][:keep]
                break

    df = df.rename(columns=old_new_col)

    # Drop columns whose (new) name is a duplicate, keeping the first one.
    df = df.loc[:, ~df.columns.duplicated(keep='first')]

    # Order the columns by name; for YYYYMMDD names this is chronological and
    # alphabetic names such as 'cluster_id' sort after the digits.
    df = df.reindex(sorted(df.columns), axis=1)

    # Fill gaps along each row: forward first, then backward for leading gaps.
    # NOTE: the fill spans every column, including non-date ones, so a row with
    # no scene values at all is back-filled from whatever column follows them.
    df = df.ffill(axis=1)
    df = df.bfill(axis=1)

    return df

In [42]:
# Run df_preprocessing on the loaded frame (renames scene columns, drops duplicate
# names, sorts the columns and fills gaps along each row) and rebind sawah_df to the result.
sawah_df = df_preprocessing(sawah_df)
# Preview the first rows of the processed frame.
sawah_df.head()

Unnamed: 0,20200115,20200117,20200120,20200125,20200214,20200218,20200219,20200224,20200229,20200305,...,20241026,20241030,20241104,20241114,20241119,20241124,20241127,20241129,20241204,cluster_id
0,0.504455,0.504455,0.504455,0.504455,0.673642,0.673642,0.673642,0.496213,0.496194,0.663482,...,0.611653,0.611653,0.498954,0.4384,0.4384,0.428327,0.428327,0.377537,0.377537,1201554000.0
1,0.460621,0.460621,0.460621,0.460621,0.460621,0.460621,0.460621,0.340074,0.340074,0.474401,...,0.488232,0.488232,0.332536,0.28949,0.28949,0.295702,0.295702,0.287676,0.287676,120484000.0
2,0.498889,0.498889,0.498889,0.498889,0.722132,0.722132,0.722132,0.679624,0.5543,0.801488,...,0.788702,0.794319,0.762558,0.575706,0.575706,0.518633,0.518633,0.458273,0.458273,363318300.0
3,0.433711,0.433711,0.433711,0.433711,0.729234,0.729234,0.729234,0.73161,0.626025,0.800045,...,0.814218,0.823721,0.799856,0.799856,0.799856,0.447364,0.447364,0.447364,0.447364,1939143000.0
4,0.496475,0.496475,0.496475,0.349678,0.569959,0.569959,0.569959,0.486205,0.44395,0.504378,...,0.619997,0.64009,0.57289,0.341597,0.341597,0.341597,0.341597,0.341597,0.341597,-2123149000.0


In [43]:
# Load the KNN model that comes from the Warungkondang data.
def dtw_score(x, y):
    """Return the DTW distance between two series under a Sakoe-Chiba band.

    Both inputs are converted with ``to_time_series`` and compared with
    ``dtw`` using ``global_constraint="sakoe_chiba"`` and a radius of 2.

    The unconstrained and Itakura-constrained distances that used to be
    computed here were never returned; they are no longer evaluated, which
    removes two of the three DTW computations per call.

    NOTE(review): the pickled classifier loaded in this cell presumably
    references this function by name as its distance metric, so the name and
    signature must stay as they are - confirm against the training notebook.

    Parameters
    ----------
    x, y : array-like
        The two series to compare.

    Returns
    -------
    float
        The Sakoe-Chiba-constrained DTW distance as returned by ``dtw``.
    """
    x_formatted = to_time_series(x)
    y_formatted = to_time_series(y)
    return dtw(x_formatted, y_formatted, global_constraint="sakoe_chiba", sakoe_chiba_radius=2)
    
def adjust_df(df):
    """Align ``df`` to the feature columns stored alongside the saved model.

    The feature names are unpickled from
    ``folder_name + 'saved_models/model_features.pkl'``; ``'cluster_id'`` is
    appended as the last column. Columns of ``df`` that are not in that list
    are dropped, and listed columns that ``df`` lacks are created empty and
    then filled along each row (forward first, then backward).

    The pickle file is now opened in a ``with`` block so its handle is closed
    as soon as the features are read.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains (some of) the feature columns and ``cluster_id``.

    Returns
    -------
    pandas.DataFrame
        A new frame with exactly the stored feature columns, in their stored
        order, followed by ``cluster_id``.
    """
    features_path = folder_name + 'saved_models/' + 'model_features.pkl'
    # NOTE: unpickling can execute arbitrary code; only load trusted artifacts.
    with open(features_path, 'rb') as features_file:
        model_features = pickle.load(features_file)

    new_features = model_features.tolist()
    new_features.append('cluster_id')

    # reindex both selects the stored features and adds the missing ones as NaN.
    adjusted_df = df.reindex(new_features, axis=1)

    # Fill the gaps along each row from neighbouring columns.
    # NOTE: the fill spans 'cluster_id' too, so a row without any feature value
    # would be back-filled from its cluster id.
    adjusted_df = adjusted_df.ffill(axis=1)
    adjusted_df = adjusted_df.bfill(axis=1)
    return adjusted_df
    
# Load the saved classifier; the file is opened in a with-block so the handle is closed afterwards.
# NOTE: unpickling can execute arbitrary code; only load trusted artifacts.
with open(folder_name + 'saved_models/' + 'ts_classification.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Align the frame to the columns the saved model's feature list contains.
sawah_df = adjust_df(sawah_df)

In [None]:
# With the larger dataset: print its (rows, columns) shape, then classify every row.
print(sawah_df.shape)
# The last column is left out of the model input; adjust_df puts 'cluster_id' there.
predict_all = model.predict(sawah_df.iloc[:, :-1])

(113, 410)


In [None]:
# Work on a copy so sawah_df itself does not gain the 'label' column.
unlabeled_df = sawah_df.copy()
unlabeled_df['label'] = predict_all

# Keep only the rows predicted as 'pandanwangi'.
# (The former `final_df = unlabeled_df.copy()` was overwritten by this line
# right away, so that unused full-frame copy has been removed.)
final_df = unlabeled_df[unlabeled_df.label == 'pandanwangi']

In [None]:
# Results are written below folder_name, in a sub-folder, as one CSV named after the dataset.
output_folder = 'classification_result/'
output_filename = f"{filename}.csv"

# Write the filtered rows (the DataFrame index is written as well, as to_csv does by default).
final_df.to_csv(f"{folder_name}{output_folder}{output_filename}")