In [None]:
import pandas as pd
import math
from collections import Counter
from sklearn.metrics import mean_absolute_error


def isNaN(num):
    """Return True when *num* is NaN, False otherwise.

    The check relies on NaN comparing unequal to itself, so it does not
    require *num* to be a float: ordinary numbers, strings and None
    simply compare equal to themselves and yield False.
    """
    differs_from_itself = num != num
    return differs_from_itself

def data_imputer(strategy='mean'):
    """Build an imputation function for the chosen strategy.

    Args:
        strategy: One of 'mean', 'median' or 'mode'. The value is checked
            when the returned function is called, not when it is created.

    Returns:
        A function ``impute(data)`` that accepts an iterable of values and
        returns a new list in which every NaN entry is replaced by the
        chosen statistic of the non-NaN entries, rounded up with
        ``math.ceil``. Data without missing entries (including empty data)
        is returned as an unchanged list copy.

    Raises:
        ValueError: Raised by the returned function when the strategy is
            not supported, or when the data has missing entries but no
            observed value to derive a fill value from.
    """
    def _statistic(observed):
        # Compute the raw (unrounded) statistic for the selected strategy.
        if strategy == 'mean':
            return sum(observed) / len(observed)
        if strategy == 'median':
            ordered = sorted(observed)
            middle = len(ordered) // 2
            if len(ordered) % 2:
                return ordered[middle]
            # Even count: the median is the average of the two middle
            # values, not the upper one.
            return (ordered[middle - 1] + ordered[middle]) / 2
        # 'mode': most frequent observed value (first seen wins on ties).
        return Counter(observed).most_common(1)[0][0]

    def impute(data):
        if strategy not in ('mean', 'median', 'mode'):
            raise ValueError("Unsupported imputation strategy")

        # Materialize once so one-shot iterables are supported and the
        # data is not re-scanned for every step.
        values = list(data)
        observed = [x for x in values if not isNaN(x)]

        if len(observed) == len(values):
            # Nothing is missing (or the input is empty): nothing to fill.
            return values
        if not observed:
            raise ValueError(
                "Cannot impute: data contains no observed values"
            )

        # NOTE(review): the fill value is rounded up to an integer, as in
        # the original implementation; confirm this is intended for
        # fractional data.
        fill_value = math.ceil(_statistic(observed))
        return [fill_value if isNaN(x) else x for x in values]

    return impute

# Read the CSV file named by the user and keep only its "Import" column.
filename = input("Masukkan nama file (file.csv): ")
data = pd.read_csv(filename)
data = data["Import"]

# Example use of the closure: build one imputer per strategy and apply it.
# Imputation using the mean
impute_mean = data_imputer('mean')
imputed_data_mean = impute_mean(data)

# Imputation using the median
impute_median = data_imputer('median')
imputed_data_median = impute_median(data)

# Imputation using the mode
impute_mode = data_imputer('mode')
imputed_data_mode = impute_mode(data)

# Post-processing after imputation: start at the first observed entry.
# The imputers detect missing cells with isNaN, so the same check is used
# here; the previous `x is not None` test never matched a NaN cell.
start_index = next(
    (i for i, x in enumerate(data) if x is not None and not isNaN(x)),
    None,
)
imputed_length_mean = len(imputed_data_mean[start_index:])
imputed_length_median = len(imputed_data_median[start_index:])
imputed_length_mode = len(imputed_data_mode[start_index:])

# Compute the MAE for each imputation method.
# NOTE(review): there is no ground truth for the missing entries, so each
# strategy is scored by how far its fill value lies from the observed
# values. The previous code compared the mean-imputed list with itself,
# which always produced an MAE of 0.0 for the mean.
observed_values = [x for x in data if x is not None and not isNaN(x)]


def _observed_mae(imputer):
    """Return the MAE between the observed values and the imputer's fill value."""
    # Appending a single NaN probe exposes the value the imputer
    # substitutes for a missing entry, derived from the observed values.
    fill_value = imputer(observed_values + [float('nan')])[-1]
    total_error = sum(abs(x - fill_value) for x in observed_values)
    return total_error / len(observed_values)


mae_mean = _observed_mae(impute_mean)
mae_median = _observed_mae(impute_median)
mae_mode = _observed_mae(impute_mode)

# Pick the imputation result with the smallest MAE as the best one.
# min() returns the first minimum, so ties resolve in the order
# mean, median, mode.
candidates = [
    ('Mean', mae_mean, imputed_data_mean, imputed_length_mean),
    ('Median', mae_median, imputed_data_median, imputed_length_median),
    ('Modus', mae_mode, imputed_data_mode, imputed_length_mode),
]
hasil_imputasi_terbaik, best_mae, best_data, best_length = min(
    candidates, key=lambda candidate: candidate[1]
)
best_label = hasil_imputasi_terbaik.lower()
print("Panjang data:", best_length)
print(f"Hasil imputasi menggunakan {best_label}:", best_data[start_index:])
print(f"Metode imputasi data terbaik menggunakan {best_label} dengan MAE: {best_mae}")

Masukkan nama file (file.csv): andora.csv
Panjang data: 25
Hasil imputasi menggunakan mean: [0.07, 1, 1, 1, 0.0, 0.0, 0.02, 1, 0.03, 0.01, 0.09, 0.0, 0.01, 0.03, 0.01, 0.0, 5.28, 0.0, 0.09, 1, 0.04, 0.03, 0.01, 1, 1]
Metode imputasi data terbaik menggunakan mean dengan MAE: 0.0
