In [1]:
import pandas as pd
import numpy as np
import sys, os
import matplotlib.pyplot as plt
import sklearn
from time import time

# 커스텀 모듈 import
from infer_target import InferTarget

# Create the output folder for the sensitivity results (relative to the
# current working directory). makedirs(exist_ok=True) also creates the
# missing 'exp_results' parent and is a no-op when the folder already exists,
# so re-running this cell no longer raises FileExistsError / FileNotFoundError.
os.makedirs('exp_results/sensitivity', exist_ok=True)

In [None]:
DATA_DIR = '/root/workspace/AMI/InferProj/data'
RESULT_DIR = '/root/workspace/AMI/InferProj/exp_results/sensitivity'


def _load_total_usage(resource):
    """Return the per-row total of one resource's clustering-test CSV.

    Reads ``{DATA_DIR}/{resource}_clustering_test.csv`` using its first column
    as the index, sums every row across its columns, and converts the index
    to datetimes.
    """
    usage = pd.read_csv(f'{DATA_DIR}/{resource}_clustering_test.csv', index_col=0).sum(axis=1)
    usage.index = pd.to_datetime(usage.index)
    return usage


def _evaluate_embedding(embedding, iterations, gas_cluster, gas_test,
                        cluster_methods, time_prediction=False):
    """Run ``cluster_by_<embedding>`` for every clustering method and average the MAE.

    Args:
        embedding: Suffix of the ``InferTarget.cluster_by_*`` method to call
            ('RAE', 'GAE' or 'TAE'); also used as the label in the printed report.
        iterations: Iterable of values forwarded as the second positional
            argument of ``cluster_by_*``. Its meaning is defined by
            ``InferTarget`` (not visible here) -- TODO confirm whether it is a
            seed or an iteration index.
        gas_cluster: ``InferTarget`` built for the 'cluster' split.
        gas_test: ``InferTarget`` built for the 'test' split.
        cluster_methods: Clustering method names to evaluate, in column order.
        time_prediction: When true, also print how long ``predict_target``
            took on the 'cluster' split.

    Returns:
        Two lists ``(cluster_means, test_means)`` holding the mean MAE per
        clustering method, in the order of ``cluster_methods``.
    """
    # Resolve the bound methods once instead of on every iteration.
    cluster_on_train = getattr(gas_cluster, f'cluster_by_{embedding}')
    cluster_on_test = getattr(gas_test, f'cluster_by_{embedding}')

    cluster_means = []
    test_means = []
    for cluster_method in cluster_methods:
        mae_cluster = []
        mae_test = []
        for i in iterations:
            start = time()
            cluster, si = cluster_on_train(cluster_method, i)
            print('time taken for cluster', cluster_method, 'iteration', i, ':', (time()-start), 'seconds')

            if time_prediction:
                start = time()
            # The last element of predict_target's result is what this script
            # treats as the MAE.
            mae_cluster.append(gas_cluster.predict_target(cluster, si)[-1])
            if time_prediction:
                print('time taken for prediction', cluster_method, 'iteration', i, ':', (time()-start), 'seconds')

            cluster, si = cluster_on_test(cluster_method, i)
            mae_test.append(gas_test.predict_target(cluster, si)[-1])

        cluster_maes = np.array(mae_cluster)
        test_maes = np.array(mae_test)

        print(f'\n[{embedding} - {cluster_method}]')
        print('cluster MAE mean:', (c_mean := cluster_maes.mean()).round(10))
        print('cluster MAE std:', cluster_maes.std().round(10))
        print('test MAE mean:', (t_mean := test_maes.mean()).round(10))
        print('test MAE std:', test_maes.std().round(10))
        print()

        cluster_means.append(c_mean)
        test_means.append(t_mean)

    return cluster_means, test_means


# These totals do not depend on n_clusters, so they are loaded once instead of
# on every loop iteration. Nothing in this cell reads them; they are kept as
# globals in case other cells do.
elec = _load_total_usage('elec')
hotwater = _load_total_usage('hotwater')
water = _load_total_usage('water')
gas = _load_total_usage('gas')
# elec = elec.resample(rule='12H').sum()

cluster_methods = ['kmedoids', 'kmeans', 'agglomerative', 'gmm']

# (embedding label, values passed to cluster_by_*, time the prediction step?)
embedding_runs = [
    ('RAE', range(101, 102), False),
    ('GAE', range(1, 2), False),
    ('TAE', range(101, 102), True),
]

# The CSVs below are written to an absolute path, so make sure that exact
# directory exists regardless of the current working directory.
os.makedirs(RESULT_DIR, exist_ok=True)

for n_clusters in [3, 4, 5, 6, 7]:
    gas_cluster = InferTarget('gas', 'cluster', n_clusters=n_clusters)
    gas_test = InferTarget('gas', 'test', n_clusters=n_clusters)

    # Result tables: one row per embedding, one column per clustering method.
    df_cluster = pd.DataFrame(columns=cluster_methods)
    df_test = pd.DataFrame(columns=cluster_methods)

    # AVG baseline
    avg_cluster_mean = gas_cluster.regionA_average()
    avg_test_mean = gas_test.regionA_average()

    print('\n[AVG]')
    print(f'Cluster MAE mean: {avg_cluster_mean}')
    print(f'Test MAE mean: {avg_test_mean}')

    for embedding, iterations, time_prediction in embedding_runs:
        total_cluster, total_test = _evaluate_embedding(
            embedding, iterations, gas_cluster, gas_test, cluster_methods,
            time_prediction=time_prediction,
        )
        df_cluster.loc[embedding] = total_cluster
        df_test.loc[embedding] = total_test

    df_cluster.to_csv(f'{RESULT_DIR}/gas_cluster_{n_clusters}.csv')
    df_test.to_csv(f'{RESULT_DIR}/gas_test_{n_clusters}.csv')

    # A bare `df_cluster, df_test` expression inside a loop displays nothing,
    # so print the tables explicitly.
    print(df_cluster)
    print(df_test)