# In [None]:
from elice_utils import EliceUtils

# Shared EliceUtils instance; the plotting functions in this file call its
# send_image() method with the path of each saved figure.
elice_utils = EliceUtils()

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

import matplotlib.pyplot as plt 
import pandas as pd

import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from matrixprofile import *
from pmdarima.arima import auto_arima

import os
from datetime import datetime, timedelta


def interpolation_and_standardization(df, order_n):
    """Interpolate missing values in every feature, then standardize them.

    Args:
        df: DataFrame of features. A column named "New", if present, is
            discarded and takes no part in the result.
        order_n: Polynomial order forwarded to
            ``Series.interpolate(method='polynomial', order=order_n)``.

    Returns:
        A new DataFrame with the remaining columns, interpolated and then
        transformed by ``StandardScaler.fit_transform``. It keeps the row
        index of ``df``; the input frame itself is not modified.
    """
    # Drop "New" up front: it is discarded anyway, so interpolating it would
    # be wasted work (and would fail outright if the column is non-numeric).
    # errors="ignore" keeps the function usable on frames without the column.
    df_preprocessed = df.drop(columns="New", errors="ignore").copy()

    for column in df_preprocessed.columns:
        # [Instruction 1-A] Apply polynomial interpolation to each feature.
        df_preprocessed[column] = df_preprocessed[column].interpolate(
            method='polynomial', order=order_n
        )

    # [Instruction 1-B] Build a scaler for standardization and apply it to
    # the dataset.
    scaler = StandardScaler()
    scaled_values = scaler.fit_transform(df_preprocessed)

    # fit_transform returns a bare array; restore both the column labels and
    # the original row index so rows stay aligned with the input.
    return pd.DataFrame(
        scaled_values,
        columns=df_preprocessed.columns,
        index=df_preprocessed.index,
    )


def visualize_two_variables_correlation(df, first_var, second_var, length=200):
    """Plot two columns on the same axes and send the saved image.

    Args:
        df: DataFrame containing both columns.
        first_var: Name of the first column to plot.
        second_var: Name of the second column to plot.
        length: Only the first ``length`` rows of each column are drawn.

    Side effects:
        Writes ``./two_{first_var}_{second_var}.png`` and passes that path to
        ``elice_utils.send_image``.
    """
    image_path = f"./two_{first_var}_{second_var}.png"

    fig, ax = plt.subplots(figsize=(20, 5))
    try:
        ax.plot(df[first_var][:length], label=first_var)
        ax.plot(df[second_var][:length], label=second_var)
        ax.legend(loc='upper left')
        fig.savefig(image_path)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)

    elice_utils.send_image(image_path)


def plot_autocorrelation_anomaly(df, var, lags=500):
    """Draw ACF and PACF plots for one column side by side and send the image.

    Args:
        df: DataFrame containing the column ``var``.
        var: Name of the column to analyse.
        lags: Number of lags forwarded to both ``plot_acf`` and ``plot_pacf``.

    Side effects:
        Writes ``./autocorrelation_anomaly_{var}.png`` and passes that path to
        ``elice_utils.send_image``.
    """
    image_path = f"./autocorrelation_anomaly_{var}.png"

    fig, (acf_ax, pacf_ax) = plt.subplots(1, 2, figsize=(20, 5))
    try:
        fig.suptitle('Raw Data')

        # [Instruction 3-A] Autocorrelation plot of the given variable.
        plot_acf(df[var], lags=lags, ax=acf_ax)

        # [Instruction 3-B] Partial autocorrelation plot of the given variable.
        plot_pacf(df[var], lags=lags, ax=pacf_ax)

        fig.savefig(image_path)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)

    elice_utils.send_image(image_path)


def plot_matrix_profile_anomaly(df, var_list, window_size=50, length=200):
    """Plot the matrix profile of several columns on one figure and send it.

    Args:
        df: DataFrame containing every column named in ``var_list``.
        var_list: Column names whose matrix profiles are computed.
        window_size: Window length passed to
            ``matrixProfile.scrimp_plus_plus``.
        length: Only the first ``length`` rows of each column are used.

    Side effects:
        Writes ``./matrix_profile_anomaly.png`` and passes that path to
        ``elice_utils.send_image``.
    """
    image_path = "./matrix_profile_anomaly.png"

    # [Instruction 4] Compute the matrix profile of each variable with the
    # SCRIMP++ algorithm.
    profiles = [
        (var, matrixProfile.scrimp_plus_plus(df[var][:length].values, window_size))
        for var in var_list
    ]

    fig, ax = plt.subplots(figsize=(20, 5))
    try:
        for var, mp in profiles:
            # Only the first element of the result is plotted (presumably the
            # profile values, with any further elements holding indices).
            profile_values = mp[0]
            ax.plot(range(len(profile_values)), profile_values, label=var)

        ax.legend(loc='upper left')
        fig.savefig(image_path)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)

    elice_utils.send_image(image_path)


def run_and_plot_arima_anomaly(df, var, length=200):
    """Fit a seasonal auto-ARIMA model on one column and plot its forecast.

    The first ``length`` rows of ``df[var]`` are split in order (no shuffling)
    into the leading 80% for training and the trailing 20% for testing. The
    fitted model forecasts as many periods as there are test rows, and the
    forecast, its prediction interval and the r2 score against the test rows
    are drawn on top of the series.

    Args:
        df: DataFrame containing the column ``var``.
        var: Name of the column to model.
        length: Number of leading rows used for the train/test split.

    Side effects:
        Writes ``./auto_arima_{var}.png`` and passes that path to
        ``elice_utils.send_image``.
    """
    # Significance level of the prediction interval. It is passed to predict()
    # explicitly and also drives the legend text, so the label can no longer
    # disagree with the interval that is actually drawn. 0.05 is the value
    # predict() was already using implicitly (the old label claimed 0.90).
    interval_alpha = 0.05
    image_path = f"./auto_arima_{var}.png"

    train_data, test_data = train_test_split(
        df[var][:length], test_size=0.2, shuffle=False
    )

    # [Instruction 5-A] Build the ARIMA model with auto_arima.
    auto_arima_model = auto_arima(
        train_data,
        start_p=1,
        start_q=1,
        max_p=1,
        max_q=1,
        m=24,
        seasonal=True,
        D=1,
        max_D=1,
        trace=True,
        error_action='ignore',
        suppress_warnings=True,
        stepwise=True,
    )

    # [Instruction 5-B] Forecast over the span of the test data with the
    # fitted model.
    predicted_value, conf_int = auto_arima_model.predict(
        n_periods=len(test_data),
        return_conf_int=True,
        alpha=interval_alpha,
    )
    predicted_lb = conf_int[:, 0]
    predicted_ub = conf_int[:, 1]
    predict_index = list(test_data.index)
    r2 = r2_score(test_data, predicted_value)

    fig, ax = plt.subplots(figsize=(15, 8))
    try:
        # NOTE(review): the plotted window is fixed at the first 500 rows and
        # does not follow `length` - confirm this is intentional.
        ax.plot(df[var][0:500], label=var)
        # axvline spans the full height of the axes, unlike a vlines segment
        # with hard-coded y-limits that also distorts the autoscaled y-range.
        ax.axvline(
            predict_index[0],
            linestyle='--',
            color='r',
            label='Start of Forecast',
        )
        ax.plot(predict_index, predicted_value, color='orange', label='Prediction')
        ax.fill_between(
            predict_index,
            predicted_lb,
            predicted_ub,
            color='k',
            alpha=0.1,
            label=f"{1 - interval_alpha:.2f} Prediction Interval",
        )
        ax.legend(loc='upper left')
        fig.suptitle(
            f"ARIMA {auto_arima_model.order},{auto_arima_model.seasonal_order} "
            f"Prediction Results (r2_score: {round(r2,2)})"
        )
        fig.savefig(image_path)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)

    elice_utils.send_image(image_path)


def main():
    """Load the CSV, preprocess it, and produce every plot in sequence."""
    raw_frame = pd.read_csv("./preprocessed_1004_0.csv")
    prepared = interpolation_and_standardization(raw_frame, 2)

    # [Instruction 2-A] Plot the relationship between dl_prb and ul_prb.
    # [Instruction 2-B] Plot the relationship between dl_bler and
    # reconfig_succ_rate.
    variable_pairs = (
        ("dl_prb", "ul_prb"),
        ("dl_bler", "reconfig_succ_rate"),
    )
    for first_var, second_var in variable_pairs:
        visualize_two_variables_correlation(
            prepared, first_var, second_var, length=200
        )

    plot_autocorrelation_anomaly(prepared, "dl_prb", lags=500)

    profile_columns = ["dl_prb", "ul_prb", "dl_bler", "ul_bler"]
    plot_matrix_profile_anomaly(
        prepared, profile_columns, window_size=50, length=200
    )

    run_and_plot_arima_anomaly(prepared, "dl_prb", length=200)

# Run the full pipeline only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()