<a href="https://colab.research.google.com/github/angel870326/Monthly-Revenue-Forecasting/blob/main/Function/MonRevPlot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

> 2023.04.05 Ssu-Yun Wang<br/>
[Github @angel870326](https://github.com/angel870326)

# **Monthly Revenue Forecasting with Benchmark, Random Forest Regressor, XGB Regressor, RNN, LSTM & DNN - Plots**

### Contents

##### 8. Plots
*   8.1 Expected v.s. Predicted
*   8.2 Scores: RMSE, MAPE, MAE, MAE%
    *   8.2.1 Plot All Scores
    *   8.2.2 分模型畫圖
    *   8.2.3 Boxplot
*   8.3 Feature Importance

##### 9. Best and Worst Model - Feature Importance

<br>

2023.04.08 updated for Communications

2023.04.15 updated for RNN and LSTM

2023.05.13 updated for DNN

2023.05.26 updated for Benchmark


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Color palette (hex strings) shared by the plotting helpers in this file.
# Blue / green / orange: lines of the XGB, LSTM and DNN models
# (blue is also used for the Benchmark line).
blue_hex = '#3265ff'
green_hex = '#4caf50'
orange_hex = '#ff9f02'    #ffae29

# Brown / purple / pink: lines of the Random Forest and RNN models
# (purple is also the bar color of the feature-importance charts).
purple_hex = '#ad99e6'
pink_hex = '#ff7dbe'
brown_hex = '#997d6c'

In [None]:
def getLineStyleColor(dataName: str, modelName: str):
    '''
    Return the matplotlib line style and color for one model / data-type pair.

    Deflated variants ('def', 'dec_def', 'season_def') are drawn solid and
    their non-deflated counterparts ('org', 'dec', 'season') dashed.
    Random Forest and RNN share one palette (brown / purple / pink);
    XGB, LSTM and DNN share the other (blue / green / orange).

    Parameters
    ----------
    dataName: | 'org' | 'def' | 'dec' | 'dec_def' | 'season' | 'season_def' |
              (ignored when modelName is 'Benchmark'),
    modelName: | 'Random Forest' | 'XGB' | 'RNN' | 'LSTM' | 'DNN' | 'Benchmark' |

    Returns
    -------
    (style, color): line style name and hex color string.

    Raises
    ------
    ValueError: if modelName or dataName is not one of the values listed above.
    '''
    # The benchmark has a single series, so dataName is irrelevant for it.
    if modelName == 'Benchmark':
        return 'solid', blue_hex

    warm_models = ('Random Forest', 'RNN')
    cool_models = ('XGB', 'LSTM', 'DNN')
    if modelName not in warm_models + cool_models:
        raise ValueError(f"Unknown modelName: {modelName!r}")

    # data type -> (line style, position of its color in the model's palette)
    variants = {
        'org': ('dashed', 0),
        'def': ('solid', 0),
        'dec': ('dashed', 1),
        'dec_def': ('solid', 1),
        'season': ('dashed', 2),
        'season_def': ('solid', 2),
    }
    if dataName not in variants:
        raise ValueError(f"Unknown dataName: {dataName!r}")

    style, slot = variants[dataName]
    if modelName in warm_models:
        palette = (brown_hex, purple_hex, pink_hex)
    else:
        palette = (blue_hex, green_hex, orange_hex)
    return style, palette[slot]

In [None]:
# Figure widths used when the test window starts in 2007 (2023.04.08)
comms_x = 30          # width of the score line plots
comms_x_large = 40    # width of the expected vs. predicted revenue plots

## **8.1 Expected v.s. Predicted**

In [None]:
# Plot the predictions of Random Forest, XGB, RNN, LSTM, DNN or Benchmark for a single data-processing variant

def plotPredictedValues(data: pd.DataFrame, name: str):
    '''
    Add one prediction line to the current matplotlib figure.

    The predictions are averaged over the rows of `data` (axis=0), so one
    value is drawn per column, with the column labels (as strings) on the x axis.

    Parameters
    ----------
    data: predictions of one model for one data-processing variant,
    name: 'benchmark', or '<data type>_<model>' where
          data type is | 'org' | 'def' | 'dec' | 'dec_def' | 'season' | 'season_def' |
          and model is | 'rf' | 'xgb' | 'rnn' | 'lstm' | 'dnn' |

    Raises
    ------
    ValueError: if name does not follow the format above.
    '''
    model_labels = {
        'rf': 'Random Forest',
        'xgb': 'XGB',
        'rnn': 'RNN',
        'lstm': 'LSTM',
        'dnn': 'DNN',
    }
    data_labels = {
        'org': 'original',
        'def': 'deflating',
        'dec': 'decomposed',
        'dec_def': 'decomposed_def',
        'season': 'seasonal_adj',
        'season_def': 'seasonal_def',
    }

    if name == 'benchmark':
        modelName = 'Benchmark'
        label = f'{modelName}'
        style, color = getLineStyleColor(name, modelName)
    else:
        # Split once on the last underscore: 'dec_def_xgb' -> ('dec_def', 'xgb')
        data_key, _, model_key = name.rpartition('_')
        if model_key not in model_labels:
            raise ValueError(f"Unknown model suffix in name: {name!r}")
        if data_key not in data_labels:
            raise ValueError(f"Unknown data type prefix in name: {name!r}")

        modelName = model_labels[model_key]
        label = f'{modelName} - {data_labels[data_key]}'
        style, color = getLineStyleColor(data_key, modelName)

    data_mean = data.mean(axis=0)
    plt.plot(data_mean.index.to_series().astype(str), data_mean.values, label = label, linestyle = style, c = color)

In [None]:
# Plot the prediction lines of one model for every data-processing variant

def plotModelPredictedValues(pred, modelName: str):
    '''
    Draw all prediction lines of one model on the current figure.

    Parameters
    ----------
    pred: a pd.DataFrame when modelName is 'benchmark'; otherwise a nested dict
          read as pred['org' | 'def']['org' | 'dec' | 'season'],
    modelName: model suffix passed on to plotPredictedValues
    '''
    # The benchmark has a single series and no data-processing variants.
    if modelName == 'benchmark':
        plotPredictedValues(pred, f'{modelName}')
        return

    # (outer key, inner key, name prefix), listed in plotting order
    variants = (
        ('org', 'org', 'org'),
        ('def', 'org', 'def'),
        ('org', 'dec', 'dec'),
        ('def', 'dec', 'dec_def'),
        ('org', 'season', 'season'),
        ('def', 'season', 'season_def'),
    )
    for outer, inner, prefix in variants:
        plotPredictedValues(pred[outer][inner], f'{prefix}_{modelName}')

In [None]:
def plotExpectedPredictedRevenues(originalD: pd.DataFrame, mod1: dict, mod2: dict, test_year: int, nn: bool = False):
    '''
    Plot the expected mean monthly revenue against the predictions of two models
    for every data-processing variant: Random Forest and XGB, or RNN and LSTM
    when nn is True.

    Parameters
    ----------
    originalD: original data set; its last columns are averaged over rows to get the expected line,
    mod1: predictions of Random Forest (or RNN when nn is True),
    mod2: predictions of XGB (or LSTM when nn is True),
    test_year: first year of the test data | 2007 | 2018 | 2019 | 2020 |,
    nn: whether the two models are RNN and LSTM (default = False)

    Raises
    ------
    ValueError: if test_year is not one of the supported years.
    '''
    # Number of trailing columns of originalD used for each test window
    test_columns = {
        2007: 192,  # from 2007-01
        2018: 60,   # from 2018-01
        2019: 47,   # from 2019-02
        2020: 36,   # from 2020-01
    }
    if test_year not in test_columns:
        raise ValueError(f"Unsupported test_year: {test_year!r} (expected one of {sorted(test_columns)})")
    org_mean = originalD.iloc[:, -test_columns[test_year]:].mean(axis=0)

    # Plot (the 2007 window is much longer, so it gets a wider figure)
    if test_year == 2007:
        plt.figure(figsize = (comms_x_large,10))
    else:
        plt.figure(figsize = (15,10))
    plt.title(f"Expected v.s Predicted Monthly Revenues ({test_year}-2022)",size = 14)
    plt.plot(org_mean.index.to_series().astype(str), org_mean.values, label = "Expected", color = 'r')

    if nn:
        plotModelPredictedValues(mod1, 'rnn')
        plotModelPredictedValues(mod2, 'lstm')
    else:
        plotModelPredictedValues(mod1, 'rf')
        plotModelPredictedValues(mod2, 'xgb')

    # Label every second column to keep the x axis readable
    plt.xticks(range(0, len(org_mean.index), 2), org_mean.index[::2], rotation=90)
    plt.legend(bbox_to_anchor = (1,1),loc = 'upper left')
    plt.show()

In [None]:
def plotModelExpectedPredictedRevenues(originalD: pd.DataFrame, pred: dict, modelName: str, test_year: int):
    '''
    Plot the expected mean monthly revenue against the predictions of a single
    model (Random Forest, XGB, RNN, LSTM, DNN or Benchmark) for every
    data-processing variant.

    Parameters
    ----------
    originalD: original data set; its last columns are averaged over rows to get the expected line,
    pred: predictions of the model (passed unchanged to plotModelPredictedValues),
    modelName: | 'rf' | 'xgb' | 'rnn' | 'lstm' | 'dnn' | 'benchmark' |,
    test_year: first year of the test data | 2007 | 2018 | 2019 | 2020 |

    Raises
    ------
    ValueError: if modelName or test_year is not one of the supported values.
    '''
    # Number of trailing columns of originalD used for each test window
    test_columns = {
        2007: 192,  # from 2007-01
        2018: 60,   # from 2018-01
        2019: 47,   # from 2019-02
        2020: 36,   # from 2020-01
    }
    model_titles = {
        'rf': 'Random Forest',
        'xgb': 'XGB',
        'rnn': 'RNN',
        'lstm': 'LSTM',
        'dnn': 'DNN',
        'benchmark': 'Benchmark',
    }
    if test_year not in test_columns:
        raise ValueError(f"Unsupported test_year: {test_year!r} (expected one of {sorted(test_columns)})")
    if modelName not in model_titles:
        raise ValueError(f"Unknown modelName: {modelName!r} (expected one of {sorted(model_titles)})")

    org_mean = originalD.iloc[:, -test_columns[test_year]:].mean(axis=0)
    name = model_titles[modelName]

    # Plot (the 2007 window is much longer, so it gets a wider figure)
    if test_year == 2007:
        plt.figure(figsize = (comms_x_large,10))
    else:
        plt.figure(figsize = (15,10))
    plt.title(f"Expected v.s Predicted Monthly Revenues - {name} ({test_year}-2022)", size = 14)
    plt.plot(org_mean.index.to_series().astype(str), org_mean.values, label = "Expected", color = 'r')
    plotModelPredictedValues(pred, modelName)
    # Label every second column to keep the x axis readable
    plt.xticks(range(0, len(org_mean.index), 2), org_mean.index[::2], rotation=90)
    plt.legend(bbox_to_anchor = (1,1),loc = 'upper left')
    plt.show()

## **8.2 Scores: RMSE, MAPE, MAE, MAE%**

In [None]:
# Plot a single score of Random Forest, XGB, RNN, LSTM, DNN or Benchmark for every data-processing variant

def plotModelLine(scoresD: pd.DataFrame, score: str, modelName: str):
    '''
    Add the score lines of one model to the current matplotlib figure.

    Parameters
    ----------
    scoresD: scores of one model; columns are read as '<score>-<data type>'
             ('<score>-benchmark' for the benchmark) and the index is the x axis,
    score: score name used as the column prefix, e.g. | 'RMSE' | 'MAPE' | 'MAE' | 'MAE%' |,
    modelName: | 'Random Forest' | 'XGB' | 'RNN' | 'LSTM' | 'DNN' | 'Benchmark' |
    '''
    # The benchmark has a single series and no data-processing variants.
    if modelName == 'Benchmark':
        style, color = getLineStyleColor('benchmark', modelName)
        plt.plot(scoresD.index, f'{score}-benchmark', data = scoresD, label = f"{modelName}", linestyle = style, c = color)
        return

    # (column suffix / data type, legend text), listed in plotting order.
    # The legend texts for the seasonal variants were previously misspelled "seaonal_*".
    variants = (
        ('org', 'org'),
        ('def', 'deflating'),
        ('dec', 'decomposed'),
        ('dec_def', 'decomposed_def'),
        ('season', 'seasonal_adj'),
        ('season_def', 'seasonal_def'),
    )
    for data_key, legend in variants:
        # Look the style up once per line instead of once per keyword argument
        style, color = getLineStyleColor(data_key, modelName)
        plt.plot(scoresD.index, f'{score}-{data_key}', data = scoresD, label = f"{modelName} - {legend}", linestyle = style, c = color)


### **8.2.1 Plot All Scores**

In [None]:
# Plot a single score of Random Forest and XGB for every data-processing variant
# Plot a single score of RNN and LSTM for every data-processing variant

def plotAllModelScore(mod1: pd.DataFrame, mod2: pd.DataFrame, score: str, test_y: int, nn: bool):
    '''
    Plot one score of two models in a single figure.

    Parameters
    ----------
    mod1: scores of Random Forest (or RNN when nn is True),
    mod2: scores of XGB (or LSTM when nn is True),
    score: | 'RMSE' | 'MAPE' | 'MAE' | 'MAE%' |,
    test_y: first year of the test data (2007 gets a wider figure),
    nn: whether the two models are RNN and LSTM
    '''
    width = comms_x if test_y == 2007 else 15
    plt.figure(figsize = (width, 4))

    first, second = ('RNN', 'LSTM') if nn else ('Random Forest', 'XGB')
    plt.title(f"{score} of {first} and {second} \n with Different Types of Data ({test_y}-2022)", size = 14)
    plotModelLine(mod1, score, first)
    plotModelLine(mod2, score, second)

    # Tick positions and tick labels must come from the same index; mod1 is
    # plotted first, so its index defines the x axis. (The labels were
    # previously taken from mod2.index, which breaks if the indexes differ.)
    plt.xticks(range(0, len(mod1.index), 2), mod1.index[::2], rotation=90)
    plt.legend(bbox_to_anchor = (1,1), loc = 'upper left')
    plt.show()

In [None]:
def plotAllScores(mod1: pd.DataFrame, mod2: pd.DataFrame, test_y: int, nn: bool = False):
    '''
    Plot every score (RMSE, MAPE, MAE, MAE%) of two models, one figure per score:
    Random Forest and XGB, or RNN and LSTM when nn is True.

    Parameters
    ----------
    mod1: scores of Random Forest or RNN,
    mod2: scores of XGB or LSTM,
    test_y: first year of the test data,
    nn: whether the two models are RNN and LSTM (default = False)
    '''
    for metric in ('RMSE', 'MAPE', 'MAE', 'MAE%'):
        plotAllModelScore(mod1, mod2, metric, test_y, nn)

### **8.2.2 分模型畫圖**

In [None]:
def plotModelScore(scoreD: pd.DataFrame, modelName: str, score: str, test_y: int):
    '''
    Plot one score of a single model (Random Forest, XGB, RNN, LSTM, DNN or
    Benchmark) for every data-processing variant.

    Parameters
    ----------
    scoreD: scores of the model,
    modelName: | 'Random Forest' | 'XGB' | 'RNN' | 'LSTM' | 'DNN' | 'Benchmark' |,
    score: | 'RMSE' | 'MAPE' | 'MAE' | 'MAE%' |
    test_y: first year of the test data
    '''
    # The 2007 test window gets a wider figure.
    fig_width = comms_x if test_y == 2007 else 15
    plt.figure(figsize = (fig_width, 4))
    plt.title(f"{score} of {modelName} \n with Different Types of Data ({test_y}-2022)", size = 14)

    plotModelLine(scoreD, score, modelName)

    # Label every second index entry on the x axis
    tick_positions = range(0, len(scoreD.index), 2)
    tick_labels = scoreD.index[::2]
    plt.xticks(tick_positions, tick_labels, rotation=90)
    plt.legend(bbox_to_anchor = (1,1),loc = 'upper left')
    plt.show()

In [None]:
def compareModelScore(mod1: pd.DataFrame, mod2: pd.DataFrame, score: str, test_y: int, nn: bool = False):
    '''
    Plot one score of two models in two separate figures:
    Random Forest and XGB, or RNN and LSTM when nn is True.

    Parameters
    ----------
    mod1: scores of Random Forest or RNN,
    mod2: scores of XGB or LSTM,
    score: | 'RMSE' | 'MAPE' | 'MAE' | 'MAE%' |
    test_y: first year of the test data,
    nn: whether the two models are RNN and LSTM (default = False)
    '''
    model_names = ('RNN', 'LSTM') if nn == True else ('Random Forest', 'XGB')
    for scores, model_name in zip((mod1, mod2), model_names):
        plotModelScore(scores, model_name, score, test_y)

### **8.2.3 Boxplot**

In [None]:
def scoreBoxplotV(mod1: pd.DataFrame, mod2: pd.DataFrame, score: str, test_y: int, nn: bool = False):
    '''
    Draw boxplots of one score for two models and every data-processing variant:
    Random Forest and XGB, or RNN and LSTM when nn is True.

    Parameters
    ----------
    mod1: scores of Random Forest or RNN,
    mod2: scores of XGB or LSTM,
    score: | 'RMSE' | 'MAPE' | 'MAE' | 'MAE%' |
    test_y: first year of the test data
    nn: whether the two models are RNN and LSTM (default = False)
    '''
    # (column suffix, tick label text), listed in plotting order
    variants = (
        ('org', 'original'),
        ('def', 'deflating'),
        ('dec', 'decomposed'),
        ('dec_def', 'decomposed_def'),
        ('season', 'seasonal_adj'),
        ('season_def', 'seasonal_def'),
    )

    plt.figure(figsize = (15,4))
    plt.title(f"{score} ({test_y}-2022)", size = 14)
    col = [f'{score}-{suffix}' for suffix, _ in variants]
    scoreD = pd.concat([mod1[col], mod2[col]], axis=1)
    plt.boxplot(scoreD)

    # Build the labels from the table so that every box gets the same
    # two-line format and the spelling stays consistent (the hand-written
    # lists spelled "orginal" and mixed one- and two-line XGB labels).
    model_names = ('RNN', 'LSTM') if nn else ('Random Forest', 'XGB')
    labels = [f'{model} - \n {text}' for model in model_names for _, text in variants]
    plt.xticks(range(1, len(labels) + 1), labels, rotation = 25)

    plt.show()

In [None]:
# 2023.05.13
def modelScoreBoxplotV(scoreD: pd.DataFrame, score: str, test_y: int, benchmark: bool = False, modelName: str = 'DNN'):
    '''
    Draw boxplots of one score for a single model (Random Forest, XGB, RNN,
    LSTM, DNN or Benchmark) and every data-processing variant.

    Parameters
    ----------
    scoreD: scores of the model,
    score: | 'RMSE' | 'MAPE' | 'MAE' | 'MAE%' |,
    test_y: first year of the test data,
    benchmark: whether the scores belong to the benchmark (default = False),
    modelName: model name shown in the tick labels when benchmark is False
               (default = 'DNN', which was previously hard-coded)
    '''
    if benchmark:
        # The benchmark has a single score column, so a narrower figure is enough.
        fig_size = (8,4)
        col = [f'{score}-benchmark']
        labels = ['Benchmark']
        rotation = 0
    else:
        # (column suffix, tick label text), listed in plotting order
        variants = (
            ('org', 'original'),
            ('def', 'deflating'),
            ('dec', 'decomposed'),
            ('dec_def', 'decomposed_def'),
            ('season', 'seasonal_adj'),
            ('season_def', 'seasonal_def'),
        )
        fig_size = (15,4)
        col = [f'{score}-{suffix}' for suffix, _ in variants]
        labels = [f'{modelName} - {text}' for _, text in variants]
        rotation = 25

    plt.figure(figsize = fig_size)
    plt.title(f"{score} ({test_y}-2022)", size = 14)
    scoreD = scoreD[col]
    plt.boxplot(scoreD)
    plt.xticks(range(1, len(labels) + 1), labels, rotation = rotation)
    plt.show()

## **8.3 Feature Importance**

In [None]:
# Plot the top-k most important features of Random Forest or XGB for a single data-processing variant

def plotModelFeatureImportance(featureD: pd.DataFrame, modelName: str, dataName: str, detail: str, k: int = 48):
    '''
    Draw a horizontal bar chart of the k largest mean absolute feature
    importances on the current axes.

    Parameters
    ----------
    featureD: feature importances, one row per feature (averaged across columns),
    modelName: model name shown in the title,
    dataName: data-processing description shown in the title,
    detail: extra text shown in parentheses in the title,
    k: number of features to show (default = 48)
    '''
    # Mean absolute importance per feature, sorted ascending, keeping the last k
    top_features = featureD.abs().mean(axis=1).sort_values().iloc[-k:]
    bar_count = top_features.shape[0]

    plt.title(f"Feature Importance of {modelName} - \n{dataName} ({detail})", size = 14)
    plt.barh(y = top_features.index, width = top_features.values, data = top_features, color = purple_hex)
    # Write each value just to the right of its bar
    for position, value in enumerate(top_features.values):
        plt.text(value*1.01, position-0.25, '%.3f' % value)
    plt.xlim([0, max(top_features)*1.15])
    plt.ylim([-0.65, bar_count])
    plt.xticks(rotation = 45)

In [None]:
def plotFeatureImportance(feature_rf: pd.DataFrame, feature_xgb: pd.DataFrame, dataName: str, test_year: int, k: int = 48):
    '''
    Plot the top-k most important features of Random Forest and XGB side by
    side for a single data-processing variant.

    Parameters
    ----------
    feature_rf, feature_xgb: feature importances of Random Forest and XGB for one data-processing variant,
    dataName: | 'org' | 'def' | 'dec' | 'dec_def' | 'season' | 'season_def' |,
    test_year: first year of the test data,
    k: <= 49 (default = 48)

    Raises
    ------
    ValueError: if dataName is not one of the values listed above.
    '''
    # Keep the existing behavior for an oversized k: report it and draw nothing.
    if k > 49:
        print("Please select a number smaller than or equal to 49 for top k.")
        return

    data_titles = {
        'org': 'Original Data',
        'def': 'Deflating Data',
        'dec': 'Decomposed Data',
        'dec_def': 'Decomposed Data + Deflating',
        'season': 'Data without Seasonality',
        'season_def': 'Data without Seasonality + Deflating',
    }
    if dataName not in data_titles:
        raise ValueError(f"Unknown dataName: {dataName!r} (expected one of {sorted(data_titles)})")
    name = data_titles[dataName]

    plt.figure(figsize=(15,10))
    # Left panel: Random Forest
    plt.subplot(1,2,1)
    plotModelFeatureImportance(feature_rf, 'Random Forest Regressor', name, f'{test_year}-2022', k)
    # Right panel: XGB
    plt.subplot(1,2,2)
    plotModelFeatureImportance(feature_xgb, 'XGB Regressor', name, f'{test_year}-2022', k)
    plt.show()

## **9. Best and Worst Model - Feature Importance**
2023.04.05

In [None]:
class BestWorstModelFeatureImportancePlot():
    '''
    Plot the feature importances of the best and worst models.

    Parameters
    ----------
    feature_rf, feature_xgb: feature importances of Random Forest and XGB for every
        data-processing variant, read as feature[key1][key2][month].
    best_rf, worst_rf: best and worst Random Forest models under each score.
    best_xgb, worst_xgb: best and worst XGB models under each score.
    best_all, worst_all: best and worst models over all models under each score.
    test_year: first year of the test data.

    The best_* / worst_* objects are read as obj[scoreType].iloc[0][column], with
    columns 'dataType' and 'min_month' (best) or 'max_month' (worst);
    best_all additionally provides 'modelName'.

    Methods
    ----------
    plotBestWorstFeatureImportance(scoreType: str, k: int, all: bool):
        scoreType: | 'RMSE' | 'MAPE' | 'MAE' | 'MAE%' |,
        k: top k important features, <= 10 (default = 5),
        all: whether to use the best and worst models over all models (default = False)
    '''

    def __init__(self, feature_rf: pd.DataFrame, feature_xgb: pd.DataFrame, best_rf: pd.DataFrame, worst_rf: pd.DataFrame, best_xgb: pd.DataFrame, worst_xgb: pd.DataFrame, best_all: pd.DataFrame, worst_all: pd.DataFrame, test_year: int):
        self.feature_rf = feature_rf
        self.feature_xgb = feature_xgb
        self.best_rf = best_rf
        self.worst_rf = worst_rf
        self.best_xgb = best_xgb
        self.worst_xgb = worst_xgb
        self.best_all = best_all
        self.worst_all = worst_all
        self.test_year = test_year


    def plotSingleFeatureImportance(self, feature_importance: dict, modelName: str, dataType: str, month: str, best_or_worst: str, k: int):
        '''
        Draw the top-k feature importances of one best or worst model on the current axes.

        Parameters
        ----------
        feature_importance: feature importances read as feature_importance[key1][key2][month],
        modelName: model name shown in the title,
        dataType: | 'org' | 'def' | 'dec' | 'dec_def' | 'season' | 'season_def' |,
        month: month of the model whose importances are plotted,
        best_or_worst: text shown in the title, e.g. 'Best' or 'Worst',
        k: number of features to show

        Raises
        ------
        ValueError: if dataType is not one of the values listed above.
        '''
        # data type -> (title text, outer key, inner key)
        data_types = {
            'org': ('Original Data', 'org', 'org'),
            'def': ('Deflating Data', 'def', 'org'),
            'dec': ('Decomposed Data', 'org', 'dec'),
            'dec_def': ('Decomposed Data + Deflating', 'def', 'dec'),
            'season': ('Data without Seasonality', 'org', 'season'),
            'season_def': ('Data without Seasonality + Deflating', 'def', 'season'),
        }
        if dataType not in data_types:
            raise ValueError(f"Unknown dataType: {dataType!r} (expected one of {sorted(data_types)})")
        name, key1, key2 = data_types[dataType]

        feature = feature_importance[key1][key2][month]   # importances of the best or worst model
        feature = feature.sort_values()                     # ascending order
        feature = feature.iloc[-k:]                         # top k

        # Plot
        plt.title(f"Feature Importance of {modelName} {best_or_worst} Model - \n{name} ({self.test_year}-2022)", size = 14)
        plt.barh(y = feature.index, width = feature.values, data = feature, color = purple_hex)
        # Write each value just to the right of its bar
        for position, value in enumerate(feature.values):
            plt.text(value*1.01, position-0.1, '%.3f' % value)
        plt.xlim([0, max(feature)*1.15])
        plt.ylim([-0.65, feature.shape[0]])
        plt.xticks(rotation = 45)


    def plotModelBestWorstFeatureImportance(self, modelName: str, dataType: list, month: list, k: int):
        '''
        Plot the top-k important features of the best and worst model of
        Random Forest or XGB side by side.

        Parameters
        ----------
        modelName: | 'Random Forest' | 'XGB' |,
        dataType: [data type of the best model, data type of the worst model],
        month: [month of the best model, month of the worst model],
        k: top k important features, <= 10 (default = 5)

        Raises
        ------
        ValueError: if modelName is neither 'Random Forest' nor 'XGB'.
        '''

        # Model
        if modelName == "Random Forest":
            feature_importance = self.feature_rf
        elif modelName == "XGB":
            feature_importance = self.feature_xgb
        else:
            raise ValueError(f"Unknown modelName: {modelName!r} (expected 'Random Forest' or 'XGB')")

        # Plot
        plt.figure(figsize=(15,3))
        # Model - Best
        plt.subplot(1,2,1)
        self.plotSingleFeatureImportance(feature_importance, modelName, dataType[0], month[0], "Best", k)
        # Model - Worst
        plt.subplot(1,2,2)
        self.plotSingleFeatureImportance(feature_importance, modelName, dataType[1], month[1], "Worst", k)
        plt.show()


    def _bestWorstSelection(self, best, worst, scoreType: str):
        '''
        Return ([best data type, worst data type], [best month, worst month])
        read from the first row of best[scoreType] and worst[scoreType].
        '''
        best_row = best[scoreType].iloc[0]
        worst_row = worst[scoreType].iloc[0]
        data_types = [best_row['dataType'], worst_row['dataType']]
        months = [best_row['min_month'], worst_row['max_month']]
        return data_types, months


    def plotBestWorstFeatureImportance(self, scoreType: str, k: int = 5, all: bool = False):
        '''
        Plot the top-k important features of the best and worst models of
        Random Forest and XGB, or of the best and worst models overall.

        Parameters
        ----------
        scoreType: | 'RMSE' | 'MAPE' | 'MAE' | 'MAE%' |,
        k: top k important features, <= 10 (default = 5),
        all: whether to use the best and worst models over all models (default = False)
        '''

        # Keep the existing behavior for an oversized k: report it and draw nothing.
        if k > 10:
            print("Please select a number smaller than or equal to 10 for top k.")
            return

        if not all:
            # Resolve both selections before drawing anything
            dataName_rf, month_rf = self._bestWorstSelection(self.best_rf, self.worst_rf, scoreType)
            dataName_xgb, month_xgb = self._bestWorstSelection(self.best_xgb, self.worst_xgb, scoreType)
            # Random Forest
            self.plotModelBestWorstFeatureImportance('Random Forest', dataName_rf, month_rf, k)
            # XGB
            self.plotModelBestWorstFeatureImportance('XGB', dataName_xgb, month_xgb, k)
        else:
            dataName_all, month_all = self._bestWorstSelection(self.best_all, self.worst_all, scoreType)
            # The model name of the overall best model is used for both panels
            self.plotModelBestWorstFeatureImportance(self.best_all[scoreType].iloc[0]['modelName'], dataName_all, month_all, k)
