In [14]:
import os

import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import r2_score

def plot_loss(history, save_dir=None, final_r2=None):
    """Plot the per-epoch training loss and, when recorded, validation loss.

    Args:
        history: Object exposing a ``history`` dict that holds a ``'loss'``
            sequence and, optionally, a ``'val_loss'`` sequence (presumably a
            Keras ``History`` -- confirm against the caller).
        save_dir: Optional directory, interpreted relative to the current
            working directory, where the figure is saved under the name
            ``plot``. The directory is created when it does not exist yet.
        final_r2: Optional ``(train_r2, val_r2)`` pair. When given, both
            values are written as a caption below the axes.
    """
    losses = history.history
    train_loss = losses['loss']

    plt.title("Training loss curves")
    plt.plot(train_loss, label='Train Loss')
    # The validation series is optional: skip it instead of raising KeyError
    # when the history carries no 'val_loss' entry.
    if 'val_loss' in losses:
        plt.plot(losses['val_loss'], label='Val Loss')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    # One tick per epoch, rotated so the labels do not overlap.
    plt.xticks(np.arange(0, len(train_loss), 1.0), rotation=90)

    # If the final_r2 tuple is specified, add a description of it at the bottom.
    if final_r2:
        final_r2_description = f"Train R2: {final_r2[0]:.5f}        Val R2: {final_r2[1]:.5f}"
        # Negative y places the caption below the axes area.
        plt.figtext(0.5, -0.08, final_r2_description, horizontalalignment='center')

    plt.grid(True)

    # Optionally save to file; must happen before plt.show().
    if save_dir:
        target_dir = f"./{save_dir}"
        # savefig does not create missing directories, so do it up front.
        os.makedirs(target_dir, exist_ok=True)
        plt.savefig(f"{target_dir}/plot", bbox_inches="tight")
    plt.show()
        

# Evaluate a fitted regression model: print train/test R2 and plot predicted vs. actual values
def _first_output_1d(predictions):
    """Return predictions as a 1-D array, keeping only the first output per sample."""
    values = np.asarray(predictions)
    # Drop trailing axes one at a time, e.g. (n, 1) -> (n,).
    while values.ndim > 1:
        values = values[:, 0]
    return values


def evaluate_regression_model(
    model,
    history,
    x_train,
    y_train,
    x_test,
    y_test
):
    """Print train/test R2 scores and plot predicted vs. actual values.

    Args:
        model: Fitted estimator exposing ``predict(x)``.
        history: Training history forwarded to ``plot_loss``; pass ``None``
            to skip the loss-curve plot.
        x_train: Training features passed to ``model.predict``.
        y_train: Training targets.
        x_test: Test features passed to ``model.predict``.
        y_test: Test targets.

    Predictions are reduced to one value per sample before scoring and
    plotting, whether ``predict`` returns nested lists or a column-shaped
    array such as ``(n, 1)``.
    """
    if history is not None:
        plot_loss(history)

    # Make the predictions and flatten them; the previous list-only check
    # missed ndarray outputs, leaving (n, 1) arrays unflattened.
    train_predictions = _first_output_1d(model.predict(x_train))
    test_predictions = _first_output_1d(model.predict(x_test))

    # calculate r2 values
    train_r2 = r2_score(y_train, train_predictions)
    test_r2 = r2_score(y_test, test_predictions)

    print("======================================")
    print(f"Train R2 score is {train_r2:.5}")
    print(f"Test R2 score is {test_r2:.5}")
    print("======================================")

    # plot train predicted vs actuals
    sns.scatterplot(x=y_train, y=train_predictions, s=15)
    plt.ylabel('Predicted')
    plt.xlabel('Actual')
    plt.title("Predicted vs Actual Train Values ")
    plt.xlim([-150,150])
    plt.ylim([-150,150])
    plt.show()

    print("")

    # plot test predicted vs actuals
    plt.plot([-90,90], [-90,90], color="orange") # perfect-prediction line (y = x)
    sns.scatterplot(x=y_test, y=test_predictions, s=15)
    plt.ylabel('Predicted')
    plt.xlabel('Actual')
    plt.title(f"Predicted vs Actual Test values (r2 = {test_r2:.4f})")
    plt.xlim([-100,100])
    plt.ylim([-100,100])
    plt.show()

In [12]:
import numpy as np

def plot_feature_importance(importance, names, model_type, limit=None):
    """Draw a horizontal bar chart of feature importances, largest first.

    Args:
        importance: Sequence of importance values, one per feature.
        names: Sequence of feature names aligned with ``importance``.
        model_type: Label prepended to the chart title.
        limit: Optional number of top-ranked features to keep; a falsy value
            keeps all of them.
    """
    importance_values = np.array(importance)
    name_values = np.array(names)

    # Assemble a two-column frame and rank it by descending importance.
    ranked = pd.DataFrame(
        {'feature_names': name_values, 'feature_importance': importance_values}
    ).sort_values(by=['feature_importance'], ascending=False)

    # Keep only the top entries when a limit was requested.
    if limit:
        ranked = ranked.head(limit)

    # Seaborn bar chart: importance on the x axis, one bar per feature.
    sns.barplot(
        x=ranked['feature_importance'],
        y=ranked['feature_names'],
        palette='Accent',
    )

    # Chart labels
    plt.title(model_type + ' Feature Importance')
    plt.xlabel('Feature Importance')
    plt.ylabel('Feature')