In [1]:
# 4/30/21
# Generate Plots Sub File

# Import Libraries
import datetime
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Apply matplotlib's 'ggplot' style sheet globally so every figure produced by
# the helpers below shares the same look.
plt.style.use('ggplot')

In [2]:
""" Function that takes input credit card dataset and stores a set of images depicting the data
Inputs: Pandas Dataframe containing information about credit card transactions 
Outputs: None. Intended to store multiple plots in the "Plots" folder showing the distribution of the values in the dataframe (not implemented yet: the function currently only creates an empty figure)
"""
def gen_distribution(df): 
    """Placeholder for the per-column distribution plots of the credit card data.

    NOTE(review): this is an unfinished stub. It only opens a new, empty
    matplotlib figure; nothing is drawn from ``df`` and no file is written.

    Args:
        df: pandas DataFrame of credit card transactions (currently unused).

    Returns:
        None.
    """
    # The figure handle is created but never drawn on, saved, or closed.
    fig = plt.figure()
    

In [3]:
""" Function that creates a plot of the transaction amount and transaction time distributions
Inputs: Credit Card Fraud Dataframe
Outputs: None, but stores both distribution plots as a single image in the Plots folder
"""
def gen_amount_time_plot(df):
    """Plot the transaction amount and transaction time distributions side by side.

    One distribution plot is drawn for the ``Amount`` column and one for the
    ``Time`` column on a 1x2 grid. Each x-axis is clipped to the observed data
    range, and the figure is written to
    ``Plots/Transaction Time and Amount Distributions.png`` at 175 dpi.

    Args:
        df: pandas DataFrame with numeric ``Amount`` and ``Time`` columns.

    Returns:
        None. The figure is saved to disk and left open.

    Raises:
        KeyError: if ``df`` lacks the ``Amount`` or ``Time`` column.
    """
    fig, axes = plt.subplots(1, 2, figsize=(18, 4))

    # (column, panel title) for the left and right panel respectively.
    panels = (
        ('Amount', 'Transaction Amount Distribution'),
        ('Time', 'Transaction Time Distribution'),
    )
    for ax, (column, title) in zip(axes, panels):
        series = df[column]
        # NOTE(review): sns.distplot is deprecated in seaborn >= 0.11; it is
        # kept here so the saved image looks the same as before.
        sns.distplot(series.values, ax=ax)
        ax.set_title(title)
        # Series.min()/max() are vectorised and skip NaN, unlike the builtin
        # min()/max(), which iterate element by element in Python.
        ax.set_xlim([series.min(), series.max()])

    # savefig does not create missing directories.
    os.makedirs('Plots', exist_ok=True)
    fig.savefig('Plots/Transaction Time and Amount Distributions.png', dpi=175)

In [3]:
""" Function that creates a plot to show the amount of fraud vs clean transactions in the dataset.
Inputs: Credit Card Fraud Dataframe
Outputs: None but stores a plot in the Plots folder
"""
def gen_data_class(df):
    """Plot how many transactions fall into each class and save the chart.

    Draws a count plot of the ``Class`` column (per the chart title, 0 is
    "No Fraud" and 1 is "Fraud") and writes it to
    ``Plots/Data Classification.png``.

    Args:
        df: pandas DataFrame with a ``Class`` column.

    Returns:
        None. The figure is saved to disk and left open.
    """
    # Draw on a dedicated figure instead of whatever pyplot figure happens to
    # be current, so earlier plots cannot bleed into this image.
    fig, ax = plt.subplots()

    # Class count plot. ``x`` is passed by keyword: seaborn >= 0.12 makes every
    # argument after ``data`` keyword-only, so the positional form raises.
    sns.countplot(x='Class', data=df, palette=["#0101DF", "#DF0101"], ax=ax)
    ax.set_title('Fraud Classification \n (0: No Fraud || 1: Fraud)', fontsize=14)

    # savefig does not create missing directories.
    os.makedirs('Plots', exist_ok=True)
    fig.savefig('Plots/Data Classification.png')

In [4]:
""" Function that creates a heatmap for the input features
Inputs: Dataframe, and the run-set name used in the plot title and output file name
Outputs: None but stores a heatmap plot in Plots folder
"""
def gen_heatmap_plot(df, runset_name):
    """Save a correlation heatmap of the columns in ``df``.

    The pairwise correlation matrix from ``df.corr()`` is drawn with the
    reversed "coolwarm" colour map and written to
    ``Plots/<runset_name>_heatmap_plot`` (no extension is given, so matplotlib
    picks its default output format).

    Args:
        df: pandas DataFrame whose columns are correlated against each other.
        runset_name: label for this run; converted with ``str()`` and used in
            both the plot title and the output file name.

    Returns:
        None. The figure is saved to disk and left open.
    """
    # The original called plt.plot(figsize=...), which is a line-plot call, not
    # a figure constructor: the 24x20 canvas was never created and the heatmap
    # landed on whichever axes was current. Create the figure explicitly.
    fig, ax = plt.subplots(figsize=(24, 20))

    # Create correlation matrix
    corr = df.corr()
    # annot_kws is kept from the original call; it has no visible effect
    # unless annot=True is also requested.
    sns.heatmap(corr, cmap='coolwarm_r', annot_kws={'size': 20}, ax=ax)
    ax.set_title(str(runset_name) + " Dataset Heatmap")

    # savefig does not create missing directories.
    os.makedirs('Plots', exist_ok=True)
    fig.savefig('Plots/' + str(runset_name) + '_heatmap_plot')

In [5]:
""" Function that creates a bar chart of Feature Importance for the Random Forest
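Inputs: Dataframe (its first 30 column names are used as the feature labels), Feature Importance Series, and the run-set name used in the plot title and output file name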
Outputs: None but stores a bar chart showing feature importance in the Plots folder
"""
def gen_feature_plot(df, sr, runset_name, n_features=30):
    """Save a bar chart of feature importances, most important first.

    The first ``n_features`` column names of ``df`` are paired with the values
    in ``sr``, sorted by importance in descending order, and drawn as a bar
    chart that is written to ``Plots/<runset_name>_feature_importance_plot``.

    Args:
        df: pandas DataFrame whose leading columns are the model features.
            Only the column names are used.
        sr: feature importance values, one per feature, in the same order as
            the leading columns of ``df``.
            NOTE(review): presumably a Random Forest's importances; verify
            against the caller.
        runset_name: label for this run; converted with ``str()`` and used in
            both the plot title and the output file name.
        n_features: how many leading columns of ``df`` are feature columns.
            Defaults to 30, the value that was previously hard-coded.

    Returns:
        None. The figure is saved to disk and left open.
    """
    # The original called plt.plot(figsize=...), which is a line-plot call, not
    # a figure constructor: the 24x20 canvas was never created and the bars
    # landed on whichever axes was current. Create the figure explicitly.
    fig, ax = plt.subplots(figsize=(24, 20))

    feature_imp = pd.DataFrame({
        'Feature': df.columns[0:n_features],
        'Feature Importance': sr,
    })
    feature_imp = feature_imp.sort_values(by='Feature Importance', ascending=False)

    g = sns.barplot(x='Feature', y='Feature Importance', data=feature_imp, ax=ax)
    # Rotate the feature names so they do not overlap along the x-axis.
    g.set_xticklabels(g.get_xticklabels(), rotation=90)
    g.set_title('Feature Importance: ' + str(runset_name))

    # savefig does not create missing directories.
    os.makedirs('Plots', exist_ok=True)
    fig.savefig('Plots/' + str(runset_name) + '_feature_importance_plot')