# This notebook contains all relevant functions for generating plots.

In [1]:
# import relevant libraries

import matplotlib.pyplot as plt
import numpy as np

# General Functions

In [3]:
"""
This function sets the size of a plot.

Inputs:
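w (number): width of the subplot area in inches
h (number): height of the subplot area in inches
ax (matplotlib axes, optional): axes whose figure is resized; defaults to the current axes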

Outputs:
None
"""
def set_size(w,h, ax=None):
    """Resize the figure so that its subplot area measures w x h inches.

    The figure size is derived by dividing the requested width and height
    by the fraction of the figure that the subplot area occupies (taken
    from the figure's subplot parameters).

    Inputs:
    w (number): target width of the subplot area in inches
    h (number): target height of the subplot area in inches
    ax (axes, optional): axes whose figure is resized; when omitted the
        current axes returned by plt.gca() is used

    Outputs:
    None
    """
    # Compare against None explicitly so that a caller-supplied axes object
    # is never swapped for the current axes just because it evaluates falsy.
    if ax is None:
        ax = plt.gca()

    params = ax.figure.subplotpars
    fig_width = float(w) / (params.right - params.left)
    fig_height = float(h) / (params.top - params.bottom)
    ax.figure.set_size_inches(fig_width, fig_height)

In [8]:
"""
This function generates a list of feature names with significant p values from prediction.

Inputs: 
df (pandas dataframe): dataframe of results

Outputs:
sig_p (list of strings): list of features with significant p values from prediction
"""
def get_sig_p(df):
    """Collect the names of features whose median p value is significant.

    Walks the rows of df in order and keeps the 'feature' entry of every
    row whose 'p_median' entry is at most 0.05.

    Inputs:
    df (pandas dataframe): results with 'feature' and 'p_median' columns

    Outputs:
    list: the selected 'feature' values, in row order
    """
    return [
        record['feature']
        for _, record in df.iterrows()
        if record['p_median'] <= 0.05
    ]

# Individual Plot Functions

In [None]:
"""
This function plots individual feature results on matplotlib.

Inputs:
df (pandas dataframe): dataframe of results
text_features (list of strings): list of text feature names 
path (string): path to store plot
my_model (string): name of model
dataframe_type (string): Full or Complete

Outputs:
plot (matplotlib plot): plot of results
"""

def individual_plot(df, text_features, path, my_model, dataframe_type):
    """Plot the median Pearson r of each individual feature and save the figure.

    Rows are sorted by 'r_median' (descending) and rows without an
    'r_median' are dropped. Each feature is drawn as a point with a
    horizontal error bar spanning its 'r_range'; features returned by
    get_sig_p are marked with a trailing "*". The rounded median is also
    written as text next to each row.

    Inputs:
    df (pandas dataframe): results with 'feature', 'r_median', 'r_range'
        and 'p_median' columns; each 'r_range' entry is indexed as
        [0] for the lower bound and [1] for the upper bound
    text_features (list of strings): features drawn in blue; all others
        are drawn in gray
    path (string): prefix prepended to the output file name as-is
        (no separator is inserted)
    my_model (string): model name shown in the title
    dataframe_type (string): 'Full' selects the full-data layout; any
        other value selects the 'Complete' layout

    Outputs:
    fig (matplotlib figure): the figure that was saved
    """
    df = df.sort_values('r_median', ascending=False).dropna(subset=['r_median'])
    # Sets give constant-time membership tests in the per-row lookups below.
    significant = set(get_sig_p(df))
    text_feature_set = set(text_features)

    plt.rcdefaults()
    fig, ax = plt.subplots()
    medians = df['r_median'].values
    labels = df['feature'].apply(
        lambda name: name + "*" if name in significant else name
    ).values
    lower = df['r_range'].apply(lambda bounds: bounds[0]).values
    upper = df['r_range'].apply(lambda bounds: bounds[1]).values

    # errorbar expects distances from the point, not absolute bounds:
    # first row is the distance to the lower bound, second to the upper.
    x_err = [
        [median - low for median, low in zip(medians, lower)],
        [high - median for median, high in zip(medians, upper)],
    ]

    colors = ['blue' if name in text_feature_set else 'gray'
              for name in df['feature']]

    ax.scatter(medians, labels, c=colors)
    # Pass the axes explicitly rather than relying on the "current axes".
    set_size(5, 9, ax=ax)

    ax.errorbar(medians, labels, xerr=x_err, fmt="none", ecolor='black',
                capsize=1.5, elinewidth=0.2)

    # The two layouts differ only in where the value column sits, the
    # title tag and the output file name.
    if dataframe_type == 'Full':
        text_x = .455
        title_tag = 'Full'
        file_name = 'individual_prediction_full.png'
    else:
        text_x = .515
        title_tag = 'Complete'
        file_name = 'individual_prediction_complete.png'

    # One value label per row, then the column header above the first row.
    for row, median in enumerate(medians):
        ax.text(x=text_x, y=row + .2 + .001 * row, s=round(median, 3))
    ax.text(x=text_x, y=-1.6, s="r median")

    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Pearson r')
    # Fixed: the original called the non-existent str.fomat here.
    ax.set_title('Individual Feature Prediction ({}, {})'.format(title_tag, my_model))

    fig.savefig(path + file_name, bbox_inches='tight')

    return fig

# Umbrella Plot Functions

In [4]:
"""
This function plots umbrella feature results on matplotlib.

Inputs:
df (pandas dataframe): dataframe of results
path (string): path to store plot
my_model (string): name of model

Outputs:
plot (matplotlib plot): plot of results
"""
def umbrella_plot(df, path, my_model):
    """Plot the median Pearson r of each umbrella feature and save the figure.

    Rows are sorted by 'r_median' (descending), rows without an 'r_median'
    are dropped, and a fixed set of aggregate feature names is excluded.
    Each remaining feature is drawn as a point with a horizontal error bar
    spanning its 'r_range'; features returned by get_sig_p are marked with
    a trailing "*". The rounded median is also written next to each row.

    Inputs:
    df (pandas dataframe): results with 'feature', 'r_median', 'r_range'
        and 'p_median' columns; each 'r_range' entry is indexed as
        [0] for the lower bound and [1] for the upper bound
    path (string): prefix prepended to 'umbrella_prediction.png' as-is
        (no separator is inserted)
    my_model (string): model name shown in the title

    Outputs:
    fig (matplotlib figure): the figure that was saved
    """
    excluded = ['all_text_addendums', 'all_text', 'all_text_words',
                'all_target_features', 'all_friends']

    umbrella = df.sort_values('r_median', ascending=False).dropna(subset=['r_median'])
    # Filter with a boolean mask instead of dropping by index label, so rows
    # that merely share an index label with an excluded row are kept.
    umbrella = umbrella[~umbrella['feature'].isin(excluded)]
    # A set gives constant-time membership tests in the label lookup below.
    significant = set(get_sig_p(umbrella))

    plt.rcdefaults()
    fig, ax = plt.subplots()
    medians = umbrella['r_median'].values
    labels = umbrella['feature'].apply(
        lambda name: name + "*" if name in significant else name
    ).values
    lower = umbrella['r_range'].apply(lambda bounds: bounds[0]).values
    upper = umbrella['r_range'].apply(lambda bounds: bounds[1]).values

    # errorbar expects distances from the point, not absolute bounds:
    # first row is the distance to the lower bound, second to the upper.
    x_err = [
        [median - low for median, low in zip(medians, lower)],
        [high - median for median, high in zip(medians, upper)],
    ]

    ax.scatter(medians, labels)
    ax.errorbar(medians, labels, xerr=x_err, fmt="none", ecolor='black',
                capsize=1.5, elinewidth=0.2)
    # Pass the axes explicitly rather than relying on the "current axes".
    set_size(5, 3, ax=ax)

    # One value label per row, then the column header above the first row.
    for row, median in enumerate(medians):
        ax.text(x=.46, y=row + 0.1, s=round(median, 3))
    ax.text(x=.46, y=-.5, s="r median")

    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Pearson r')
    # Fixed: the original called the non-existent str.fomat here.
    ax.set_title('Umbrella Features Prediction (Complete, {})'.format(my_model))

    fig.savefig(path + 'umbrella_prediction.png', bbox_inches='tight')

    return fig

# Combined Features Plot

In [None]:
"""
This function plots combined feature results on matplotlib.

Inputs:
df (pandas dataframe): dataframe of results
path (string): path to store plot

Outputs:
plot (matplotlib plot): plot of results
"""
def combined_plot(df, path, res_type='umbrella'):
    """Plot the median Pearson r of combined features and save the figure.

    Rows are sorted by 'r_median' (descending). Each feature is drawn as a
    point with a horizontal error bar spanning its 'r_range'; features
    whose 'p_median' is at most 0.05 are marked with a trailing "*". The
    rounded median is also written next to each row.

    Inputs:
    df (pandas dataframe): results with 'r_median', 'r_range' and
        'p_median' columns plus a feature-name column; each 'r_range'
        entry is indexed as [0] for the lower bound and [1] for the upper
        bound
    path (string): prefix prepended to the output file name as-is
        (no separator is inserted)
    res_type (string): 'umbrella' selects the umbrella layout, title and
        file name; any other value selects the individual-feature ones

    Outputs:
    fig (matplotlib figure): the figure that was saved
    """
    # NOTE(review): the original read res_type, combined_results_umbrella
    # and x_ from names that are not defined in this function. res_type is
    # now a parameter (the 'umbrella' default is a guess -- confirm with
    # callers) and significance is computed from df itself.
    df = df.sort_values('r_median', ascending=False)

    # NOTE(review): the original took labels from a 'feature_' column while
    # every other plot uses 'feature'; accept either -- confirm which one
    # callers actually provide.
    label_col = 'feature_' if 'feature_' in df.columns else 'feature'
    significant = set(df.loc[df['p_median'] <= 0.05, label_col])

    plt.rcdefaults()
    fig, ax = plt.subplots()
    medians = df['r_median'].values
    labels = df[label_col].apply(
        lambda name: name + "*" if name in significant else name
    ).values
    lower = df['r_range'].apply(lambda bounds: bounds[0]).values
    upper = df['r_range'].apply(lambda bounds: bounds[1]).values

    # errorbar expects distances from the point, not absolute bounds:
    # first row is the distance to the lower bound, second to the upper.
    x_err = [
        [median - low for median, low in zip(medians, lower)],
        [high - median for median, high in zip(medians, upper)],
    ]

    ax.scatter(medians, labels)
    ax.errorbar(medians, labels, xerr=x_err, fmt="none", ecolor='black',
                capsize=1.5, elinewidth=0.2)
    # Pass the axes explicitly rather than relying on the "current axes".
    set_size(5, 2, ax=ax)

    # The two layouts differ only in where the value column sits, the
    # title and the output file name.
    if res_type == 'umbrella':
        text_x = .50
        title = 'Combined Umbrella Features Prediction (Complete)'
        file_name = 'combined_umbrella_prediction.png'
    else:
        text_x = .525
        title = 'Combined Individual Features Prediction (Complete)'
        file_name = 'combined_individual_prediction.png'

    # One value label per row, then the column header above the first row.
    for row, median in enumerate(medians):
        ax.text(x=text_x, y=row + .1, s=round(median, 3))
    ax.text(x=text_x, y=-.36, s="r median")

    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Pearson r')
    ax.set_title(title)

    # Fixed: the original ignored `path` and always wrote to the working
    # directory; prefix it the same way the other plot functions do.
    fig.savefig(path + file_name, bbox_inches='tight')

    return fig