# Data Visualization

In [26]:
#Distribution of instances according to classification (label)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import missingno as msno
%matplotlib inline
%config InlineBackend.figure_format='svg'
%config InlineBackend.rc={'figure.figsize': (5, 2.5)}

def classes_distribution(data):
    """Show a bar chart of how many rows belong to each class.

    The class label is read from the last column of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset whose last column holds the class label.
    """
    label_column = data.columns[-1]

    # NOTE(review): plt.rc updates matplotlib's global rcParams, so the
    # smaller font also applies to figures drawn after this call.
    plt.rc("font", size=6)

    # sort=False stops value_counts from ordering the bars by frequency.
    occurrences = data[label_column].value_counts(sort=False)
    occurrences.plot(kind='bar')

    plt.xlabel(label_column)
    # NOTE(review): the bar heights are occurrence counts; the 'score'
    # axis label is kept exactly as it was.
    plt.ylabel('score')
    plt.show()
    
def missing_values_table(data):
    """Summarise the missing values of ``data`` column by column.

    Prints a two-line summary (number of columns, number of columns with
    missing values) and returns the detailed table.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset to inspect.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``data`` that contains at least one missing
        value, indexed by column name, with the columns 'Missing Values'
        (count) and '% of Total Values' (percentage of rows, rounded to one
        decimal), sorted by percentage in descending order.
    """
    # Count the nulls once and reuse the result for both table columns.
    missing_counts = data.isnull().sum()
    missing_percent = 100 * missing_counts / len(data)

    # Name the columns explicitly instead of renaming positional 0/1 labels.
    summary = pd.DataFrame({
        'Missing Values': missing_counts,
        '% of Total Values': missing_percent,
    })

    # Select on the count rather than on the percentage: for a dataframe
    # with no rows the percentage is 0/0 = NaN, and NaN != 0 would wrongly
    # report every column as having missing values.
    has_missing = summary['Missing Values'] != 0
    summary = summary[has_missing].sort_values(
        '% of Total Values', ascending=False).round(1)

    # Print some summary information
    print("Your selected dataframe has {} columns.\n"
          "There are {} columns that have missing values.".format(
              data.shape[1], summary.shape[0]))

    return summary


def missing_values_bar_chart(data, width, height):
    """Draw missingno's bar chart of nullity by column for ``data``.

    The chart gives a quick graphical overview of the integrity of the
    dataset.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset to visualise.
    width, height :
        Figure dimensions, forwarded to missingno as ``figsize``.
    """
    figure_size = (width, height)
    msno.bar(data, figsize=figure_size, color="gray")

def nullity_matrix(data, width, height):
    """Draw missingno's nullity matrix, showing the pattern of missing data.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset to visualise.
    width, height :
        Figure dimensions, forwarded to missingno as ``figsize``.
    """
    figure_size = (width, height)
    msno.matrix(data, figsize=figure_size)
    
def correlation_heatmap(data, width, height):
    """Draw missingno's correlation heatmap, which measures nullity correlation.

    Nullity correlation ranges from -1 (if one variable appears the other
    definitely does not) to 0 (variables appearing or not appearing have no
    effect on one another) to 1 (if one variable appears the other
    definitely also does).

    Parameters
    ----------
    data : pandas.DataFrame
        Dataset to visualise.
    width, height :
        Figure dimensions, forwarded to missingno as ``figsize``.
    """
    msno.heatmap(data, figsize=(width, height))
