# Simple GUI for Dataframes

Run this notebook from your own notebook with `%run this_notebook.ipynb`, after you have defined the DataFrame you want to explore as `df`.

In [41]:
import pandas as pd
import ipywidgets as widgets
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import seaborn as sns 
from IPython.display import display, clear_output, Markdown
%matplotlib inline

# Numeric Features

In [23]:
# Collect every numeric column of `df`, whatever its storage width
# (int32, float32, nullable Int64, ...), instead of matching only the
# literal dtype names 'int64' and 'float64'. Boolean columns are left out,
# as they were before.
numeric_features = [
    name for name in df.columns
    if pd.api.types.is_numeric_dtype(df[name])
    and not pd.api.types.is_bool_dtype(df[name])
]

# Multi-select list from which the user picks the numeric column(s) to explore.
columns = widgets.SelectMultiple(
    options=numeric_features,
    description='Numeric',
    disabled=False
)

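The `value` attribute of the `SelectMultiple` widget is a tuple of the selected column names, so the single-column plots below use `columns.value[0]` (the first selected column):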

In [46]:
# Plot a histogram of the top column selected
def histogram():
    """Draw a 100-bin histogram of the first selected numeric column.

    A red vertical line marks the median, which is also shown in the title.
    Missing values are dropped first: with NaNs present `np.median` returns
    NaN, which made the marker and the title meaningless.

    Prints a hint and returns without plotting when no column is selected.
    """
    if not columns.value:
        # `columns.value` is an empty tuple until the user selects something.
        print('Select a numeric column first.')
        return
    name = columns.value[0]
    values = df[name].dropna()
    median = values.median()
    figure(figsize=(5,3))
    plt.hist(values, bins=100)
    plt.axvline(x=median, color='r', linestyle='-')
    # str() keeps the title working for non-string column labels.
    plt.title(str(name).upper() +'\n Median ' + str(median))
    plt.show()

In [48]:
# Plot a boxplot of the top column selected
def boxplot():
    """Draw a boxplot of the first selected numeric column.

    The title shows the column name and its maximum. Missing values are
    dropped before plotting so that NaNs cannot propagate into the box
    statistics.

    Prints a hint and returns without plotting when no column is selected.
    """
    if not columns.value:
        # `columns.value` is an empty tuple until the user selects something.
        print('Select a numeric column first.')
        return
    name = columns.value[0]
    values = df[name].dropna()
    figure(figsize=(5,3))
    # Pass a plain array so the plot does not depend on the Series index.
    plt.boxplot(values.to_numpy())
    # str() keeps the title working for non-string column labels.
    plt.title('{}\n MAX {}'.format(str(name).upper(), values.max()))
    plt.show()

In [44]:
# Line plot of the top column selected
def plot():
    """Draw a line plot of the first selected numeric column against the index.

    A red horizontal line marks the mean, which is also shown in the title
    with two decimals.

    Prints a hint and returns without plotting when no column is selected.
    """
    if not columns.value:
        # `columns.value` is an empty tuple until the user selects something.
        print('Select a numeric column first.')
        return
    name = columns.value[0]
    series = df[name]
    mean = series.mean()  # computed once, reused for the line and the title
    figure(figsize=(5,3))
    plt.plot(series, color='black', linewidth=0.1)
    plt.axhline(y=mean, color='r', linestyle='-')
    # str() keeps the title working for non-string column labels.
    plt.title(str(name).upper()+'\n mean {:.2f}'.format(mean))
    plt.show()

In [7]:
# Describe selected columns
def describe_columns():
    """Print the `describe()` summary of `df`, restricted to the selected columns."""
    selected = list(columns.value)
    summary = df.describe()
    print(summary.loc[:, selected])


In [47]:
# Correlation plot of selected columns
def correlation():
    """Draw a heatmap of the pairwise correlations between the selected columns.

    Prints a hint and returns without plotting when no column is selected;
    an empty correlation matrix cannot be rendered as a heatmap.
    """
    selected = list(columns.value)
    if not selected:
        print('Select at least one numeric column first.')
        return
    figure(figsize=(5,3))
    sns.heatmap(df.loc[:, selected].corr(),
                square=True, cmap='RdYlGn')
    plt.title('Correlation')
    plt.show()

## Categorical functions


In [9]:
# Columns offered for categorical inspection: everything that is not a
# plain numeric column (booleans are treated as categorical here).
categorical_features = [
    name for name in df.columns
    if not pd.api.types.is_numeric_dtype(df[name])
    or pd.api.types.is_bool_dtype(df[name])
]

# Selector read by `on_button2_clicked` and shown by `display_cat`.
# It has to exist before either of them runs.
cat_cols = widgets.SelectMultiple(
    options=categorical_features,
    description='Categorical',
    disabled=False
)

button2 = widgets.Button(description='Get Info')
out2 = widgets.Output()
def on_button2_clicked(b):
    """Show the unique-value and missing-value counts of the first selected categorical column.

    `b` is the clicked button passed in by ipywidgets; it is not used.
    The result is rendered as Markdown inside `out2`, replacing earlier output.
    """
    with out2:
        clear_output()
        if not cat_cols.value:
            # `cat_cols.value` is an empty tuple until the user selects something.
            print('Select a categorical column first.')
            return
        my_col = df[cat_cols.value[0]]
        many_na = my_col.isna().sum()
        # nunique(dropna=False) counts all missing values as one distinct
        # value; len(set(...)) can count every float NaN separately.
        many_unique = my_col.nunique(dropna=False)
        display(Markdown('''- The feature has in total {} unique values:
- Contains {} missing values'''.format(many_unique,
                                       many_na)))
button2.on_click(on_button2_clicked)

def display_cat():
    """Display the categorical selector with its 'Get Info' button and output area."""
    display(cat_cols, widgets.VBox([button2, out2]))

In [10]:
# Dispatch table: menu label -> zero-argument callback run for that label.
functions = {
    'Plot': plot,
    'Histogram': histogram,
    'Boxplot': boxplot,
    'Correlation +': correlation,
    'Description +': describe_columns,
    'Show Categories': display_cat,
}

# Single-choice menu whose entries are the dispatch table's labels,
# in the order they were inserted above.
opciones = widgets.Select(
    options=list(functions),
    description='Options',
    disabled=False
)

In [11]:
out = widgets.Output()
button = widgets.Button(description='Run')

def on_button_clicked(b):
    """Run the action currently chosen in `opciones`, rendering into `out`.

    `b` is the clicked button passed in by ipywidgets; it is not used.
    Earlier output in `out` is cleared before the action runs.
    """
    with out:
        clear_output()
        selected_action = functions[opciones.value]
        selected_action()

button.on_click(on_button_clicked)

## Some Info

In [21]:
# Help text describing the GUI, filled in with the row and column counts of `df`.
# The help text previously read "select one more features"; it now says "one or more".
info = Markdown('''# Usual EDA
The dataframe has been loaded as `df` and can be explored with the options below. It contains {} rows, {} features 
- For the numeric features you can select one or more features (holding shift) in the 'Numeric' dropdown.
- The options 'Correlation +' and 'Description +'
accept the selected features
- The options Plot, Histogram and Boxplot will make a graph of only one column. If more than one are selected, the one from the top will be chosen
'''.format(df.shape[0], df.shape[1]))


# Horizontal rule, available as a visual separator.
line = Markdown('''______''')

In [33]:
# deleted info and line below!
# Left panel: column picker, action menu and 'Run' button; right panel: output area.
display(
    widgets.HBox(children=[
        widgets.VBox(children=[columns, opciones, button]),
        widgets.VBox(children=[out]),
    ])
)

HBox(children=(VBox(children=(SelectMultiple(description='Numeric', options=('longitude', 'latitude', 'housing…