In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
import networkx as nx

%matplotlib inline

In [2]:
def describe_data(serie: pd.Series, title: str, nb_bins=None, kde: bool = True, figsize=(10, 6)) -> pd.DataFrame:
    """Plot a boxplot and a histogram of a Series and return its summary statistics.

    :type serie: pd.Series
    :type title: str
    :type nb_bins: int or None
    :type kde: bool
    :type figsize: tuple
    :param serie: series to be described
    :param title: title of the figure
    :param nb_bins: number of bins of the histogram, forwarded to seaborn as ``bins``
    :param kde: whether or not to draw the KDE curve on the histogram
    :param figsize: figure size, forwarded to ``plt.subplots``
    :return: transposed ``describe()`` of the series, shown after the plots
    :raises TypeError: if ``serie`` is not a pd.Series
    """
    # isinstance (rather than an exact type comparison) also accepts
    # pd.Series subclasses.
    if not isinstance(serie, pd.Series):
        raise TypeError('serie must be a pd.Series not {}'.format(type(serie)))
    df = pd.DataFrame(data=serie)
    # The style must be set before the figure is created: style parameters
    # are only picked up by axes created afterwards.
    sns.set_style("darkgrid", {"axes.facecolor": ".9"})
    f, axes = plt.subplots(1, 2, figsize=figsize)
    sns.boxplot(data=df, ax=axes[0])
    # NOTE(review): distplot is deprecated in recent seaborn releases; migrate
    # to histplot/displot once the minimum supported seaborn version is known.
    sns.distplot(df, color="b", kde=kde, ax=axes[1], bins=nb_bins)
    plt.tight_layout()
    plt.grid(True)
    plt.suptitle(title, fontsize=20)
    # Leave room at the top of the figure for the suptitle.
    plt.subplots_adjust(top=.9)
    plt.show()
    return df.describe().T


def pairwise_plot(df: pd.DataFrame) -> pd.DataFrame:
    """Show a seaborn pairplot of ``df`` and return its Spearman correlation matrix.

    :param df: dataframe whose columns are plotted against each other
    :return: result of ``df.corr(method='spearman')``
    """
    facecolor_override = {"axes.facecolor": ".9"}
    sns.set_style("darkgrid", facecolor_override)

    sns.pairplot(df)
    plt.suptitle('Pairwise plot', fontsize=20)
    # Leave room at the top of the figure for the suptitle.
    plt.subplots_adjust(top=0.9)
    plt.show()

    spearman_matrix = df.corr(method='spearman')
    return spearman_matrix


def regression_plot(df: pd.DataFrame, col1: str, col2: str, title: str, figsize=(7, 5)):
    """Scatter-plot two columns with a regression fit and report their correlations.

    Prints the Pearson and Spearman correlation coefficients after showing the plot.

    :param df: dataframe holding both columns
    :param col1: name of the column plotted on the x axis
    :param col2: name of the column plotted on the y axis
    :param title: title of the figure
    :param figsize: figure size, forwarded to ``plt.subplots``
    :return: tuple ``(pearson, spearman)`` of the correlation coefficients
        between ``col1`` and ``col2``
    """
    pair = df[[col1, col2]]
    # .iloc[0, 1] picks the off-diagonal entry of the 2x2 correlation matrix.
    correlation_pearson = pair.corr(method='pearson').iloc[0, 1]
    correlation_spearman = pair.corr(method='spearman').iloc[0, 1]
    # The style must be set before the figure is created: style parameters
    # are only picked up by axes created afterwards.
    sns.set_style("darkgrid", {"axes.facecolor": ".9"})
    f, axes = plt.subplots(1, 1, figsize=figsize)
    # x and y are passed by keyword: recent seaborn versions accept them as
    # keyword-only arguments, and older versions accept keywords as well.
    sns.regplot(x=df[col1], y=df[col2], scatter_kws={'s': 3, 'color': 'blue'}, line_kws={'color': 'red'}, ax=axes)
    f.suptitle(title)
    plt.show()

    print('Pearson correlation coefficient is: {:.2}   \n'.format(correlation_pearson))
    print('Spearman correlation coefficient is: {:.2}   \n'.format(correlation_spearman))

    return correlation_pearson, correlation_spearman