In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import textwrap
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the imported libraries -- consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

# Plot defaults: seaborn "darkgrid" style with the "PRGn" colour palette.
sns.set_style("darkgrid")
sns.set_palette("PRGn")

In [None]:
class Data_Explorer:
    """Group the columns of a DataFrame by kind and summarise or plot each group.

    Columns are split on construction into numeric and ``object`` columns and
    then sub-grouped by their number of distinct non-null values:

    * ``numeric_binary``: exactly 2 distinct values
    * ``numeric_discrete``: 3 to ``DISCRETE_MAX_UNIQUE`` distinct values
    * ``numeric_continuous``: every other numeric column (this also catches
      constant and all-null columns, because it is the remainder)
    * ``categorical_dichotomous``: exactly 2 distinct values
    * ``categorical_nominal``: 3 to ``NOMINAL_MAX_UNIQUE`` distinct values
    * ``categorical_narrative``: more than ``NOMINAL_MAX_UNIQUE`` distinct values

    The grouping is computed once in ``__init__``; it is not refreshed if
    ``df`` is modified afterwards.
    """

    # Largest number of distinct values for a numeric column to count as discrete.
    DISCRETE_MAX_UNIQUE = 20
    # Largest number of distinct values for an object column to count as nominal.
    NOMINAL_MAX_UNIQUE = 100

    def __init__(self, df: pd.DataFrame) -> None:
        """Store ``df`` and classify its columns.

        Args:
            df: The DataFrame to explore. It is kept by reference, not copied.
        """
        self.df = df

        self.numeric_features = self.df.select_dtypes('number').columns
        # nunique() ignores nulls, so a 0/1 column containing NaN is still
        # binary (np.unique would count NaN as an extra value).
        numeric_unique = {var: self.df[var].nunique() for var in self.numeric_features}
        self.numeric_binary = [var for var, count in numeric_unique.items() if count == 2]
        self.numeric_discrete = [
            var for var, count in numeric_unique.items()
            if 2 < count <= self.DISCRETE_MAX_UNIQUE
        ]
        classified = set(self.numeric_binary) | set(self.numeric_discrete)
        self.numeric_continuous = [var for var in numeric_unique if var not in classified]

        self.categorical_features = self.df.select_dtypes('object').columns
        categorical_unique = {var: self.df[var].nunique() for var in self.categorical_features}
        self.categorical_dichotomous = [
            var for var, count in categorical_unique.items() if count == 2
        ]
        self.categorical_nominal = [
            var for var, count in categorical_unique.items()
            if 3 <= count <= self.NOMINAL_MAX_UNIQUE
        ]
        self.categorical_narrative = [
            var for var, count in categorical_unique.items()
            if count > self.NOMINAL_MAX_UNIQUE
        ]

    def _percent_null(self, columns) -> pd.DataFrame:
        """Return a one-column ``Percent Null`` frame (0-100) indexed by ``columns``."""
        null_percentages = self.df[columns].isnull().sum() / len(self.df) * 100
        return null_percentages.to_frame(name="Percent Null")

    def evaluate_nulls(self) -> pd.Series:
        """Return the fraction (0-1, not a percentage) of nulls in every column."""
        return self.df.isnull().sum() / len(self.df)

    def evaluate_numeric_feature_nulls(self) -> pd.DataFrame:
        """Return the percentage of nulls in every numeric column."""
        return self._percent_null(self.numeric_features)

    def evaluate_numeric_binary_nulls(self) -> pd.DataFrame:
        """Return the percentage of nulls in the binary numeric columns."""
        return self._percent_null(self.numeric_binary)

    def evaluate_numeric_discrete_nulls(self) -> pd.DataFrame:
        """Return the percentage of nulls in the discrete numeric columns."""
        return self._percent_null(self.numeric_discrete)

    def evaluate_categorical_feature_nulls(self) -> pd.DataFrame:
        """Return the percentage of nulls in every ``object`` column."""
        return self._percent_null(self.categorical_features)

    def evaluate_categorical_dichotomous_nulls(self) -> pd.DataFrame:
        """Return the percentage of nulls in the two-valued ``object`` columns."""
        return self._percent_null(self.categorical_dichotomous)

    def evaluate_categorical_nominal_nulls(self) -> pd.DataFrame:
        """Return the percentage of nulls in the nominal ``object`` columns."""
        return self._percent_null(self.categorical_nominal)

    def evaluate_categorical_narrative_nulls(self) -> pd.DataFrame:
        """Return the percentage of nulls in the high-cardinality ``object`` columns."""
        return self._percent_null(self.categorical_narrative)

    def plot_numeric_continuous(self) -> None:
        """Draw a density histogram with a KDE curve for each continuous column.

        Does nothing when there are no continuous columns.
        """
        columns = self.numeric_continuous
        if not columns:
            return
        # squeeze=False always yields a 2-D array of Axes, so a single column
        # is handled the same way as many.
        _, axes = plt.subplots(
            nrows=1, ncols=len(columns), figsize=(9, 2 * len(columns)), squeeze=False
        )
        for ax, column in zip(axes.flat, columns):
            # histplot(kde=True, stat="density") is the documented replacement
            # for the deprecated distplot.
            sns.histplot(
                x=self.df[column], kde=True, stat="density", kde_kws={"cut": 3}, ax=ax
            )
            ax.set_title(f'Distribution Plot - {column}')

    def plot_numeric_features(self) -> None:
        """Draw a boxplot and a 10-bin histogram side by side for each numeric column.

        Does nothing when there are no numeric columns.
        """
        columns = list(self.numeric_features)
        if not columns:
            return
        _, axes = plt.subplots(
            nrows=len(columns), ncols=2, figsize=(8, 3 * len(columns)), squeeze=False
        )
        for row, column in zip(axes, columns):
            box_ax, hist_ax = row
            sns.boxplot(data=self.df[column], ax=box_ax)
            box_ax.set_title(f'Boxplot - {column}')
            sns.histplot(data=self.df[column], ax=hist_ax, bins=10)
            hist_ax.set_title(f'Histogram - {column}')
        # Lay out once, after every subplot has been drawn.
        plt.tight_layout()

    def plot_categorical_dichotomous(self) -> None:
        """Draw a count plot for each two-valued ``object`` column.

        Does nothing when there are no such columns.
        """
        columns = self.categorical_dichotomous
        if not columns:
            return
        _, axes = plt.subplots(
            nrows=1, ncols=len(columns), figsize=(9, 2 * len(columns)), squeeze=False
        )
        for ax, column in zip(axes.flat, columns):
            sns.countplot(x=self.df[column], ax=ax)
            ax.set_title(f'Count Plot - {column}')

    def plot_categorical_nominal(self) -> None:
        """Draw an annotated count plot, ordered by frequency, for each nominal column.

        Does nothing when there are no nominal columns.
        """
        columns = self.categorical_nominal
        if not columns:
            return
        _, axes = plt.subplots(
            nrows=len(columns), figsize=(10, 7 * len(columns)), squeeze=False
        )
        for ax, column in zip(axes.flat, columns):
            sns.countplot(
                data=self.df, x=column, ax=ax,
                order=self.df[column].value_counts().index,
            )
            ax.set_title(f'Count Plot - {column}')
            ax.set_ylabel('Unique Count')

            # Write each bar's count just above the bar.
            for patch in ax.patches:
                height = patch.get_height()
                if pd.notna(height):
                    ax.annotate(
                        f'{int(height)}',
                        (patch.get_x() + patch.get_width() / 2., height),
                        ha='center', va='bottom', fontsize=9,
                    )
            ax.tick_params(axis='x', labelrotation=87)

        plt.subplots_adjust(hspace=.5)