In [15]:
class PandasDataframeFunctions:
    """Namespace of small pandas helpers (ranking, value unpacking, null reports).

    Every helper is a static method: call it through the class, e.g.
    ``PandasDataframeFunctions.df_info(df)``.
    """

    @staticmethod
    def weighted_rating(
        df: "pd.DataFrame",
        t: str,
        d: str,
        r: str,
        v: str,
        n: int = 250,
        *,
        min_votes: float = 25_000,
        min_duration: float = 45,
        max_duration: float = 300,
    ) -> dict:
        """Rank titles by a Bayesian weighted rating.

        The formula takes into account the number of votes each title has
        received, the minimum votes required to be on the list, and the mean
        vote for all titles:

            weighted rating (WR) = (v / (v + m)) * R + (m / (v + m)) * C

        where
            R = average vote of the title,
            v = number of votes of the title,
            m = minimum votes required to be listed (``min_votes``),
            C = the mean vote across the whole frame.

        Args:
            df: Frame holding one row per title.
            t: Name of the column containing the titles.
            d: Name of the column containing the duration.
            r: Name of the column containing the average vote.
            v: Name of the column containing the total number of votes.
            n: Number of ranked titles to return (default 250).
            min_votes: ``m`` in the formula; only rows with strictly more
                votes than this are ranked.
            min_duration: Smallest duration (inclusive) a row may have.
            max_duration: Largest duration (inclusive) a row may have.

        Returns:
            A dict mapping title to weighted rating, ordered from the highest
            rating to the lowest and cut to the first ``n`` entries. When a
            title occurs more than once, the rating of its last row is used.
        """
        duration_ok = (df[d] >= min_duration) & (df[d] <= max_duration)
        eligible = df[duration_ok & (df[v] > min_votes)]

        # C is deliberately taken over the whole frame, not only eligible rows.
        overall_mean = df[r].mean()

        votes = eligible[v]
        votes_plus_min = votes + min_votes
        scores = (votes / votes_plus_min * eligible[r]) + (
            min_votes / votes_plus_min * overall_mean
        )

        rating_by_title = dict(zip(eligible[t], scores))
        ranked = sorted(
            rating_by_title.items(), key=lambda item: item[1], reverse=True
        )
        return dict(ranked[:n])

    @staticmethod
    def unpack_values(value) -> set:
        """Return the unique entries of ``', '``-separated text.

        Args:
            value: Either one string such as ``"Drama, Comedy"`` or an
                iterable of such strings.

        Returns:
            The set of distinct entries. An empty string gives an empty set.
        """
        if isinstance(value, str):
            return set(value.split(', ')) if value else set()

        unique_values = set()
        for item in value:
            unique_values.update(item.split(', '))
        return unique_values

    @staticmethod
    def unpack_series(df: "pd.DataFrame", col_name) -> set:
        """Return the unique ``', '``-separated entries found in one column.

        Args:
            df: Frame to read from.
            col_name: Label of a column whose cells are strings such as
                ``"Drama, Comedy"``.

        Returns:
            The set of distinct entries over all rows. Missing cells are
            skipped.
        """
        unique_values = set()
        # Iterate the values directly: Series.iteritems() no longer exists in
        # pandas 2.x, and NaN cells have no .split().
        for cell in df[col_name].dropna():
            unique_values.update(cell.split(', '))
        return unique_values

    @staticmethod
    def dropcol(df: "pd.DataFrame", percentage: float) -> None:
        """Drop, in place, every column whose share of nulls exceeds a limit.

        Args:
            df: Frame to modify in place.
            percentage: Limit as a number in 1-100; a column is dropped when
                its fraction of null values is strictly greater than
                ``percentage / 100``.
        """
        null_share = df.isnull().mean()
        # Collect the labels first so the frame is not mutated while scanned.
        too_sparse = null_share[null_share > (percentage / 100)].index
        df.drop(columns=too_sparse, inplace=True)

    @staticmethod
    def df_info(df: "pd.DataFrame") -> None:
        """Print a null-value report: overall totals, then one line per column.

        Args:
            df: Frame to summarise.
        """
        row_count, column_count = df.shape
        total_items = row_count * column_count
        total_nulls = int(df.isnull().values.sum())

        def percent(part, whole):
            # An empty frame has nothing missing; avoid dividing by zero.
            return round(part / whole * 100, 2) if whole else 0.0

        print(f"DataFrame Shape   : {row_count} rows and {column_count} columns")
        print(f"DataFrame items   : {total_items}")
        print(f"DataFrame Null    : {total_nulls}  Null values")
        print(f"DataFrame contain : {percent(total_nulls, total_items)}% Null Values \n")

        for column, null_count in df.isnull().sum().items():
            null_count = int(null_count)
            # str() keeps the report working for non-string column labels.
            print(f"{str(column).ljust(30,' ')} total nan : {null_count} with {percent(null_count, row_count)}%")

    @staticmethod
    def isnull_any(df: "pd.DataFrame | pd.Series"):
        """Tell which columns contain at least one missing value.

        Returns:
            ``df.isnull().any()``: for a DataFrame, a boolean Series indexed
            by column label; for a Series, a single boolean.
        """
        return df.isnull().any()

    @staticmethod
    def isnull_values_any(df: "pd.DataFrame | pd.Series"):
        """Return True or False whether the given data contains any missing value."""
        return df.isnull().values.any()

    @staticmethod
    def isnull_sum(df: "pd.DataFrame | pd.Series"):
        """Return True or False whether the count of missing values is above zero."""
        return df.isnull().values.sum() > 0

    @staticmethod
    def isnull_values_sum(df: "pd.DataFrame | pd.Series"):
        """Count the missing values.

        Returns:
            ``df.isnull().sum()``: for a DataFrame, a Series with the number
            of missing values per column; for a Series, a single count.
        """
        return df.isnull().sum()