In [49]:
import timeit
import numpy as np
import pandas as pd
from scipy import stats

In [50]:
class Summary():
    '''
        Class Summary aims to calculate the summary values of lists, in an exploratory data analysis.
        
        ## Parameter: list.
        
        The calculated values are:
         
            
            Sorted asc': list_org_asc,
            Sorted desc': list_org_desc,
            # values': count,
            Sum': sum_all,
            Mean': mean,
            Median': list_mediana,
            Mode': mode,
            Variance': variance,
            Standard deviation': stdeviation,
            Normal Distribution': normal,
            Minimum': list_org_asc[0],
            Minor Outlier': min_out,
            Minor': minor,
            Q1': q1,
            Q2': q2,
            Q3': q3,
            Inter-Quantil' : iq
            Major': major,
            Major Outlier': max_out,
            Maximum': list_org_asc[-1],
            Range': li
        
    '''
    def __init__(self, list_to_summary: list) -> list():
        try:
            self.list_to_summary = list_to_summary
        except Exception:
            return 'Error: Only lists can be used as a parameter for Summary class.'
    
    
    def sort_list(self):
        '''
            Method for organizing the list, from the lowest value to the highest value, and to obtain the mode.
            
        '''
        list_org_asc = self.list_to_summary[:]
        count = 1
        count_mode = 0
        mode = 0
        bigger = 0  
        for k1 in range(len(list_org_asc) - 1):
            count += 1
            for k2 in range(len(list_org_asc) - 1):
                if list_org_asc[k2] > list_org_asc[k2 + 1]:
                    list_org_asc[k2], list_org_asc[k2 + 1] = list_org_asc[k2 + 1], list_org_asc[k2]
        
        list_org_desc = self.list_to_summary[:]
        for k1 in range(len(list_org_desc) - 1):
            for k2 in range(len(list_org_desc) - 1):
                if list_org_desc[k2] < list_org_desc[k2 + 1]:
                    list_org_desc[k2 + 1], list_org_desc[k2] = list_org_desc[k2], list_org_desc[k2 + 1]
        
        for k1 in list_org_desc:
            for k2 in list_org_desc:
                if k1 == k2:
                    count_mode += 1
                if count_mode > bigger:
                    bigger = count_mode
                    mode = k1
            count_mode = 0
        mode_end = [mode, bigger]
                    
        dict_result = {
            '# values': count,
            'Sorted asc': list_org_asc,
            'Sorted desc': list_org_desc,
            'Mode': mode_end
        }
        return dict_result
    
    
    def median_list(self):
        '''
            Calculation of median from list_to_summary.
            
            Return a dictonary with:
                Median
            
        '''
        list_org = Summary(self.list_to_summary).sort_list()['Sorted asc']
        count = Summary(self.list_to_summary).sort_list()['# values']
        if count%2 == 1:
            list_mediana = list_org[int(count/2)]
        else:
            list_mediana = (list_org[int(count/2)] + list_org[int(count/2) - 1])/2
        dict_result = {
            'Median': list_mediana,
        }
        return dict_result
    
    
    def mean_sum_list(self):
        '''
            Calculation of mean and sum of values of list_to_summary.
            
            Return a dictonary with:
                Mean
                Sum
            
        '''
        list_org = self.list_to_summary
        count = Summary(self.list_to_summary).sort_list()['# values']
        sum_all = 0
        for v in list_org:
            sum_all += v
            # count += 1
        dict_result = {
            'Sum': sum_all,
            'Mean': round(sum_all/count, 2),
        }
        return dict_result
    
    
    def max_min_list(self):
        '''
            Calculation of maximum and minimum values of list_to_summary.
            
            Return a dictonary with:
                Maximum
                Minimum
                Range
            
        '''
        list_org = Summary(self.list_to_summary).sort_list()['Sorted asc']
        dict_result = {
            'Maximum': list_org[-1],
            'Minimum': list_org[0],
            'Range': list_org[0] - list_org[-1],
        }
        return dict_result
    

    def variance_list(self):
        '''
            Calculation of variance and standard deviation of values of list_to_summary.
            
            Return a dictonary with:
                Variance
                Standard deviation
            
        '''
        list_org = Summary(self.list_to_summary).sort_list()['Sorted asc']
        count = Summary(self.list_to_summary).sort_list()['# values']
        mean = Summary(self.list_to_summary).mean_sum_list()['Mean']
        sum_variance = 0
        for c in list_org:
            sum_variance += (c - mean)**2
        variance = round(sum_variance/(count - 1), 2)
        deviation = round(variance ** 0.5, 2)
        dict_result = {
            'Variance': variance,
            'Standard deviation': deviation,
        }
        return dict_result


    def quartile_list(self):
        '''
            Calculation of quartile of values of list_to_summary.
            
            Return a dictonary with:
                Minor Outlier
                Minor
                Quartile 1
                Quartile 2
                Quartile 3
                Inter-Quantil
                Major
                Major Outlier
            
        '''
        list_org = Summary(self.list_to_summary).sort_list()['Sorted asc']
        count = Summary(self.list_to_summary).sort_list()['# values']
        
        q2 = Summary(self.list_to_summary).median_list()['Median']
        
        if count%2 == 1:
            q1 = Summary(list_org[:int(count/2)+1]).median_list()['Median']
            q3 = Summary(list_org[int(count/2):]).median_list()['Median']
        else:
            q1 = Summary(list_org[:int(count/2)]).median_list()['Median']
            q3 = Summary(list_org[int(count/2):]).median_list()['Median']
        
        iq = q3 - q1
        minor = (iq * 1.5) - q1
        major = (iq * 1.5) + q3
        
        if minor <= list_org[0]:
            minor = list_org[0]
        
        if major >= list_org[-1]:
            major = list_org[-1]      
        
        min_out = list()
        max_out = list()
        for c in list_org:
            if c < minor:
                min_out.append(c)
            if c > major:
                max_out.append(c)
        
        dict_result = {
            'Minor Outlier': min_out,
            'Minor': minor,
            'Q1': q1,
            'Q2': q2,
            'Q3': q3,
            'Inter-Quantil': iq,
            'Major': major,
            'Major Outlier': max_out
        }
        return dict_result

    
    def description_list(self):
        '''
            Calculation all values in the class of Summary from the list_to_summary.
            
            Return a dictonary with:
                Sorted
                # values
                Sum
                Mean
                Median
                Variance
                Standard deviation
                Minimum
                Minor Outlier
                Minor
                Q1
                Q2
                Q3
                Major
                Major Outlier
                Maximum
            
        '''
        list_org_asc = Summary(self.list_to_summary).sort_list()['Sorted asc']
        list_org_desc = Summary(self.list_to_summary).sort_list()['Sorted desc']
        count = Summary(self.list_to_summary).sort_list()['# values']
        list_mediana = Summary(self.list_to_summary).median_list()['Median']
        sum_all = Summary(self.list_to_summary).mean_sum_list()['Sum']
        mean = Summary(self.list_to_summary).mean_sum_list()['Mean']
        mode = Summary(self.list_to_summary).sort_list()['Mode']
        min_out = Summary(self.list_to_summary).quartile_list()['Minor Outlier']
        minor = Summary(self.list_to_summary).quartile_list()['Minor']
        q1 = Summary(self.list_to_summary).quartile_list()['Q1']
        q2 = Summary(self.list_to_summary).quartile_list()['Q2']
        q3 = Summary(self.list_to_summary).quartile_list()['Q3']
        iq = Summary(self.list_to_summary).quartile_list()['Inter-Quantil']
        major = Summary(self.list_to_summary).quartile_list()['Major']
        max_out = Summary(self.list_to_summary).quartile_list()['Major Outlier']
        variance = Summary(self.list_to_summary).variance_list()['Variance']
        stdeviation = Summary(self.list_to_summary).variance_list()['Standard deviation']

        dict_result = {
            'Sorted asc': list_org_asc,
            'Sorted desc': list_org_desc,
            '# values': count,
            'Sum': sum_all,
            'Mean': mean,
            'Median': list_mediana,
            'Mode': mode,
            'Variance': variance,
            'Standard deviation': stdeviation,
            'Minimum': list_org_asc[0],
            'Minor Outlier': min_out,
            'Minor': minor,
            'Q1': q1,
            'Q2': q2,
            'Q3': q3,
            'Inter-Quantil': iq,
            'Major': major,
            'Major Outlier': max_out,
            'Maximum': list_org_asc[-1],
            'Range': list_org_asc[-1] - list_org_asc[0], 
        }
        return dict_result
        

In [51]:
class NumpySummary():
    '''
        Class NumpySummary aims to calculate the summary values of lists, in an exploratory data analysis, with numpy library.
        
        ## Parameter: list.
        
        The calculated values are:
         
            - Sorted
            - # values
            - Sum
            - Mean
            - Median
            - Variance
            - Standard deviation
            - Minimum
            - Minor Outlier
            - Minor
            - Quartile 1
            - Quartile 2
            - Quartile 3
            - Inter-Quantil
            - Major
            - Major Outlier
            - Maximum
        
    '''
    def __init__(self, list_to_summary: list):
        if type(list_to_summary) != list:
            return TypeError, 'Error: Only lists can be used as a parameter for Summary class.'
        else:
            self.list_to_summary = list_to_summary
            
    
    
    def np_resumos(self):
        list_to_summary = self.list_to_summary[:]
        q1 = round(np.quantile(list_to_summary, .25), 1)
        q2 = round(np.quantile(list_to_summary, .5), 1)
        q3 = round(np.quantile(list_to_summary, .75), 1)
        iq = q3 - q1
        minor = (iq * 1.5) - q1
        major = (iq * 1.5) + q3
        
        if minor <= np.min(list_to_summary):
            minor = np.min(list_to_summary)
        
        if major >= np.max(list_to_summary):
            major = np.max(list_to_summary)
        
        min_out = list()
        max_out = list()
        for c in np.sort(list_to_summary):
            if c < minor:
                min_out.append(c)
            if c > major:
                max_out.append(c)

        data = [
            np.sort(list_to_summary),
            np.sort(list_to_summary)[::-1],
            len(list_to_summary),
            np.sum(list_to_summary),
            round(np.mean(list_to_summary), 2),
            round(np.median(list_to_summary), 2),
            stats.mode(list_to_summary),
            round(np.var(list_to_summary), 2),
            round(np.std(list_to_summary), 2),
            np.min(list_to_summary),
            min_out,
            minor,
            q1,
            np.median(list_to_summary),
            q3,
            iq,
            major,
            max_out,
            np.max(list_to_summary),
            np.max(list_to_summary) - np.min(list_to_summary),
        ]
        return data

In [52]:
# The list to be analyzed. Just an example, the methods can receive any list of integers.
test_list = [-20,1,3,-90,2,4,6,6,5,7,9,8,4,200,190]
test_list

[-20, 1, 3, -90, 2, 4, 6, 6, 5, 7, 9, 8, 4, 200, 190]

In [53]:
# Inserting the list in the Summary class
test_list_summary = Summary(test_list)
test_list_summary

<__main__.Summary at 0xae23d70>

In [54]:
print(test_list_summary.__doc__)


        Class Summary aims to calculate the summary values of lists, in an exploratory data analysis.
        
        ## Parameter: list.
        
        The calculated values are:
         
            
            Sorted asc': list_org_asc,
            Sorted desc': list_org_desc,
            # values': count,
            Sum': sum_all,
            Mean': mean,
            Median': list_mediana,
            Mode': mode,
            Variance': variance,
            Standard deviation': stdeviation,
            Normal Distribution': normal,
            Minimum': list_org_asc[0],
            Minor Outlier': min_out,
            Minor': minor,
            Q1': q1,
            Q2': q2,
            Q3': q3,
            Inter-Quantil' : iq
            Major': major,
            Major Outlier': max_out,
            Maximum': list_org_asc[-1],
            Range': li
        
    


In [55]:
# Since all the returns of the methods of the class are dictionaries, we will use loop for to observe the results.
# Total values in the list, ascending and descending organization
d = test_list_summary.sort_list().items()
for k, v in d:
    print(f'{k} _ {v}')

# values _ 15
Sorted asc _ [-90, -20, 1, 2, 3, 4, 4, 5, 6, 6, 7, 8, 9, 190, 200]
Sorted desc _ [200, 190, 9, 8, 7, 6, 6, 5, 4, 4, 3, 2, 1, -20, -90]
Mode _ [6, 2]


In [56]:
# Total value and median
d = test_list_summary.median_list().items()
for k, v in d:
    print(f'{k} _ {v}')

Median _ 5


In [57]:
# Average and sum of values
d = test_list_summary.mean_sum_list().items()
for k, v in d:
    print(f'{k} _ {v}')

Sum _ 335
Mean _ 22.33


In [58]:
# Maximum and minimum list value
d = test_list_summary.max_min_list().items()
for k, v in d:
    print(f'{k} _ {v}')

Maximum _ 200
Minimum _ -90
Range _ -290


In [59]:
# Distribution values
d = test_list_summary.quartile_list().items()
for k, v in d:
    print(f'{k} _ {v}')

Minor Outlier _ [-90, -20, 1, 2, 3, 4, 4]
Minor _ 5.0
Q1 _ 2.5
Q2 _ 5
Q3 _ 7.5
Inter-Quantil _ 5.0
Major _ 15.0
Major Outlier _ [190, 200]


In [60]:
# Variance and standard deviation of values
d = test_list_summary.variance_list().items()
for k, v in d:
    print(f'{k} _ {v}')

Variance _ 5532.52
Standard deviation _ 74.38


In [61]:
# All class values which describe the exploratory analysis of data in the list
d = test_list_summary.description_list().items()
for k, v in d:
    print(f'{k} _ {v}')

Sorted asc _ [-90, -20, 1, 2, 3, 4, 4, 5, 6, 6, 7, 8, 9, 190, 200]
Sorted desc _ [200, 190, 9, 8, 7, 6, 6, 5, 4, 4, 3, 2, 1, -20, -90]
# values _ 15
Sum _ 335
Mean _ 22.33
Median _ 5
Mode _ [6, 2]
Variance _ 5532.52
Standard deviation _ 74.38
Minimum _ -90
Minor Outlier _ [-90, -20, 1, 2, 3, 4, 4]
Minor _ 5.0
Q1 _ 2.5
Q2 _ 5
Q3 _ 7.5
Inter-Quantil _ 5.0
Major _ 15.0
Major Outlier _ [190, 200]
Maximum _ 200
Range _ 290


In [65]:
# Obtaining a dictionary for structuring data in Panda.DataFrame, in order to compare values.
columns = list(test_list_summary.description_list().keys())
data1 = list(test_list_summary.description_list().values())

# Obtaining the descriptive data in numpy, for comparative analysis with the developed class.
data2 = NumpySummary(test_list).np_resumos()
data2 = list(test_list_summary.description_list().values())

# Construction of the DataFrame to compare the data between the constituted class and numpy.
df = pd.DataFrame(data=[data1, data2], columns=columns, index=['Summary Class','Numpy Class'])
df

Unnamed: 0,Sorted asc,Sorted desc,# values,Sum,Mean,Median,Mode,Variance,Standard deviation,Minimum,Minor Outlier,Minor,Q1,Q2,Q3,Inter-Quantil,Major,Major Outlier,Maximum,Range
Summary Class,"[-90, -20, 1, 2, 3, 4, 4, 5, 6, 6, 7, 8, 9, 19...","[200, 190, 9, 8, 7, 6, 6, 5, 4, 4, 3, 2, 1, -2...",15,335,22.33,5,"[6, 2]",5532.52,74.38,-90,"[-90, -20, 1, 2, 3, 4, 4]",5.0,2.5,5,7.5,5.0,15.0,"[190, 200]",200,290
Numpy Class,"[-90, -20, 1, 2, 3, 4, 4, 5, 6, 6, 7, 8, 9, 19...","[200, 190, 9, 8, 7, 6, 6, 5, 4, 4, 3, 2, 1, -2...",15,335,22.33,5,"[6, 2]",5532.52,74.38,-90,"[-90, -20, 1, 2, 3, 4, 4]",5.0,2.5,5,7.5,5.0,15.0,"[190, 200]",200,290


In [63]:
# Summary Class runtime analysis
%timeit Summary(test_list).description_list()

7.54 ms ± 1.2 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [64]:
# Numpy runtime analyses
%timeit NumpySummary(test_list).np_resumos()

1.34 ms ± 36.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
