In [45]:
import numpy as np
class Stats:
    """Hand-rolled descriptive statistics for a one-dimensional numeric sample.

    The input is converted to a NumPy array once in ``__init__``. The
    measures are computed with explicit loops rather than NumPy reductions;
    NumPy is used only for storage, sorting and ``log``/``exp``.

    Most methods fail on empty data (``ZeroDivisionError`` or ``IndexError``
    depending on the method); this is noted per method where relevant.
    """

    def __init__(self, data):
        """Store ``data`` (any array-like) as a NumPy array on ``self.data``."""
        self.data = np.array(data)

    def mean(self):
        """Return the arithmetic mean: sum of the values divided by their count.

        Raises:
            ZeroDivisionError: if the data is empty.
        """
        total = 0
        n = len(self.data)
        for x in self.data:
            total += x
        return total / n

    def gmean(self):
        """Return the geometric mean (n-th root of the product of the values).

        Computed as ``exp(mean(log(x)))`` instead of multiplying all values
        together: the running product silently wraps around for integer
        arrays and overflows to ``inf`` for long float samples.

        A zero in the data yields ``0.0``. Negative values yield ``nan``
        because the logarithm is undefined for them.

        Raises:
            ZeroDivisionError: if the data is empty.
        """
        n = len(self.data)
        log_total = 0.0
        # log(0) is -inf, which correctly maps back to 0.0 through exp();
        # silence only the divide-by-zero warning that it triggers.
        with np.errstate(divide="ignore"):
            for x in self.data:
                log_total += np.log(x)
        return np.exp(log_total / n)

    def hmean(self):
        """Return the harmonic mean: count divided by the sum of reciprocals."""
        n = len(self.data)
        denominators = 0
        for x in self.data:
            denominators += 1 / x
        return n / denominators

    def mode(self):
        """Return the most frequent value in the data.

        Builds a value -> count map over the whole sample and only then picks
        the winner, so every occurrence is taken into account. When several
        values share the highest count, the one that appears first in the
        data is returned.

        Raises:
            ValueError: if the data is empty.
        """
        freq = {}
        for val in self.data:
            freq[val] = freq.get(val, 0) + 1
        # Dicts preserve insertion order and max() keeps the first maximum,
        # which gives the "first seen wins" tie-break documented above.
        return max(freq, key=freq.get)

    def median(self):
        """Return the median of the data.

        For an odd count this is the middle element of the sorted data; for
        an even count it is the average of the two middle elements.

        Raises:
            IndexError: if the data is empty.
        """
        sorted_data = np.sort(self.data)
        count = len(sorted_data)
        mid = count // 2
        if count % 2 != 0:
            return sorted_data[mid]
        return (sorted_data[mid] + sorted_data[mid - 1]) / 2

    def describe(self):
        """Return a dict with keys mean, median, mode, std_dev, min and max."""
        return {
            "mean": self.mean(),
            "median": self.median(),
            "mode": self.mode(),
            "std_dev": self.std_dev(),
            "min": self.min1(),
            "max": self.max1(),
        }

    def variance(self):
        """Return the population variance (squared deviations divided by n)."""
        mean = self.mean()
        total = 0
        for value in self.data:
            diff = value - mean
            total += diff ** 2
        return total / len(self.data)

    def std_dev(self):
        """Return the population standard deviation (square root of variance)."""
        return self.variance() ** (1 / 2)

    def min1(self):
        """Return the smallest value, found by a linear scan.

        Raises:
            IndexError: if the data is empty.
        """
        val = self.data[0]
        for value in self.data:
            if value < val:
                val = value
        return val

    def max1(self):
        """Return the largest value, found by a linear scan.

        Raises:
            IndexError: if the data is empty.
        """
        val = self.data[0]
        for value in self.data:
            if value > val:
                val = value
        return val

    def range1(self):
        """Return the range: ``max1() - min1()``."""
        return self.max1() - self.min1()

    def quantile(self, quantile):
        """Return the element of the sorted data at position ``int(quantile * n)``.

        No interpolation is performed, so ``quantile(0.5)`` can differ from
        ``median()`` for an even number of values. The index is clamped to
        the last element so that ``quantile(1.0)`` returns the maximum
        instead of indexing past the end.

        Args:
            quantile: fraction in the closed interval [0, 1].

        Raises:
            ValueError: if ``quantile`` is outside [0, 1].
            IndexError: if the data is empty.
        """
        if not 0 <= quantile <= 1:
            raise ValueError("quantile must be between 0 and 1 inclusive")
        sorted_data = np.sort(self.data)
        n = len(sorted_data)
        index = min(int(quantile * n), n - 1)
        return sorted_data[index]

    def iqr(self):
        """Return the interquartile range: ``quantile(0.75) - quantile(0.25)``."""
        q1 = self.quantile(0.25)
        q3 = self.quantile(0.75)
        return q3 - q1

    def coeff_of_range(self):
        """Return the range divided by the arithmetic mean.

        NOTE(review): the usual textbook coefficient of range is
        ``(max - min) / (max + min)``; this method divides by the mean
        instead. Confirm which definition is intended before relying on it.
        """
        return self.range1() / self.mean()

    def coeff_of_variation(self):
        """Return the standard deviation divided by the mean (as a ratio)."""
        return self.std_dev() / self.mean()

    def coeff_of_std_dev(self):
        """Return the standard deviation divided by the mean, times 100.

        NOTE(review): this is ``coeff_of_variation()`` expressed as a
        percentage; some textbooks attach the two names the other way round.
        """
        return self.std_dev() / self.mean() * 100

    def coeff_of_quartile_dev(self):
        """Return ``(Q3 - Q1) / (Q3 + Q1)`` using ``quantile()`` for Q1 and Q3."""
        q1 = self.quantile(0.25)
        q3 = self.quantile(0.75)
        iqr = q3 - q1
        return iqr / (q3 + q1)

    def mean_dev(self):
        """Return the mean absolute deviation of the values from the mean."""
        mean = self.mean()
        total_dev = 0
        for value in self.data:
            total_dev += abs(value - mean)
        return total_dev / len(self.data)

    def coeff_of_mean_dev(self):
        """Return the mean absolute deviation divided by the arithmetic mean."""
        return self.mean_dev() / self.mean()
    

### A. Code for Statistical Measures

In [62]:
import numpy as np
class Stats:
    """Hand-rolled descriptive statistics for a one-dimensional numeric sample.

    The input is converted to a NumPy array once in ``__init__``. The
    measures are computed with explicit loops rather than NumPy reductions;
    NumPy is used only for storage, sorting and ``log``/``exp``.

    Most methods fail on empty data (``ZeroDivisionError`` or ``IndexError``
    depending on the method); this is noted per method where relevant.
    """

    def __init__(self, data):
        """Store ``data`` (any array-like) as a NumPy array on ``self.data``."""
        self.data = np.array(data)

    def mean(self):
        """Return the arithmetic mean: sum of the values divided by their count.

        Raises:
            ZeroDivisionError: if the data is empty.
        """
        total = 0
        n = len(self.data)
        for x in self.data:
            total += x
        return total / n

    def gmean(self):
        """Return the geometric mean (n-th root of the product of the values).

        Computed as ``exp(mean(log(x)))`` instead of multiplying all values
        together: the running product silently wraps around for integer
        arrays and overflows to ``inf`` for long float samples.

        A zero in the data yields ``0.0``. Negative values yield ``nan``
        because the logarithm is undefined for them.

        Raises:
            ZeroDivisionError: if the data is empty.
        """
        n = len(self.data)
        log_total = 0.0
        # log(0) is -inf, which correctly maps back to 0.0 through exp();
        # silence only the divide-by-zero warning that it triggers.
        with np.errstate(divide="ignore"):
            for x in self.data:
                log_total += np.log(x)
        return np.exp(log_total / n)

    def hmean(self):
        """Return the harmonic mean: count divided by the sum of reciprocals."""
        n = len(self.data)
        denominators = 0
        for x in self.data:
            denominators += 1 / x
        return n / denominators

    def mode(self):
        """Return the most frequent value in the data.

        Builds a value -> count map over the whole sample and only then picks
        the winner, so every occurrence is taken into account. When several
        values share the highest count, the one that appears first in the
        data is returned.

        Raises:
            ValueError: if the data is empty.
        """
        freq = {}
        for val in self.data:
            freq[val] = freq.get(val, 0) + 1
        # Dicts preserve insertion order and max() keeps the first maximum,
        # which gives the "first seen wins" tie-break documented above.
        return max(freq, key=freq.get)

    def median(self):
        """Return the median of the data.

        For an odd count this is the middle element of the sorted data; for
        an even count it is the average of the two middle elements.

        Raises:
            IndexError: if the data is empty.
        """
        sorted_data = np.sort(self.data)
        count = len(sorted_data)
        mid = count // 2
        if count % 2 != 0:
            return sorted_data[mid]
        return (sorted_data[mid] + sorted_data[mid - 1]) / 2

    def describe(self):
        """Return a dict with keys mean, median, mode, std_dev, min and max."""
        return {
            "mean": self.mean(),
            "median": self.median(),
            "mode": self.mode(),
            "std_dev": self.std_dev(),
            "min": self.min1(),
            "max": self.max1(),
        }

    def variance(self):
        """Return the population variance (squared deviations divided by n)."""
        mean = self.mean()
        total = 0
        for value in self.data:
            diff = value - mean
            total += diff ** 2
        return total / len(self.data)

    def std_dev(self):
        """Return the population standard deviation (square root of variance)."""
        return self.variance() ** (1 / 2)

    def min1(self):
        """Return the smallest value, found by a linear scan.

        Raises:
            IndexError: if the data is empty.
        """
        val = self.data[0]
        for value in self.data:
            if value < val:
                val = value
        return val

    def max1(self):
        """Return the largest value, found by a linear scan.

        Raises:
            IndexError: if the data is empty.
        """
        val = self.data[0]
        for value in self.data:
            if value > val:
                val = value
        return val

    def range1(self):
        """Return the range: ``max1() - min1()``."""
        return self.max1() - self.min1()

    def quantile(self, quantile):
        """Return the element of the sorted data at position ``int(quantile * n)``.

        No interpolation is performed, so ``quantile(0.5)`` can differ from
        ``median()`` for an even number of values. The index is clamped to
        the last element so that ``quantile(1.0)`` returns the maximum
        instead of indexing past the end.

        Args:
            quantile: fraction in the closed interval [0, 1].

        Raises:
            ValueError: if ``quantile`` is outside [0, 1].
            IndexError: if the data is empty.
        """
        if not 0 <= quantile <= 1:
            raise ValueError("quantile must be between 0 and 1 inclusive")
        sorted_data = np.sort(self.data)
        n = len(sorted_data)
        index = min(int(quantile * n), n - 1)
        return sorted_data[index]

    def iqr(self):
        """Return the interquartile range: ``quantile(0.75) - quantile(0.25)``."""
        q1 = self.quantile(0.25)
        q3 = self.quantile(0.75)
        return q3 - q1

    def coeff_of_range(self):
        """Return the range divided by the arithmetic mean.

        NOTE(review): the usual textbook coefficient of range is
        ``(max - min) / (max + min)``; this method divides by the mean
        instead. Confirm which definition is intended before relying on it.
        """
        return self.range1() / self.mean()

    def coeff_of_variation(self):
        """Return the standard deviation divided by the mean (as a ratio)."""
        return self.std_dev() / self.mean()

    def coeff_of_std_dev(self):
        """Return the standard deviation divided by the mean, times 100.

        NOTE(review): this is ``coeff_of_variation()`` expressed as a
        percentage; some textbooks attach the two names the other way round.
        """
        return self.std_dev() / self.mean() * 100

    def coeff_of_quartile_dev(self):
        """Return ``(Q3 - Q1) / (Q3 + Q1)`` using ``quantile()`` for Q1 and Q3."""
        q1 = self.quantile(0.25)
        q3 = self.quantile(0.75)
        iqr = q3 - q1
        return iqr / (q3 + q1)

    def mean_dev(self):
        """Return the mean absolute deviation of the values from the mean."""
        mean = self.mean()
        total_dev = 0
        for value in self.data:
            total_dev += abs(value - mean)
        return total_dev / len(self.data)

    def coeff_of_mean_dev(self):
        """Return the mean absolute deviation divided by the arithmetic mean."""
        return self.mean_dev() / self.mean()
    

### Applying custom methods on SepalLength Column

In [63]:
import pandas as pd

In [64]:
# Load the Iris CSV into the notebook-global `data`.
# NOTE(review): the path is an absolute, machine-specific Windows location;
# the cell fails on any machine where this file does not exist.
data=pd.read_csv(r"C:\Users\mdimr\Downloads\Iris (1).csv")

In [48]:
# Name of the CSV column the following cells analyze.
column_to_analyze = 'SepalLengthCm'

In [49]:
# Rebind `data` to the selected column only. After this cell the full table
# is no longer reachable through `data`.
data =data[column_to_analyze]

In [50]:
# Wrap the selected column in a Stats object; later cells call its methods via `x`.
x=Stats(data)

In [51]:
# Bare expression: as the last statement of a notebook cell it displays the
# object's default repr (Stats defines no __repr__).
x

<__main__.Stats at 0x1e09baf74d0>

In [52]:
# Print every Stats measure for the column currently wrapped in `x`.
# Each entry pairs the exact label with a zero-argument callable so values
# are computed one at a time, immediately before their line is printed.
report = (
    ("Arithmetic Mean:", x.mean),
    ("Geometric Mean:", x.gmean),
    ("Harmonic Mean:", x.hmean),
    ("Mode:", x.mode),
    ("Median:", x.median),
    ("Describe:", x.describe),
    ("Variance:", x.variance),
    ("Standard Deviation:", x.std_dev),
    ("Max Value:", x.max1),
    ("Min Value:", x.min1),
    ("Range:", x.range1),
    ("IQR:", x.iqr),
    ("Quartile:Q1", lambda: x.quantile(0.25)),
    ("Quartile:Q2", lambda: x.quantile(0.50)),
    ("Quartile:Q3", lambda: x.quantile(0.75)),
    ("Coefficient of Range:", x.coeff_of_range),
    ("Coefficient of Variation:", x.coeff_of_variation),
    ("Coefficient of Standard Deviation:", x.coeff_of_std_dev),
    ("Coefficient of Quartile Deviation:", x.coeff_of_quartile_dev),
    ("Coefficient of Mean Deviation:", x.coeff_of_mean_dev),
)
for label, compute in report:
    print(label, compute())

Arithmetic Mean: 5.843333333333335
Geometric Mean: 5.785720390427729
Harmonic Mean: 5.728905057850834
Mode: 5.0
Median: 5.8
Describe: {'mean': 5.843333333333335, 'median': 5.8, 'mode': 5.0, 'std_dev': 0.8253012917851409, 'min': 4.3, 'max': 7.9}
Variance: 0.6811222222222222
Standard Deviation: 0.8253012917851409
Max Value: 7.9
Min Value: 4.3
Range: 3.6000000000000005
IQR: 1.3000000000000007
Quartile:Q1 5.1
Quartile:Q2 5.8
Quartile:Q3 6.4
Coefficient of Range: 0.6160867084997147
Coefficient of Variation: 0.1412380989934639
Coefficient of Standard Deviation: 14.12380989934639
Coefficient of Quartile Deviation: 0.11304347826086962
Coefficient of Mean Deviation: 0.11766495531469869


### Applying Custom methods on SepalWidth Column

In [53]:
# Re-read the Iris CSV: an earlier cell rebound `data` to a single column,
# so the full table has to be loaded again before picking another column.
# NOTE(review): absolute, machine-specific Windows path.
data=pd.read_csv(r"C:\Users\mdimr\Downloads\Iris (1).csv")

In [54]:
# Select the sepal-width column, rebind `data` to that column alone, and wrap
# it in a Stats object for the report cell that follows.
column_to_analyze = 'SepalWidthCm'
data =data[column_to_analyze]
x=Stats(data)
# Bare expression: displays the object's default repr as the cell output.
x

<__main__.Stats at 0x1e09baf5350>

In [55]:
# Print every Stats measure for the column currently wrapped in `x`.
# Each entry pairs the exact label with a zero-argument callable so values
# are computed one at a time, immediately before their line is printed.
report = (
    ("Arithmetic Mean:", x.mean),
    ("Geometric Mean:", x.gmean),
    ("Harmonic Mean:", x.hmean),
    ("Mode:", x.mode),
    ("Median:", x.median),
    ("Describe:", x.describe),
    ("Variance:", x.variance),
    ("Standard Deviation:", x.std_dev),
    ("Max Value:", x.max1),
    ("Min Value:", x.min1),
    ("Range:", x.range1),
    ("IQR:", x.iqr),
    ("Quartile:Q1", lambda: x.quantile(0.25)),
    ("Quartile:Q2", lambda: x.quantile(0.50)),
    ("Quartile:Q3", lambda: x.quantile(0.75)),
    ("Coefficient of Range:", x.coeff_of_range),
    ("Coefficient of Variation:", x.coeff_of_variation),
    ("Coefficient of Standard Deviation:", x.coeff_of_std_dev),
    ("Coefficient of Quartile Deviation:", x.coeff_of_quartile_dev),
    ("Coefficient of Mean Deviation:", x.coeff_of_mean_dev),
)
for label, compute in report:
    print(label, compute())

Arithmetic Mean: 3.0540000000000007
Geometric Mean: 3.0235822036025914
Harmonic Mean: 2.9931367940540596
Mode: 3.0
Median: 3.0
Describe: {'mean': 3.0540000000000007, 'median': 3.0, 'mode': 3.0, 'std_dev': 0.4321465800705435, 'min': 2.0, 'max': 4.4}
Variance: 0.1867506666666667
Standard Deviation: 0.4321465800705435
Max Value: 4.4
Min Value: 2.0
Range: 2.4000000000000004
IQR: 0.5
Quartile:Q1 2.8
Quartile:Q2 3.0
Quartile:Q3 3.3
Coefficient of Range: 0.7858546168958742
Coefficient of Variation: 0.14150182713508297
Coefficient of Standard Deviation: 14.150182713508297
Coefficient of Quartile Deviation: 0.0819672131147541
Coefficient of Mean Deviation: 0.10906788910718188


### Applying Custom methods on PetalWidth Column

In [56]:
# Re-read the Iris CSV: an earlier cell rebound `data` to a single column,
# so the full table has to be loaded again before picking another column.
# NOTE(review): absolute, machine-specific Windows path.
data=pd.read_csv(r"C:\Users\mdimr\Downloads\Iris (1).csv")

In [57]:
# Select the petal-width column, rebind `data` to that column alone, and wrap
# it in a Stats object for the report cell that follows.
column_to_analyze = 'PetalWidthCm'
data =data[column_to_analyze]
x=Stats(data)
# Bare expression: displays the object's default repr as the cell output.
x

<__main__.Stats at 0x1e09baf5fd0>

In [58]:
# Print every Stats measure for the column currently wrapped in `x`.
# Each entry pairs the exact label with a zero-argument callable so values
# are computed one at a time, immediately before their line is printed.
report = (
    ("Arithmetic Mean:", x.mean),
    ("Geometric Mean:", x.gmean),
    ("Harmonic Mean:", x.hmean),
    ("Mode:", x.mode),
    ("Median:", x.median),
    ("Describe:", x.describe),
    ("Variance:", x.variance),
    ("Standard Deviation:", x.std_dev),
    ("Max Value:", x.max1),
    ("Min Value:", x.min1),
    ("Range:", x.range1),
    ("IQR:", x.iqr),
    ("Quartile:Q1", lambda: x.quantile(0.25)),
    ("Quartile:Q2", lambda: x.quantile(0.50)),
    ("Quartile:Q3", lambda: x.quantile(0.75)),
    ("Coefficient of Range:", x.coeff_of_range),
    ("Coefficient of Variation:", x.coeff_of_variation),
    ("Coefficient of Standard Deviation:", x.coeff_of_std_dev),
    ("Coefficient of Quartile Deviation:", x.coeff_of_quartile_dev),
    ("Coefficient of Mean Deviation:", x.coeff_of_mean_dev),
)
for label, compute in report:
    print(label, compute())

Arithmetic Mean: 1.1986666666666672
Geometric Mean: 0.8378270050250772
Harmonic Mean: 0.48664645154044783
Mode: 0.2
Median: 1.3
Describe: {'mean': 1.1986666666666672, 'median': 1.3, 'mode': 0.2, 'std_dev': 0.760612618588172, 'min': 0.1, 'max': 2.5}
Variance: 0.5785315555555559
Standard Deviation: 0.760612618588172
Max Value: 2.5
Min Value: 0.1
Range: 2.4
IQR: 1.5
Quartile:Q1 0.3
Quartile:Q2 1.3
Quartile:Q3 1.8
Coefficient of Range: 2.0022246941045596
Coefficient of Variation: 0.6345489031603212
Coefficient of Standard Deviation: 63.45489031603212
Coefficient of Quartile Deviation: 0.7142857142857143
Coefficient of Mean Deviation: 0.5497219132369291


### Applying Custom methods on PetalLength Column

In [59]:
# Re-read the Iris CSV: an earlier cell rebound `data` to a single column,
# so the full table has to be loaded again before picking another column.
# NOTE(review): absolute, machine-specific Windows path.
data=pd.read_csv(r"C:\Users\mdimr\Downloads\Iris (1).csv")

In [60]:
# Select the petal-length column, rebind `data` to that column alone, and wrap
# it in a Stats object for the report cell that follows.
column_to_analyze = 'PetalLengthCm'
data =data[column_to_analyze]
x=Stats(data)
# Bare expression: displays the object's default repr as the cell output.
x

<__main__.Stats at 0x1e09803c9d0>

In [61]:
# Print every Stats measure for the column currently wrapped in `x`.
# Each entry pairs the exact label with a zero-argument callable so values
# are computed one at a time, immediately before their line is printed.
report = (
    ("Arithmetic Mean:", x.mean),
    ("Geometric Mean:", x.gmean),
    ("Harmonic Mean:", x.hmean),
    ("Mode:", x.mode),
    ("Median:", x.median),
    ("Describe:", x.describe),
    ("Variance:", x.variance),
    ("Standard Deviation:", x.std_dev),
    ("Max Value:", x.max1),
    ("Min Value:", x.min1),
    ("Range:", x.range1),
    ("IQR:", x.iqr),
    ("Quartile:Q1", lambda: x.quantile(0.25)),
    ("Quartile:Q2", lambda: x.quantile(0.50)),
    ("Quartile:Q3", lambda: x.quantile(0.75)),
    ("Coefficient of Range:", x.coeff_of_range),
    ("Coefficient of Variation:", x.coeff_of_variation),
    ("Coefficient of Standard Deviation:", x.coeff_of_std_dev),
    ("Coefficient of Quartile Deviation:", x.coeff_of_quartile_dev),
    ("Coefficient of Mean Deviation:", x.coeff_of_mean_dev),
)
for label, compute in report:
    print(label, compute())

Arithmetic Mean: 3.7586666666666693
Geometric Mean: 3.2397566359576
Harmonic Mean: 2.6964718010995794
Mode: 1.5
Median: 4.35
Describe: {'mean': 3.7586666666666693, 'median': 4.35, 'mode': 1.5, 'std_dev': 1.7585291834055201, 'min': 1.0, 'max': 6.9}
Variance: 3.0924248888888854
Standard Deviation: 1.7585291834055201
Max Value: 6.9
Min Value: 1.0
Range: 5.9
IQR: 3.4999999999999996
Quartile:Q1 1.6
Quartile:Q2 4.4
Quartile:Q3 5.1
Coefficient of Range: 1.5697055693508326
Coefficient of Variation: 0.46785983950129095
Coefficient of Standard Deviation: 46.78598395012909
Coefficient of Quartile Deviation: 0.5223880597014925
Coefficient of Mean Deviation: 0.415551614047534
