In [1]:
import math
from collections import Counter

# Two equally sized sample series used by the covariance/correlation demo.
# data_y is data_x shifted down by 5, so the two are perfectly linearly related.
data_x = list(range(10, 61, 10))
data_y = [x - 5 for x in data_x]

# --- Measures of Central Tendency ---

def calculate_mean(data):
    """
    Computes the arithmetic mean of a dataset.

    Args:
        data (list): A list of numerical data. It must contain at least one
            value; an empty list raises ZeroDivisionError.

    Returns:
        float: The total of the values divided by how many values there are.
    """
    total = sum(data)
    count = len(data)
    return total / count

def calculate_median(data):
    """
    Finds the median (middle value) of a dataset.
    The input is sorted into a new list first, so the caller's data is not
    reordered.

    Args:
        data (list): A list of numerical data. An empty list raises IndexError.

    Returns:
        float: For an even number of values, the average of the two central
        values. For an odd number of values, the central element itself is
        returned unchanged (so an int input element comes back as an int).
    """
    ordered = sorted(data)
    count = len(ordered)
    half = count // 2

    # Odd count: exactly one element sits in the middle.
    if count % 2 == 1:
        return ordered[half]

    # Even count: average the two elements on either side of the midpoint.
    lower, upper = ordered[half - 1], ordered[half]
    return (lower + upper) / 2

def calculate_mode(data):
    """
    Finds the mode(s) of a dataset, i.e. the value(s) that occur most often.

    Args:
        data (list): A list of hashable data values.

    Returns:
        list: Every value that shares the highest occurrence count, in the
        order the values first appear in the data. An empty input gives an
        empty list. If no value repeats, every value is returned, because
        they all share the top count of 1.
    """
    # Tally how many times each distinct value occurs.
    frequencies = Counter(data)

    # With no data there is no top count; 0 matches nothing below, so the
    # result is an empty list.
    highest = max(frequencies.values(), default=0)

    return [item for item, freq in frequencies.items() if freq == highest]

# --- Measures of Dispersion ---

def calculate_variance(data):
    """
    Computes the sample variance of a dataset.
    The squared deviations from the mean are summed and divided by n - 1
    (rather than n), which is the usual divisor for a sample.

    Args:
        data (list): A list of numerical data.

    Returns:
        float: The sample variance. Inputs with fewer than two values
        return 0.0, because the n - 1 divisor cannot be used for them.
    """
    count = len(data)
    # Zero or one data point: report 0.0 instead of dividing by zero.
    if count <= 1:
        return 0.0

    center = calculate_mean(data)
    sum_sq_dev = sum((value - center) ** 2 for value in data)
    degrees_of_freedom = count - 1
    return sum_sq_dev / degrees_of_freedom

def calculate_std_dev(data):
    """
    Computes the sample standard deviation of a dataset.
    This is the square root of the value returned by calculate_variance, so
    it is expressed in the same units as the data itself.

    Args:
        data (list): A list of numerical data.

    Returns:
        float: The square root of the sample variance.
    """
    return math.sqrt(calculate_variance(data))

# --- Measures of Relationship ---

def calculate_covariance(data_x, data_y):
    """
    Computes the sample covariance of two paired datasets.
    The sign of the result shows the direction of the linear relationship:
    positive when the two variables tend to rise together, negative when one
    tends to rise as the other falls.

    Args:
        data_x (list): The first list of numerical data.
        data_y (list): The second list of numerical data, paired with
            data_x by position.

    Returns:
        float: The sample covariance (n - 1 divisor). Returns 0.0 when the
        lists differ in length or hold fewer than two values.
    """
    count = len(data_x)
    same_length = count == len(data_y)
    # Unpaired or too-short input cannot be processed; report 0.0.
    if not same_length or count < 2:
        return 0.0

    center_x = calculate_mean(data_x)
    center_y = calculate_mean(data_y)

    # Sum, over each pair, the product of the two deviations from the means.
    cross_deviation_total = sum(
        (x - center_x) * (y - center_y) for x, y in zip(data_x, data_y)
    )

    return cross_deviation_total / (count - 1)

def calculate_correlation(data_x, data_y):
    """
    Calculates the correlation coefficient (Pearson's r) between two datasets.
    Correlation measures the strength and direction of a linear relationship
    between two variables. The value ranges from -1 to 1.
    - 1: Perfect positive correlation
    - 0: No correlation
    - -1: Perfect negative correlation

    Args:
        data_x (list): The first list of numerical data.
        data_y (list): The second list of numerical data.

    Returns:
        float: The correlation coefficient, clamped to the interval [-1, 1].
        Returns 0.0 when either dataset has a standard deviation of zero.
        Because calculate_covariance returns 0.0 for lists of different
        lengths, such input also yields 0.0 here.
    """
    covariance = calculate_covariance(data_x, data_y)
    std_dev_x = calculate_std_dev(data_x)
    std_dev_y = calculate_std_dev(data_y)

    # Avoid division by zero if standard deviation is zero
    if std_dev_x == 0 or std_dev_y == 0:
        return 0.0

    coefficient = covariance / (std_dev_x * std_dev_y)

    # Rounding in the two square roots and their product can push a perfect
    # correlation slightly past the mathematical bounds (for example
    # 1.0000000000000002). Clamp so the documented [-1, 1] range always holds.
    # Explicit comparisons are used instead of min()/max() so that a NaN
    # result is passed through unchanged rather than replaced by a bound.
    if coefficient > 1.0:
        return 1.0
    if coefficient < -1.0:
        return -1.0
    return coefficient

# --- Z-Score ---

def calculate_zscore(value, data):
    """
    Computes the Z-score (standard score) of a value relative to a dataset.
    The Z-score is the distance of the value from the dataset's mean,
    measured in sample standard deviations.

    Args:
        value (float): The data point to score.
        data (list): The list of numerical data supplying the mean and
            standard deviation.

    Returns:
        float: (value - mean) / standard deviation, or 0.0 when the
        standard deviation is zero.
    """
    center = calculate_mean(data)
    spread = calculate_std_dev(data)

    # A zero spread would mean dividing by zero, so 0.0 is reported instead.
    return (value - center) / spread if spread != 0 else 0.0

# --- Main Program Execution ---

# Demo driver: runs only when the file is executed as a script, and prints
# each statistic defined above for small sample datasets.
if __name__ == "__main__":
    # Part 1: descriptive statistics for one dataset.
    print("--- Statistical Analysis of a Single Dataset ---")
    data_single = [2, 3, 5, 5, 7, 8, 10]
    print(f"Dataset: {data_single}")
    # Numeric results are formatted to two decimal places; the mode is a
    # list, so it is printed as-is.
    print(f"Mean: {calculate_mean(data_single):.2f}")
    print(f"Median: {calculate_median(data_single):.2f}")
    print(f"Mode: {calculate_mode(data_single)}")
    print(f"Variance: {calculate_variance(data_single):.2f}")
    print(f"Standard Deviation: {calculate_std_dev(data_single):.2f}")

    # Z-score of the value 8 relative to data_single
    value_to_zscore = 8
    zscore = calculate_zscore(value_to_zscore, data_single)
    print(f"Z-score for {value_to_zscore}: {zscore:.2f}")

    # Part 2: relationship between the module-level data_x and data_y samples.
    print("\n--- Analysis of Two Datasets for Relationship ---")
    print(f"Dataset X: {data_x}")
    print(f"Dataset Y: {data_y}")
    print(f"Covariance: {calculate_covariance(data_x, data_y):.2f}")
    print(f"Correlation: {calculate_correlation(data_x, data_y):.2f}")


--- Statistical Analysis of a Single Dataset ---
Dataset: [2, 3, 5, 5, 7, 8, 10]
Mean: 5.71
Median: 5.00
Mode: [5]
Variance: 7.90
Standard Deviation: 2.81
Z-score for 8: 0.81

--- Analysis of Two Datasets for Relationship ---
Dataset X: [10, 20, 30, 40, 50, 60]
Dataset Y: [5, 15, 25, 35, 45, 55]
Covariance: 350.00
Correlation: 1.00
