<h2>PS1 Worksheet - Data Mining</h2>

Q1) Without Using Inbuilt Functions - Ungrouped Data

In [2]:
def calculate_mean(data):
    """Return the arithmetic mean of ``data``.

    The sum is accumulated by hand rather than with ``sum()``.

    Args:
        data: A sized collection of numbers.

    Returns:
        The total of all values divided by their count, or ``None`` when
        ``data`` is empty.
    """
    if data:
        running_sum = 0
        for item in data:
            running_sum = running_sum + item
        return running_sum / len(data)

    # Nothing to average.
    return None


def calculate_median(data):
    """Return the median of ``data``.

    Note:
        ``data`` is sorted in place, so the caller's list is reordered
        after this call. This existing side effect is kept because code
        that runs afterwards on the same list can observe the ordering.

    Args:
        data: A list of numbers.

    Returns:
        The middle value for an odd number of elements, the average of the
        two middle values for an even number of elements, or ``None`` when
        ``data`` is empty.
    """
    if not data:
        return None

    data.sort()
    n = len(data)
    middle_index = n // 2

    # Parity must be tested on the element count, not on the middle index:
    # e.g. n == 5 gives middle_index == 2, which is even although n is odd.
    if n % 2 == 1:
        return data[middle_index]

    return (data[middle_index - 1] + data[middle_index]) / 2


def calculate_mode(data):
    """Return the most frequent value(s) in ``data``.

    Args:
        data: A collection of hashable values.

    Returns:
        ``None`` when ``data`` is empty or no value occurs more than once;
        a single value when exactly one value has the highest count;
        otherwise a list of all values sharing the highest count, in the
        order they first appear in ``data``.
    """
    if not data:
        return None

    # Tally how many times each value occurs.
    counts = {}
    for item in data:
        counts[item] = counts.get(item, 0) + 1

    # Find the highest tally by hand.
    highest = 0
    for count in counts.values():
        if count > highest:
            highest = count

    # Every value is unique, so there is no mode.
    if highest == 1:
        return None

    winners = [item for item, count in counts.items() if count == highest]

    if len(winners) > 1:
        return winners
    return winners[0]



def display_results(data):
    """Print the dataset, its sorted form, and its mean, median and mode.

    The statistics are computed in the order mean, median, mode, each on
    the same ``data`` object.

    Args:
        data: The list of numbers to summarise.
    """
    divider = "-" * 40

    print(f"Dataset: {data}")
    print(f"Sorted dataset: {sorted(data)}")
    print(divider)

    mean_value = calculate_mean(data)
    median_value = calculate_median(data)
    mode_value = calculate_mode(data)

    if mean_value is None:
        print("Mean: N/A")
    else:
        print(f"Mean: {mean_value:.2f}")

    if median_value is None:
        print("Median: N/A")
    else:
        print(f"Median: {median_value:.2f}")

    if isinstance(mode_value, list):
        # Several values tie for the highest frequency.
        print(f"Mode(s): {', '.join(str(m) for m in mode_value)}")
    elif mode_value is None:
        print("Mode: No mode (all values are unique)")
    else:
        print(f"Mode: {mode_value}")

    print(divider)

def main():
    """Prompt for a list of numbers and print its summary statistics.

    Blank input falls back to a built-in sample dataset. A ``ValueError``
    raised while parsing or processing the input is reported to the user
    instead of propagating.
    """
    rule = "=" * 40

    print("STATISTICAL MEASURES CALCULATOR")
    print(rule)
    print("\n" + rule)
    print("CUSTOM DATASET")
    print(rule)

    try:
        user_input = input("Enter numbers separated by spaces: ")

        if not user_input.strip():
            print("No data entered. Using default dataset.")
            display_results([6, 2, 8, 4, 6, 2, 6, 9, 2])
            return

        display_results([float(token) for token in user_input.split()])
    except ValueError:
        print("Invalid input! Please enter numbers only.")


# Run the ungrouped-data calculator only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

STATISTICAL MEASURES CALCULATOR

CUSTOM DATASET
Dataset: [4.0, 5.0, 5.0, 2.0, 1.0, 1.0, 2.0, 2.0, 3.0, 45.0, 5.0]
Sorted dataset: [1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 4.0, 5.0, 5.0, 5.0, 45.0]
----------------------------------------
Mean: 6.82
Median: 3.00
Mode(s): 2.0, 5.0
----------------------------------------


Q2) Without Using Inbuilt Functions - Grouped Data

In [None]:
def calculate_mean_grouped_data(class_intervals, freqs):
    """Return the mean of grouped data using class midpoints.

    Each class contributes ``frequency * midpoint``; the mean is the sum of
    those products divided by the total frequency.

    Args:
        class_intervals: Sequence of ``(lower, upper)`` class limits.
        freqs: Sequence of class frequencies, indexed in parallel with
            ``class_intervals``.

    Returns:
        The weighted mean, or ``0`` when the total frequency is zero.
    """
    weighted_sum = 0
    freq_sum = 0

    for idx, interval in enumerate(class_intervals):
        mid = (interval[0] + interval[1]) / 2
        weighted_sum += freqs[idx] * mid
        freq_sum += freqs[idx]

    # Avoid dividing by zero when there are no observations.
    if freq_sum == 0:
        return 0
    return weighted_sum / freq_sum


def calculate_median_grouped_data(class_intervals, freqs):
    """Return the median of grouped data by interpolating in the median class.

    The median class is the first class whose cumulative frequency reaches
    ``n / 2``. The median is then ``L + ((n/2 - B) / G) * width`` where
    ``L`` is the lower limit of that class, ``B`` the cumulative frequency
    before it, ``G`` its own frequency and ``width`` its class width.

    Args:
        class_intervals: Sequence of ``(lower, upper)`` class limits.
        freqs: Sequence of class frequencies, indexed in parallel with
            ``class_intervals``.

    Returns:
        The interpolated median, or ``0`` when the total frequency is zero
        (the same fallback ``calculate_mean_grouped_data`` uses).
    """
    n = sum(freqs)

    # With no observations there is no median class; without this guard an
    # empty input raised IndexError and all-zero frequencies divided by zero.
    if n == 0:
        return 0

    half = n / 2

    # Median class: first class where the cumulative frequency >= n/2.
    cumulative_freq = 0
    median_class_index = 0
    for i, freq in enumerate(freqs):
        cumulative_freq += freq
        if cumulative_freq >= half:
            median_class_index = i
            break

    lower, upper = (
        class_intervals[median_class_index][0],
        class_intervals[median_class_index][1],
    )
    G = freqs[median_class_index]  # Frequency of the median class
    B = cumulative_freq - G  # Cumulative frequency before the median class
    width = upper - lower

    return lower + ((half - B) / G) * width


def calculate_mode_grouped_data(class_intervals, freqs):
    # (class with highest frequency)
    max_freq_index = 0
    max_freq = freqs[0]
    
    for i in range(1, len(freqs)):
        if freqs[i] > max_freq:
            max_freq = freqs[i]
            max_freq_index = i
    
    L = class_intervals[max_freq_index][0]  # Lower class boundary
    fm = freqs[max_freq_index]  # Frequency of modal class
    
    # Get fm-1 (frequency of previous class) or 0 if first class
    fm_1 = freqs[max_freq_index - 1] if max_freq_index > 0 else 0
    
    # Get fm+1 (frequency of next class) or 0 if last class
    fm_1 = freqs[max_freq_index + 1] if max_freq_index < len(freqs) - 1 else 0
    
    width = class_intervals[max_freq_index][1] - class_intervals[max_freq_index][0]
    
    # Apply mode formula: L + ((fm - fm-1) / ((fm - fm-1) + (fm - fm+1))) * width
    numerator = fm - fm_1
    denominator = (fm - fm_1) + (fm - fm_1)
    
    return L + (numerator / denominator) * width if denominator != 0 else L


def main():
    """Read class intervals and frequencies, then print grouped statistics.

    Prompts for the number of classes, then for each class's lower limit,
    upper limit and frequency, and finally prints the grouped mean, median
    and mode to two decimal places.
    """
    class_intervals = []
    freqs = []

    class_count = int(input("Enter number of class intervals: "))

    for number in range(1, class_count + 1):
        print(f"\nClass Interval {number}:")
        lower = float(input("  Enter lower limit: "))
        upper = float(input("  Enter upper limit: "))
        freq = int(input("  Enter frequency: "))

        class_intervals.append((lower, upper))
        freqs.append(freq)

    # Compute all three measures before printing any of them.
    mean = calculate_mean_grouped_data(class_intervals, freqs)
    median = calculate_median_grouped_data(class_intervals, freqs)
    mode = calculate_mode_grouped_data(class_intervals, freqs)

    print(f"\nMean for the grouped data is {mean:.2f}")
    print(f"Median for the grouped data is {median:.2f}")
    print(f"Mode for the grouped data is {mode:.2f}")


# Run the grouped-data calculator only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


Class Interval 1:
Mean for the grouped data is 1.0
Median for the grouped data is 11.0
