## Question 1: Sliding Window Anomaly Score

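The user provides the array and the window size `k`.
For each element from index `k` onward, the anomaly score is the absolute difference between that element and the median of the `k` elements immediately before it.
We use two heaps (a max-heap for the lower half of the window and a min-heap for the upper half) to efficiently maintain the median.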

In [1]:

import heapq

def sliding_window_anomaly(arr, k):
    max_heap = []
    min_heap = []

    def balance():
        if len(max_heap) > len(min_heap) + 1:
            heapq.heappush(min_heap, -heapq.heappop(max_heap))
        elif len(min_heap) > len(max_heap):
            heapq.heappush(max_heap, -heapq.heappop(min_heap))

    def median():
        if k % 2 == 1:
            return -max_heap[0]
        return (-max_heap[0] + min_heap[0]) / 2

    for x in arr[:k]:
        heapq.heappush(max_heap, -x)
        heapq.heappush(min_heap, -heapq.heappop(max_heap))
        balance()

    result = []
    for i in range(k, len(arr)):
        result.append(abs(arr[i] - median()))

        outgoing = arr[i - k]
        incoming = arr[i]

        if outgoing <= -max_heap[0]:
            max_heap.remove(-outgoing)
            heapq.heapify(max_heap)
        else:
            min_heap.remove(outgoing)
            heapq.heapify(min_heap)

        if incoming <= -max_heap[0]:
            heapq.heappush(max_heap, -incoming)
        else:
            heapq.heappush(min_heap, incoming)

        balance()

    return result

# -------- USER INPUT --------
# Read the whitespace-separated integers and the window size from stdin.
arr = [int(token) for token in input("Enter numbers separated by space: ").split()]
k = int(input("Enter window size k: "))

# Only window sizes strictly between 0 and len(arr) are accepted.
if 0 < k < len(arr):
    print("Anomaly Scores:", sliding_window_anomaly(arr, k))
else:
    print("Invalid k value")


Anomaly Scores: [20, 70]


## Question 2: Top-K Correlated Feature Pairs

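The user enters the number of rows, the number of columns, the dataset values (one row per line), and `k`. Each column is treated as a feature, and column pairs are ranked by the absolute value of their Pearson correlation.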

In [3]:

import math
import heapq

def top_k_correlated_features(data, k):
    """Return the ``k`` column pairs with the largest absolute correlation.

    Each column of ``data`` is treated as a feature. The Pearson correlation
    of every pair ``(i, j)`` with ``i < j`` is computed from population
    statistics (sums divided by the number of rows). Pairs that involve a
    column with zero standard deviation are skipped because their correlation
    is undefined.

    Args:
        data: List of rows, each a sequence of numbers. If rows differ in
            length, only the columns present in every row are used.
        k: Maximum number of pairs to return.

    Returns:
        A list of ``(i, j, corr)`` tuples, with ``corr`` rounded to three
        decimals, ordered by ``abs(corr)`` descending. The list is empty when
        ``k <= 0``, when ``data`` is empty, or when no pair has a defined
        correlation.
    """
    # Without this guard k <= 0 would peek at an empty heap and empty data
    # would index data[0]; both used to raise IndexError.
    if k <= 0 or not data:
        return []

    n = len(data)
    cols = list(zip(*data))
    # Count the columns zip actually produced so ragged rows cannot send the
    # loops below past the end of ``cols``.
    m = len(cols)

    means = [sum(col) / n for col in cols]
    stds = [
        math.sqrt(sum((x - means[i]) ** 2 for x in cols[i]) / n)
        for i in range(m)
    ]

    # Min-heap keyed on abs(corr): heap[0] is the weakest pair kept so far.
    heap = []

    for i in range(m):
        if stds[i] == 0:
            continue
        for j in range(i + 1, m):
            if stds[j] == 0:
                continue

            cov = sum(
                (a - means[i]) * (b - means[j])
                for a, b in zip(cols[i], cols[j])
            ) / n
            corr = cov / (stds[i] * stds[j])

            if len(heap) < k:
                heapq.heappush(heap, (abs(corr), i, j, corr))
            elif abs(corr) > heap[0][0]:
                # Strictly stronger than the weakest kept pair: swap it in.
                heapq.heappushpop(heap, (abs(corr), i, j, corr))

    return [(i, j, round(c, 3)) for _, i, j, c in sorted(heap, reverse=True)]

# -------- USER INPUT --------
# Prompts are read in the same order as before: rows, columns, the data rows,
# then k.
rows = int(input("Enter number of rows: "))
cols = int(input("Enter number of columns: "))

data = []
print("Enter data row by row:")
for _ in range(rows):
    data.append(list(map(float, input().split())))

k = int(input("Enter k (top correlations): "))

# Reject input the correlation routine cannot handle: an empty dataset, rows
# whose width does not match the declared column count, or a non-positive k.
if rows <= 0 or cols <= 0:
    print("Invalid number of rows or columns")
elif any(len(row) != cols for row in data):
    print("Invalid data: each row must contain exactly", cols, "values")
elif k <= 0:
    print("Invalid k value")
else:
    print("Top-K Correlated Feature Pairs:", top_k_correlated_features(data, k))


Enter data row by row:
Top-K Correlated Feature Pairs: [(0, 2, 1.0)]
