In [2]:
def equal_width_binning(data, col_index, bins):
    """Replace a numeric column with equal-width bin indices.

    The column's range [min, max] is split into ``bins`` intervals of equal
    width and every value is replaced by the zero-based index of the interval
    it falls into. The maximum value is assigned to the last bin
    (``bins - 1``) rather than to a bin of its own.

    Args:
        data: Re-iterable collection of rows; each row is a sequence whose
            ``col_index`` entry can be converted with ``float()``.
        col_index: Position of the column to discretize.
        bins: Number of bins; must be at least 1.

    Returns:
        A new list of rows (each a new list) where the selected column holds
        an ``int`` bin index. The rows passed in are not modified, so a
        caller's shallow copy of ``data`` is enough to keep the original.
        An empty ``data`` yields an empty list.

    Raises:
        ValueError: If ``bins`` is smaller than 1.
    """
    if bins < 1:
        raise ValueError("bins must be a positive integer")

    col_values = [float(row[col_index]) for row in data]
    if not col_values:
        return []

    min_val = min(col_values)
    bin_width = (max(col_values) - min_val) / bins

    binned = []
    for row, val in zip(data, col_values):
        if bin_width == 0:
            # Every value is identical: there is no range to split, so all
            # rows land in the first bin instead of dividing by zero.
            bin_num = 0
        else:
            # The maximum (and any float round-off at the top edge) would
            # otherwise index one past the last bin, so clamp it.
            bin_num = min(int((val - min_val) // bin_width), bins - 1)
        new_row = list(row)  # copy so the caller's row is left untouched
        new_row[col_index] = bin_num
        binned.append(new_row)
    return binned


In [3]:
def min_max_normalize(data, col_index):
    """Rescale a numeric column linearly onto the interval [0, 1].

    Each value ``v`` in the column becomes ``(v - min) / (max - min)``, where
    ``min`` and ``max`` are taken over the whole column.

    Args:
        data: Re-iterable collection of rows; each row is a sequence whose
            ``col_index`` entry can be converted with ``float()``.
        col_index: Position of the column to normalize.

    Returns:
        A new list of rows (each a new list) where the selected column holds
        the normalized ``float``. The rows passed in are not modified, so a
        caller's shallow copy of ``data`` is enough to keep the original.
        An empty ``data`` yields an empty list.
    """
    col_values = [float(row[col_index]) for row in data]
    if not col_values:
        return []

    min_val = min(col_values)
    value_range = max(col_values) - min_val

    normalized = []
    for row, val in zip(data, col_values):
        new_row = list(row)  # copy so the caller's row is left untouched
        # A constant column has zero range; map it to 0.0 instead of
        # dividing by zero.
        new_row[col_index] = (val - min_val) / value_range if value_range else 0.0
        normalized.append(new_row)
    return normalized


In [4]:
import math

def one_sample_t_test(sample, mu_0):
    """Compute the one-sample t statistic of ``sample`` against ``mu_0``.

    The statistic is ``(mean - mu_0) / (s / sqrt(n))`` where ``s`` is the
    sample standard deviation computed with the ``n - 1`` denominator.

    Args:
        sample: Sequence of numbers (must support ``len()`` and be iterable
            more than once).
        mu_0: Hypothesized population mean.

    Returns:
        The t statistic as a float. No p-value is computed.
    """
    size = len(sample)
    sample_mean = sum(sample) / size
    squared_devs = [(value - sample_mean) ** 2 for value in sample]
    sample_var = sum(squared_devs) / (size - 1)  # unbiased (n - 1) estimator
    std_error = math.sqrt(sample_var) / math.sqrt(size)
    return (sample_mean - mu_0) / std_error


In [5]:
def chi_square_test(observed, expected):
    """Compute Pearson's chi-square statistic for paired frequencies.

    Sums ``(observed - expected) ** 2 / expected`` over the paired entries.
    Pairing is done with ``zip``, so extra entries in the longer input are
    ignored.

    Args:
        observed: Iterable of observed counts.
        expected: Iterable of expected counts.

    Returns:
        The chi-square statistic (``0`` when there are no pairs). No p-value
        is computed.
    """
    statistic = 0
    for obs_count, exp_count in zip(observed, expected):
        squared_gap = (obs_count - exp_count) ** 2
        statistic = statistic + squared_gap / exp_count
    return statistic


In [6]:
def confusion_matrix(actual, predicted, positive_label):
    """Count binary classification outcomes for one positive label.

    Every label that is not equal to ``positive_label`` is treated as
    negative. Labels are paired with ``zip``, so extra entries in the longer
    input are ignored.

    Args:
        actual: Iterable of ground-truth labels.
        predicted: Iterable of predicted labels.
        positive_label: The label regarded as the positive class.

    Returns:
        A dict with integer counts under the keys ``'TP'``, ``'FP'``,
        ``'TN'`` and ``'FN'``.
    """
    # Outcome name keyed by (actual is positive, predicted is positive).
    outcome_for = {
        (True, True): 'TP',
        (False, True): 'FP',
        (True, False): 'FN',
        (False, False): 'TN',
    }
    tally = {'TP': 0, 'FP': 0, 'TN': 0, 'FN': 0}
    for truth, guess in zip(actual, predicted):
        key = (bool(truth == positive_label), bool(guess == positive_label))
        tally[outcome_for[key]] += 1
    return tally


In [7]:
# Small demo table: four numeric measurements (stored as strings) plus a
# yes/no label per row.
data = [
    ['5.1', '3.5', '1.4', '0.2', 'yes'],
    ['4.9', '3.0', '1.4', '0.2', 'no'],
    ['6.7', '3.1', '4.7', '1.5', 'yes'],
    ['5.8', '2.7', '5.1', '1.9', 'no'],
    ['6.0', '3.0', '4.8', '1.8', 'yes']
]

print("Original Data:")
for row in data:
    print(row)

print("\n=== Equal Width Binning on column 0 into 3 bins ===")
# Copy every row, not just the outer list: list.copy() is shallow, so the
# inner rows would still be shared with `data` and any in-place edit made by
# the binning function would corrupt the original table.
binned_data = equal_width_binning([list(row) for row in data], col_index=0, bins=3)
for row in binned_data:
    print(row)

Original Data:
['5.1', '3.5', '1.4', '0.2', 'yes']
['4.9', '3.0', '1.4', '0.2', 'no']
['6.7', '3.1', '4.7', '1.5', 'yes']
['5.8', '2.7', '5.1', '1.9', 'no']
['6.0', '3.0', '4.8', '1.8', 'yes']

=== Equal Width Binning on column 0 into 3 bins ===
[0, '3.5', '1.4', '0.2', 'yes']
[0, '3.0', '1.4', '0.2', 'no']
[2, '3.1', '4.7', '1.5', 'yes']
[1, '2.7', '5.1', '1.9', 'no']
[1, '3.0', '4.8', '1.8', 'yes']


In [8]:

# The demo table is declared again so this cell does not depend on whatever
# state an earlier cell left in `data`.
data = [
    ['5.1', '3.5', '1.4', '0.2', 'yes'],
    ['4.9', '3.0', '1.4', '0.2', 'no'],
    ['6.7', '3.1', '4.7', '1.5', 'yes'],
    ['5.8', '2.7', '5.1', '1.9', 'no'],
    ['6.0', '3.0', '4.8', '1.8', 'yes']
]

print("Original Data:")
for row in data:
    print(row)

print("\n Min-Max Normalization on column 2 ")
# Copy every row, not just the outer list: list.copy() is shallow, so the
# inner rows would still be shared with `data` and any in-place edit made by
# the normalizer would leak into the original table used below.
normalized_data = min_max_normalize([list(row) for row in data], col_index=2)
for row in normalized_data:
    print(row)

print("\n One Sample t-Test ")
# Test whether the mean of column 0 differs from the hypothesized value.
sample = [float(row[0]) for row in data]
mu_0 = 5.5
t_stat = one_sample_t_test(sample, mu_0)
print(f"t-statistic: {t_stat:.4f}")

print("\n Chi-Square Test ")
# Observed vs. expected category counts (both sum to 100).
observed = [50, 30, 20]
expected = [40, 40, 20]
chi2 = chi_square_test(observed, expected)
print(f"Chi-square statistic: {chi2:.4f}")

print("\n Confusion Matrix ")
actual = ['yes', 'no', 'yes', 'no', 'yes']
predicted = ['yes', 'yes', 'yes', 'no', 'no']
cm = confusion_matrix(actual, predicted, positive_label='yes')
print(cm)


Original Data:
['5.1', '3.5', '1.4', '0.2', 'yes']
['4.9', '3.0', '1.4', '0.2', 'no']
['6.7', '3.1', '4.7', '1.5', 'yes']
['5.8', '2.7', '5.1', '1.9', 'no']
['6.0', '3.0', '4.8', '1.8', 'yes']

 Min-Max Normalization on column 2 
['5.1', '3.5', 0.0, '0.2', 'yes']
['4.9', '3.0', 0.0, '0.2', 'no']
['6.7', '3.1', 0.891891891891892, '1.5', 'yes']
['5.8', '2.7', 1.0, '1.9', 'no']
['6.0', '3.0', 0.918918918918919, '1.8', 'yes']

 One Sample t-Test 
t-statistic: 0.6172

 Chi-Square Test 
Chi-square statistic: 5.0000

 Confusion Matrix 
{'TP': 2, 'FP': 1, 'TN': 1, 'FN': 1}
