# **Note:**

* Your output should be a list in the following format:
['ASUId',
Mean_of_feature1_for_digit0, Variance_of_feature1_for_digit0,
Mean_of_feature2_for_digit0, Variance_of_feature2_for_digit0,
Mean_of_feature1_for_digit1, Variance_of_feature1_for_digit1,
Mean_of_feature2_for_digit1, Variance_of_feature2_for_digit1,
Accuracy_for_digit0testset, Accuracy_for_digit1testset]
* The **order** of these 11 components is important.
* Please print the result in the same cell that contains the comment `"### TEST FUNCTION: test_question1"` so that the autograder can capture your output and provide accurate feedback. Do not print anything else in the code cell below.
* You can divide the code into different cells, but the output must be printed in the cell containing the comment `"### TEST FUNCTION: test_question1"`.


In [12]:
### TEST FUNCTION: test_question1
   
import numpy
import scipy.io
import math
import geneNewData
import numpy as np

def extract_features(data_set):
    """Reduce every sample in ``data_set`` to two brightness statistics.

    Axes 1 and 2 of ``data_set`` are collapsed, so axis 0 indexes the
    samples (presumably one 2-D image per entry -- confirm against the
    loaded data).

    Returns:
        A 2-D array with one row per sample. Column 0 is the mean
        brightness (feature1); column 1 is the population standard
        deviation of the brightness (feature2).
    """
    pixel_axes = (1, 2)
    per_image_stats = (
        np.mean(data_set, axis=pixel_axes),  # feature1: average brightness
        np.std(data_set, axis=pixel_axes),   # feature2: brightness spread
    )
    # Place the two statistics side by side: row i is [feature1, feature2].
    return np.column_stack(per_image_stats)

def calculate_parameters(features):
    """Estimate the per-feature mean and variance of one class.

    Args:
        features: 2-D array whose column 0 is feature1 and column 1 is
            feature2, one row per sample.

    Returns:
        A 4-tuple ``(feature1_mean, feature1_variance, feature2_mean,
        feature2_variance)``. Variances use NumPy's default ``ddof=0``,
        i.e. the population variance.
    """
    first_feature, second_feature = features[:, 0], features[:, 1]
    return (
        np.mean(first_feature),
        np.var(first_feature),
        np.mean(second_feature),
        np.var(second_feature),
    )

def predict_class(features, mean1_d0, variance1_d0, mean2_d0, variance2_d0, mean1_d1, variance1_d1, mean2_d1, variance2_d1):
    """Classify samples as digit0 or digit1 with a Gaussian naive Bayes rule.

    Each feature is modelled as an independent 1-D Gaussian per class and
    both classes are given equal prior probability, so the decision reduces
    to comparing the joint likelihoods of the two classes.

    The comparison is carried out on log-likelihoods. Multiplying the raw
    densities underflows to 0.0 for samples far from both class means, which
    would turn the comparison into ``0.0 > 0.0`` and label every such sample
    as digit1 regardless of which class is actually closer.

    Args:
        features: 2-D array-like, one row per sample; column 0 is feature1
            and column 1 is feature2.
        mean1_d0, variance1_d0: Gaussian parameters of feature1 for digit0.
        mean2_d0, variance2_d0: Gaussian parameters of feature2 for digit0.
        mean1_d1, variance1_d1: Gaussian parameters of feature1 for digit1.
        mean2_d1, variance2_d1: Gaussian parameters of feature2 for digit1.
            All variances must be strictly positive.

    Returns:
        Boolean array with one entry per sample: True where digit0 is the
        more likely class, False otherwise (ties go to digit1).
    """
    features = np.asarray(features, dtype=float)
    features1 = features[:, 0]
    features2 = features[:, 1]

    def log_gaussian(values, mean, variance):
        # Logarithm of the Gaussian PDF:
        # -0.5 * (log(2*pi*variance) + (x - mean)^2 / variance)
        return -0.5 * (np.log(2 * np.pi * variance) + (values - mean) ** 2 / variance)

    # Joint log-likelihood per class: a sum of logs replaces the product of
    # densities, because the features are treated as independent.
    log_p_d0 = log_gaussian(features1, mean1_d0, variance1_d0) + log_gaussian(features2, mean2_d0, variance2_d0)
    log_p_d1 = log_gaussian(features1, mean1_d1, variance1_d1) + log_gaussian(features2, mean2_d1, variance2_d1)

    return log_p_d0 > log_p_d1  # True if digit0 class, otherwise False (digit1 class)

def main():
    """Fit the two-feature Gaussian classifier and print the result list.

    Steps:
      1. Call ``geneNewData.geneData`` with the student ID (presumably this
         writes the ID-specific training ``.mat`` files -- confirm against
         that module).
      2. Load the digit0/digit1 training and test images from the ``.mat``
         files.
      3. Extract the two features per image, estimate the per-class means
         and variances from the training data and classify both test sets.
      4. Print ``[ID, 8 class parameters, accuracy_digit0, accuracy_digit1]``.

    Every numeric entry is converted to a built-in ``float`` before printing
    so the output is a list of plain numbers on any NumPy version; NumPy 2
    would otherwise print each value as ``np.float64(...)``.
    """
    myID = '0406'  # change to last 4 digit of your studentID
    geneNewData.geneData(myID)

    # Index with ['target_img'] instead of .get(): a missing variable then
    # raises a KeyError here rather than passing None further down.
    train0 = scipy.io.loadmat('digit0_stu_train' + myID + '.mat')['target_img']
    train1 = scipy.io.loadmat('digit1_stu_train' + myID + '.mat')['target_img']
    test0 = scipy.io.loadmat('digit0_testset' + '.mat')['target_img']
    test1 = scipy.io.loadmat('digit1_testset' + '.mat')['target_img']

    # Feature extraction for training and testing data
    train0_features = extract_features(train0)
    train1_features = extract_features(train1)
    test0_features = extract_features(test0)
    test1_features = extract_features(test1)

    # Each tuple is (feature1_mean, feature1_variance, feature2_mean,
    # feature2_variance), which is the argument order predict_class expects
    # and the order required in the output list.
    parameters_d0 = calculate_parameters(train0_features)
    parameters_d1 = calculate_parameters(train1_features)

    # Predict classes for the testing datasets (True means "digit0")
    predict_digit0 = predict_class(test0_features, *parameters_d0, *parameters_d1)
    predict_digit1 = predict_class(test1_features, *parameters_d0, *parameters_d1)

    # Accuracy is the fraction of correct labels: True for the digit0 test
    # set, False for the digit1 test set.
    accuracy_test0 = np.mean(predict_digit0)
    accuracy_test1 = np.mean(np.logical_not(predict_digit1))

    # Output list
    numeric_results = (*parameters_d0, *parameters_d1, accuracy_test0, accuracy_test1)
    results = [myID, *(float(value) for value in numeric_results)]

    print(results)

# Run the experiment only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()


['0406', 44.12499566326531, 113.85588999926534, 87.36115347709884, 100.58482244996796, 19.33574387755102, 31.09648836321741, 61.30217910218628, 82.04463062118892, 0.9173469387755102, 0.9233480176211454]
