In [28]:
import unittest
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.testing.decorators import check_figures_equal

import importlib
import pytest

In [4]:
import importlib.util  # `import importlib` alone does not guarantee the `util` submodule is loaded
import os
import sys

# Make ../src importable. The membership guard keeps repeated runs of this
# cell from stacking duplicate entries at the front of sys.path.
src_path = os.path.abspath(os.path.join(os.getcwd(), '../src'))
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Load functions.py explicitly from its file path so the notebook always
# executes the current source on disk.
spec = importlib.util.spec_from_file_location("functions", os.path.join(src_path, "functions.py"))
functions = importlib.util.module_from_spec(spec)
# Register the module before executing it so that later
# `from functions import ...` statements reuse this exact module object
# instead of importing functions.py a second time.
sys.modules["functions"] = functions
spec.loader.exec_module(functions)

In [5]:
from functions import plot_binary_feature

In [59]:
# plot_binary_feature unit tests


def is_binary_feature(dataset, feature):
    """
    Report whether a column holds exactly two distinct values.

    Parameters:
        dataset: frame-like object indexable by column name.
        feature: name of the column to inspect.

    Returns:
        True when ``dataset[feature].nunique()`` equals 2, otherwise False.
    """
    return dataset[feature].nunique() == 2
    
def test_plot_binary_feature(df, feature='feature'):
    """
    Smoke-test functions.plot_binary_feature on one column.

    The check passes when the plotting call completes without raising. The
    resulting figure is written to 'test_plot.png' but is NOT compared with a
    reference image, so this does not verify that the plot is correct.

    Parameters:
        df: DataFrame handed to the plotting function.
        feature: column to plot; defaults to 'feature' so existing
            single-argument calls behave as before.
    """
    # Switch to a non-interactive backend so nothing tries to open a window.
    plt.switch_backend('Agg')

    try:
        try:
            functions.plot_binary_feature(df, feature)
        except Exception as e:
            pytest.fail(f"Plotting function raised an exception: {e}")

        # A comparison against a reference plot would normally go here.
        plt.savefig('test_plot.png')
    finally:
        # Always release the current figure, including when the plotting call
        # failed part-way and pytest.fail() raised above.
        plt.close()


def test_invalid_feature_column(df, feature):
    """ Test if the function raises KeyError for invalid column """
    if feature not in df.columns:
        raise KeyError(f"Feature '{feature}' not found in the dataset")


In [98]:
# Two distinct values (0/1) in a column named 'feature'.
binary_feature = pd.DataFrame({'feature': [0, 1, 0, 1, 1, 0, 1, 0, 0, 1]})

# Four distinct values (0-3) in a column named 'feature'.
non_binary_feature = pd.DataFrame(
    {'feature': [0, 1, 0, 1, 2, 0, 1, 0, 0, 1, 3, 0, 2]}
)

# Same values as non_binary_feature, but the column is called 'age'.
changed_feature_name = non_binary_feature.rename(columns={'feature': 'age'})

# A single repeated value: thirteen zeros.
zero_feature = pd.DataFrame({'feature': [0] * 13})

In [50]:
is_binary_feature(binary_feature, 'feature')

True

In [52]:
test_plot_binary_feature(binary_feature)

  plt.show()


In [61]:
test_invalid_feature_column(binary_feature, 'feature')

In [54]:
is_binary_feature(non_binary_feature, 'feature')

False

In [62]:
test_plot_binary_feature(non_binary_feature)

  plt.show()


In [65]:
test_invalid_feature_column(non_binary_feature, 'feature')

In [68]:
is_binary_feature(changed_feature_name, 'age')

False

In [70]:
# Expected to raise KeyError: changed_feature_name only has an 'age' column.
test_invalid_feature_column(changed_feature_name, 'feature')

KeyError: "Feature 'feature' not found in the dataset"

In [73]:
from functions import plot_categorical_feature

In [133]:
# plot_categorical_feature unit tests

def is_categorical_feature(dataset, feature, mapping_categories):
    """
    Report whether a column holds at least two distinct values.

    Parameters:
        dataset: frame-like object indexable by column name.
        feature: name of the column to inspect.
        mapping_categories: accepted but not consulted by this check.

    Returns:
        True when ``dataset[feature].nunique()`` is 2 or more, otherwise False.
    """
    distinct_count = dataset[feature].nunique()
    if distinct_count < 2:
        return False
    return True

def test_plot_categorical_feature(dataset, feature, mapping_categories):
    """
    Smoke-test plot_categorical_feature on one column.

    The check passes when the plotting call completes without raising. The
    figure is written to 'test_categorical_plot.png' but is not compared with
    a reference image.

    Parameters:
        dataset: DataFrame handed to the plotting function.
        feature: name of the column to plot.
        mapping_categories: category mapping forwarded unchanged to
            plot_categorical_feature.
    """
    # Use a non-interactive backend so nothing tries to open a window.
    plt.switch_backend('Agg')

    try:
        try:
            # Forward the caller's column name; the previous version always
            # plotted the literal 'feature' column and ignored this argument.
            plot_categorical_feature(dataset, feature, mapping_categories)
        except Exception as e:
            pytest.fail(f"Plotting function raised an exception: {e}")

        # Save the plot to a file
        plt.savefig('test_categorical_plot.png')
    finally:
        # Always release the current figure, including when the plotting call
        # failed and pytest.fail() raised above.
        plt.close()

def test_invalid_feature_column(dataset, feature, mapping_categories):
    """ Test if the function raises KeyError for an invalid column """
    if feature not in dataset.columns:
        raise KeyError(f"Feature '{feature}' not found in the dataset")


In [86]:
test_plot_categorical_feature(non_binary_feature, 'feature', ['A', 'B', 'C'])

  plt.show()


In [87]:
test_invalid_feature_column(non_binary_feature, 'feature', ['A', 'B', 'C'])

In [99]:
is_categorical_feature(zero_feature, 'feature', ['A', 'A', 'A'])

False

In [105]:
# Expected to raise KeyError: zero_feature only has a 'feature' column.
test_invalid_feature_column(zero_feature, 'age', ['A', 'B', 'C'])

KeyError: "Feature 'age' not found in the dataset"

In [108]:
from functions import plot_histogram_feature

In [134]:
# plot_histogram_feature unit tests

def is_continous_feature(dataset, feature):
    """
    Check if a feature in the dataset is continuous.

    A column counts as continuous when, after missing values are dropped, it
    has more than one distinct value and at least one value that changes when
    cast to int (i.e. is not a whole number).

    Parameters:
        dataset: DataFrame containing the column.
        feature: name of the column to inspect.

    Returns:
        bool: True if the column is continuous by the rule above.
    """
    # Drop missing values first: casting NaN to int raises, and NaN should
    # not be counted as a distinct value either.
    values = dataset[feature].dropna()

    if values.nunique() <= 1:
        return False

    # Whole-number columns survive the int cast unchanged; any difference
    # means at least one fractional value is present.
    return not values.astype(int).eq(values).all()

def test_plot_histogram_feature(dataset, feature):
    """
    Smoke-test plot_histogram_feature on one column.

    The check passes when the plotting call completes without raising. The
    figure is written to 'test_histogram_plot.png' but is not compared with a
    reference image.

    Parameters:
        dataset: DataFrame handed to the plotting function.
        feature: name of the column to plot.
    """
    # Use a non-interactive backend so nothing tries to open a window.
    plt.switch_backend('Agg')

    try:
        try:
            plot_histogram_feature(dataset, feature)
        except Exception as e:
            pytest.fail(f"Plotting function raised an exception: {e}")

        # Save the plot to a file
        plt.savefig('test_histogram_plot.png')
    finally:
        # Always release the current figure, including when the plotting call
        # failed and pytest.fail() raised above.
        plt.close()

def test_invalid_feature_column(dataset, feature):
    """ Test if the function raises KeyError for an invalid column """
    if feature not in dataset.columns:
        raise KeyError(f"Feature '{feature}' not found in the dataset")

In [125]:
# Five non-integer values in a column named 'continuous_feature'.
continous_data = pd.DataFrame({'continuous_feature': [1.2, 2.3, 3.4, 4.5, 5.6]})

In [126]:
is_continous_feature(continous_data, 'continuous_feature')

True

In [127]:
is_continous_feature(zero_feature, 'feature')

False

In [128]:
is_continous_feature(non_binary_feature, 'feature')

False

In [129]:
test_plot_histogram_feature(continous_data, 'continuous_feature')

  plt.show()


In [135]:
test_invalid_feature_column(continous_data, 'continuous_feature')