<div id="toggle_code">...</div>

<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>

<script src="https://cdn.jsdelivr.net/gh/philipwlewis/jupyterlab-toc-toggle@1.0/jlab-toc-toggle.js"></script>

<style>

.jlab-table td {

border: 1px solid black !important;

text-align: center !important;

background: white !important;

}

.jlab-table table {

background: white !important;

margin: 1em auto 1em auto !important;

text-align: center !important;

border-collapse: collapse !important;

border: 1px solid black !important;

}

.jlab-table th {

border: 1px solid black !important;

text-align: center !important;

background: aliceblue !important;

}

</style>

In [1]:
# Function Definition
%matplotlib agg
import io
import base64
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from IPython.display import HTML
from scipy.stats import linregress, spearmanr, f_oneway, ttest_ind
from scipy.stats import stats

def to_upper_str(df, columns):
    """
    Convert specified columns in a DataFrame to uppercase strings and remove leading and trailing whitespaces.

    Parameters:
        df (DataFrame): The DataFrame containing the data.
        columns (list): A list of column names in the DataFrame to be converted.

    Returns:
        df (DataFrame): A copy of the original DataFrame with the specified columns converted to uppercase strings.
    """
    
    for column in columns:
        df[column] = [str(item).upper().strip() for item in df[column].dropna().tolist()]
    return df

def drop_empty(df, column):
    """
    Remove rows with empty values in the specified column of a DataFrame.

    Parameters:
        df (DataFrame): The DataFrame containing the data.
        column (str): The name of the column in the DataFrame to be checked for empty values.

    Returns:
        df (DataFrame): A copy of the original DataFrame with rows containing empty values in the specified column removed.
    """
    
    df[column].replace('', np.nan, inplace=True)
    df = df.dropna(subset=[column])
    
    return df

def get_outliers(df, columns):
    """
    Get a list of outlier IDs from the specified columns of a DataFrame.

    Parameters:
        df (DataFrame): The DataFrame containing the data.
        columns (list): A list of column names in the DataFrame to check for outliers.

    Returns:
        all_outlier_list (list): A list of outlier IDs found in any of the specified columns.
    """
    
    # Initialize an empty list to store all outlier IDs.
    all_outlier_list = []
    for column in columns:
        
        # Calculate quartiles and IQR for the column.
        Q1 = df[column].quantile(0.25)
        Q3 = df[column].quantile(0.75)
        IQR = Q3 - Q1
        
        # Calculate the lower and upper bounds for outliers.
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        
        # Find outliers and add their IDs to the outlier list.
        all_outlier_set = set(all_outlier_list)
        col_outlier_set = set(df[(df[column] < lower_bound) | (df[column] > upper_bound)]["id"].tolist())
        all_outlier_list = list(all_outlier_set.union(col_outlier_set))
        
    return all_outlier_list

def get_error_bar(values):
    """
    Calculate the mean and standard error of a set of values.

    Parameters:
        values (array-like): An array-like object containing the values.

    Returns:
        tuple: A tuple containing the mean and standard error of the values.
    """
    
    mean = np.mean(values)
    std_err = np.std(values) / np.sqrt(len(values))
    
    return mean, std_err

def linear_regression(x_values, y_values, ax):
    """
    Perform linear regression and plot the best fit line along with the regression equation.

    Parameters:
        x_values (array-like): The x values.
        y_values (array-like): The y values.
        ax (matplotlib.axes.Axes): The axes object where the plot will be drawn.

    Returns:
        tuple: A tuple containing the scatter plot object and the regression coefficients.
    """
    
    # Plot scatter plot.
    scatter = ax.scatter(x=x_values, y=y_values)
    
    # Get regression coefficients.
    regression_coefficients = slope, intercept, rvalue, pvalue, stderr = linregress(x_values,y_values)
    
    # Calculate coordinates for the best fit line and plot it.
    x0 = min(x_values)
    x1 = max(x_values)
    y0 = x0*slope + intercept
    y1 = x1*slope + intercept
    best_fit_line=ax.plot([x0, x1], [y0, y1], "r-")
    
    # Annotate the plot with the regression equation
    if intercept >= 0:
        sign = '+'
    else:
        sign = '-'
    regression_formula = f'y = {slope:.2f}x {sign} {abs(intercept):.2f}'
    ax.text(0.05, 0.95, regression_formula, transform=ax.transAxes, fontsize=12, verticalalignment='top')
    
    return scatter, regression_coefficients

def even_odd_correlation(score_list, ax):
    """
    Calculate the correlation between even and odd indexed elements in a list of scores and plot the regression line.

    Parameters:
        score_list (list of lists): A list containing lists of scores.
        ax (matplotlib.axes.Axes): The axes object where the plot will be drawn.

    Returns:
        tuple: A tuple containing the scatter plot object and the regression coefficients.
    """
    
    # Add scores for odd and even questions for each individual.
    odd_scores = []
    even_scores = []
    for scores in score_list:
        odd_score = 0
        even_score = 0
        for i in range(len(scores)):
            if (i+1)%2 != 0:
                odd_score += scores[i]
            else:
                even_score += scores[i]
        odd_scores.append(odd_score/round(len(scores)/2))
        even_scores.append(even_score/int(len(scores)/2))
    
    # Perform linear regression to the two score lists.
    scatter, regression_coefficients = linear_regression(odd_scores, even_scores, ax)
    
    return scatter, regression_coefficients

def add_subpot_label(axes):
    """
    Add subplot labels (a, b, c, ...) to each subplot.

    Parameters:
        axes (list of matplotlib.axes.Axes): A list of axes objects representing subplots.

    Returns:
        None
    """
    
    subplot_labels = ['a', 'b', 'c', 'd']
    for ax, label in zip(axes, subplot_labels[:len(axes)]):
        ax.text(0.5, -0.2, label, transform=ax.transAxes, fontsize=12, va='top', ha='center')
    
    return

def display_figure(fig, fig_id, caption, w=0.6, fs=12):
    """
    Display a matplotlib figure with a caption.

    Parameters:
        fig (matplotlib.figure.Figure): The matplotlib figure to display.
        fig_id (str): The ID of the figure.
        caption (str): The caption to display below the figure.
        w (float, optional): The width of the figure as a fraction of the available space (default is 0.6).
        fs (int, optional): The font size for title, labels, and legend (default is 12).

    Returns:
        None
    """
    
    # Set dpi for saving the figure.
    plt.rcParams['figure.dpi'] = 300
    plt.rcParams['savefig.dpi'] = 300
    
    # Create a byte stream to store the figure as PNG image.
    pic_IObytes = io.BytesIO()
    
    # set fontsize for title and labels.      
    for ax in fig.axes:
        text_items = [ax.title, ax.xaxis.label, ax.yaxis.label]
        if ax.get_legend() is not None:
            text_items = text_items + ax.get_legend().get_texts()
        for item in (text_items + ax.get_xticklabels() + ax.get_yticklabels() ):
            item.set_fontsize(fs)
    
    # Add sub plot labels.
    add_subpot_label(fig.axes)
    
    # Save the figure as PNG image.
    plt.savefig(pic_IObytes,  format='png', bbox_inches='tight')
    pic_IObytes.seek(0)
    pic_hash = base64.b64encode(pic_IObytes.read())
    
    # Convert the PNG image to HTML img tag and display with the caption.
    img = f'<img margin="auto" width="{w*100}%" src="data:image/png;base64, {str(pic_hash)[2:-1]}" />'
    caption = f'<figcaption style="text-align: center; font-style: italic;">{caption}</figcaption>'
    display(HTML(f'<br><fig width="100%" id={fig_id}><center>{img}</center> {caption}</fig><br>'))
    
    # Close the figure
    plt.close(fig)
    
    return

def display_image(image_paths, fig_id, caption, layout=(1,1), w=0.6):
    """
    Display multiple images as subplots with captions.

    Parameters:
        image_paths (list of str): A list of paths to the image files.
        fig_ids (list of str): A list of IDs for the subplots.
        captions (list of str): A list of captions for the subplots.
        layout (tuple of int, optional): The layout of the subplots (default is (1, 1)).
        w (float, optional): The width of the subplots as a fraction of the available space (default is 0.6).

    Returns:
        None
    """
    
    # Create a new figure with the specified layout.
    fig, axes = plt.subplots(*layout, figsize=(15, 10))
    
    # Convert axes into list if layout=(1,1).
    if isinstance(axes, plt.Axes):
        axes = [axes]
    
    # Read the image files.
    for image_path, ax in zip(image_paths, axes):
        subplot = mpimg.imread(image_path)
        ax.imshow(subplot)
        ax.axis('off')
        
    # Set dpi for saving the figure.
    plt.rcParams['figure.dpi'] = 300
    plt.rcParams['savefig.dpi'] = 300
    
    # Create a byte stream to store the figure as PNG image.
    pic_IObytes = io.BytesIO()
    
    # Add sub plot labels.
    add_subpot_label(fig.axes)
    
    # Save the figure as PNG image.
    plt.savefig(pic_IObytes,  format='png', bbox_inches='tight')
    pic_IObytes.seek(0)
    pic_hash = base64.b64encode(pic_IObytes.read())
        
    # Convert the PNG image to HTML img tag and display with the caption.
    img = f'<img margin="auto" width="{w*100}%" src="data:image/png;base64, {str(pic_hash)[2:-1]}" />'
    caption = f'<figcaption style="text-align: center; font-style: italic;">{caption}</figcaption>'
    display(HTML(f'<br><fig width="100%" id={fig_id}><center>{img}</center> {caption}</fig><br>'))
    
    return

def display_table(df, table_id, caption, w=1):
    """
    Display a pandas DataFrame as an HTML table with a caption.

    Parameters:
        df (DataFrame): The DataFrame to display.
        table_id (str): The ID of the table.
        caption (str): The caption to display above the table.
        w (float, optional): The width of the table as a fraction of the available space (default is 1).

    Returns:
        None
    """
    
    # Convert the DataFrame to HTML table and display with the caption.
    table = df.round(3).to_html(border=0, classes='table table-striped', justify='center').replace('<table ', f'<table style="margin: auto; width: {w*100}%;" id="{table_id}" ')
    caption = f'<caption style="text-align: center; font-style: italic; font-size:14px;">{caption}</caption>'
    display(HTML(f"""<br>{table[:table.find('>')+1] + caption + table[table.find('>')+1:]}<br>"""))
    
    return

In [2]:
# General data processing.
ANS_result = to_upper_str(drop_empty(pd.read_csv("ANS_Response.csv"), "id"), ["id", "gender"])
Math_result = to_upper_str(drop_empty(pd.read_csv("Math_Ability_Response.csv"), "id"), ["id", "gender"])
Memory_result = to_upper_str(drop_empty(pd.read_csv("Memory_Response.csv"), "username"), ["username", "gender"])
SR_result = to_upper_str(drop_empty(pd.read_csv("Spatial_Reasoning_Response.csv"), "user_id"), ["user_id", "gender"])

ANS_result.rename(columns={'score': 'ANS_score'}, inplace=True)
Math_result.rename(columns={'score': 'Math_score'}, inplace=True)

Memory_result.rename(columns={'username': 'id'}, inplace=True)
Memory_result.rename(columns={'points': 'Memory_score'}, inplace=True)

SR_result.rename(columns={'user_id': 'id'}, inplace=True)
SR_result.rename(columns={'total_score': 'SR_score'}, inplace=True)

main_result = pd.DataFrame()

# Cognitive Test Report

## Introduction

The approximate number system (ANS) is a part of our innate cognition to rapidly and intuitively sense numbers and their relations [Park, & Brannon. (2013)](#park_2013). This sense is active throughout our lives and allows us to estimate huge quantities without counting. ANS application is most evident in the visual context such as estimating the number of dots in a single frame. ANS can operate on any approximation independent of modality [Park, & Brannon. (2013)](#park_2013). For example, estimating the number of voices heard from a recording.

Past studies have shown that mathematical aptitude is influenced by ANS [Libertus, Feigenson, & Halberda. (2011)](#libertus_2011),[Zhang, Liu, Chen, & Zhou. (2019)](#zhang_2019). This relationship has been documented from the earliest developmental stage where studies have shown that ANS accuracy measured as early as 6 months of age (or early nursery school age) provides an indicator of symbolic math performance [Castronovo, & Göbel. (2012)](#castronovo_2012). This is so as ANS helps to aid in children’s formation of imprecise numerical estimation which is utilised in magnitude comparison and mathematical learning [Geary, Saults, Liu, & Hoard. (2000)](#geary_2000).  Previous studies have also found that visual-spatial working memory strongly influences ANS acuity in a study of children who have difficulty in performing arithmetic calculations otherwise known as developmental dyscalculia [Vos, Marinova, De Léon, Sasanguie, & Reynvoet. (2023)](#vos_2023). Building upon this research, the present study aims to investigate the correlation of ANS vs mathematical ability, ANS vs memory and ANS vs spatial reasoning. Based on previous studies, it is expected that mathematical ability has the highest correlation with ANS.

This study also investigates if there is a significant difference between males and females in their ANS aptitude. While some studies report that males performed better in approximate arithmetic due to their greater spatial ability, another study reports that number sense ability between males and females does not differ significantly [Hutchison, Lyons, & Ansari. (2019)](#huchison_2019),[Slimani, Znazen, Bragazzi, Zguira, & Tod. (2018)](#slimani_2018). Therefore, it is anticipated that while ANS aptitude between males and females might differ it will not be significant. As fatigue may impede cognitive tasks, this study also aims to investigate the correlation of fatigue with ANS aptitude [Faber, Maurits, & Lorist. (2012)](#faber_2012). It is expected that more fatigued people tend to score worse on ANS tests.

The significance of this study lies in the understanding of how ANS intertwines with mathematical ability, memory and spatial reasoning. Investigation of gender differences and the effect of fatigue will give insight into how gender-related cognitive differences and external factors affect ANS respectively. Findings from this study may be useful in developing strategies for mathematical education that account for the impact of ANS acuity in different individuals.

## Method

This research aims to assess participants’ cognitive ability via four different tests, including Approximate Number Sense (ANS) test, Math ability test, Memory test, and Spatial reasoning test. Each test takes about approximately 3 minutes to complete.

Our target sample population are mainly students at UCL. To make the sample more representative and meaningful, we not only collected data from BIOS0030 students but also asked other UCL students from different programmes and even different departments to participate in this research. 
 
We applied a tiredness level in the report hypothesis c to find whether different tiredness levels will relate to participants’ performance. Participants will choose the tiredness levels based on the Karolinska Sleepiness Scale from 1 to 10 [Shahid, Wilkinson, Marcu, & Shapiro. (2012)](#kss). The numbers are then used in the data analysis.

### Test details

#### ANS test

ANS is the first test, mainly measuring participants' ability to estimate within a short time. A total of 64 figures with settled ratio numbers and random order will be presented to participants in 0.75s. For each trial, participants will have 3 seconds to consider which sides have more dots and make their final decision by clicking the corresponding buttons. After that, there will be a 1.5s pause between each trial. A seed is created in the code to ensure the reproducibility of this test. More specifically, while the 64 figures come from a random arrangement of 16 pictures with a designed ratio, each participant completes the test using the same order of 64 figures.

In [3]:
caption = "Figure 1: Exemplar ANS test question."
display_image(["./Images/ANS_Question.png"], "fig1", caption, w=0.3)

#### Math ability test

The maths test measures the participant’s mathematical aptitude by answering arithmetic expressions. Each part of the arithmetic expression was shown for 3 seconds one by one and then hidden. The fully formed arithmetic expressions were hidden from the participants as they attempted to input the answers. The arithmetic expression had three levels of difficulty. The first level involved simple addition and subtraction operations with lower two-digit numbers. The second level involved addition and subtraction operations with higher two-digit numbers. The third level involved addition or subtraction with multiplication operations. Besides the score, the average time taken to answer each question was taken. 


In [4]:
caption = "Figure 2: Exemplar math ability test question."
display_image(["./Images/Math_Question.png"], "fig2", caption, w=0.3)

#### Memory test

The memory test measures the participant's ability to memorise several images within a period. A total of 4 images were shown to the participant and in each image, there will be a grid that contains various symbols and numbers. The participants would then be required to memorise every single detail of the symbols such as the colour and the position of the symbols in 20 seconds.  The participant will then be presented with 5 questions that correlate to the image that they had been shown. Every question offered four options, and participants had ten seconds to select the right answer. The difficulty of the test would increase from image to image.

In [5]:
caption = "Figure 3: Exemplar memory test question."
display_image(["./Images/Memory_Question_1.png", "./Images/Memory_Question_2.png"], "fig3", caption, (1,2), w=0.6)

#### Spatial reasoning test

The spatial Reasoning test evaluates a participant's capability to visualize and comprehend three-dimensional space. Participants are presented with a series of 9 questions that involve randomly generated three-dimensional arrangements of cubes. For each question, they are shown 4 two-dimensional images and given 25 seconds to identify the image that cannot be obtained by rotating the given three-dimensional figure. As the test progresses, the complexity of the cube arrangements increases due to an expanding size of the three-dimensional space.

In [6]:
caption = "Figure 4: Exemplar spatial reasoning test question."
display_image(["./Images/SR_Question.png"], "fig4", caption, w=0.3)

## Result

### Split-half reliability test

In [7]:
# Remove outliers based on performance from each test dataset.
ANS_filtered = ANS_result[~ANS_result['id'].isin(get_outliers(ANS_result, ["ANS_score"]))]
Math_filtered = Math_result[~Math_result['id'].isin(get_outliers(Math_result, ["Math_score"]))]
Memory_filtered = Memory_result[~Memory_result['id'].isin(get_outliers(Memory_result, ["Memory_score"]))]
SR_filtered = SR_result[~SR_result['id'].isin(get_outliers(SR_result, ["SR_score"]))]

# Get individual score lists for each test.
ANS_score = []
for score_list in ANS_filtered["correctness"]:
    ANS_score.append([int(score.strip()) for score in score_list.split(',')])
Math_score = []
for score_list in Math_filtered["score_list"]:
    Math_score.append([int(score.strip()) for score in score_list.split(',')])
SR_score = []
for score_list in SR_filtered["score_list"]:
    SR_score.append([int(score.strip()) for score in score_list.split(',')])
memory_score = list(zip(*[Memory_result[col].astype(int) for col in Memory_filtered.columns if col.startswith("Question")]))

In [8]:
# Plot scatter plots depicting the correlation between performances on odd and even-numbered questions.
fig5, axs5 = plt.subplots(1, 4, figsize=(20,5))

ans_scatter, ans_OddvsEven = even_odd_correlation(ANS_score, axs5[0])
math_scatter, math_OddvsEven = even_odd_correlation(Math_score, axs5[1])
memory_scatter, memory_OddvsEven = even_odd_correlation(memory_score, axs5[2])
sr_scatter, sr_OddvsEven = even_odd_correlation(SR_score, axs5[3])

axs5[0].set_title("ANS Test")
axs5[0].set_xlabel("Average Odd Score")
axs5[0].set_ylabel("Average Even Score")
axs5[1].set_title("Math Ability Test")
axs5[1].set_xlabel("Average Odd Score")
axs5[1].set_ylabel("Average Even Score")
axs5[2].set_title("Memory Test")
axs5[2].set_xlabel("Average Odd Score")
axs5[2].set_ylabel("Average Even Score")
axs5[3].set_title("Spatial Reasoning Test")
axs5[3].set_xlabel("Average Odd Score")
axs5[3].set_ylabel("Average Even Score")

caption = "Figure 5: Scatter plots depicting the correlation between performances on odd and even-numbered questions within four cognitive assessments (ANS test, math ability test, memory test, spatial reasoning test)."
display_figure(fig5, "fig5", caption, w=0.8)

"""
To evaluate the reliability of each test, linear regression lines are then produced to compare scores of odd-number and even-number questions. Figure 5 includes four subplots, showing that there is a positive relationship between scores for odd and even number questions. However, the spread of dot points does not show a particular trend. 
""";

To evaluate the reliability of each test, linear regression lines are then produced to compare scores of odd-number and even-number questions. [Figure 5](#fig5) includes four subplots, showing that there is a positive relationship between scores for odd and even number questions. However, the spread of dot points does not show a particular trend. 

In [9]:
# Display Pearson correlation result for Figure 5.
table1_data = {
    "Test Type":["ANS","Math Ability", "Memory", "Spatial Reasoning"],
    "R-value":[ans_OddvsEven[2], math_OddvsEven[2], memory_OddvsEven[2], sr_OddvsEven[2]], 
    "P-value":[ans_OddvsEven[3], math_OddvsEven[3], memory_OddvsEven[3], sr_OddvsEven[3]]
}

caption = "Table 1: Pearson R-values and P-values for the correlation between odd and even question scores across the four tests."

table1 = pd.DataFrame(table1_data).set_index("Test Type")
display_table(table1, "table1", caption, w=0.8)

Unnamed: 0_level_0,R-value,P-value
Test Type,Unnamed: 1_level_1,Unnamed: 2_level_1
ANS,0.469,0.003
Math Ability,0.195,0.19
Memory,0.403,0.004
Spatial Reasoning,0.226,0.185


[Table 1](#table1) is a summary of the results of the hypothesis test. We can find in the table that the ANS test and Memory test have significant p-values under 0.05, which can be further proved reliable. However, the math test and spatial reasoning test do not have significant p-values. 

### Hypothesis A

In [10]:
# Get individuals taking all four tests, and remove outliers based on performance of each test.
ANS_id_set = set(ANS_result["id"].tolist())
Math_id_set = set(Math_result["id"].tolist())
Memory_id_set = set(Memory_result["id"].tolist())
SR_id_set = set(SR_result["id"].tolist())
intersect_id = list(ANS_id_set.intersection(Math_id_set,Memory_id_set,SR_id_set))

main_result["id"] = intersect_id
main_result = main_result.merge(ANS_result[["id", "ANS_score"]], on="id", how="left")
main_result = main_result.merge(Math_result[["id", "Math_score"]], on="id", how="left")
main_result = main_result.merge(Memory_result[["id", "Memory_score"]], on="id", how="left")
main_result = main_result.merge(SR_result[["id", "SR_score"]], on="id", how="left")

outliers = get_outliers(main_result, ["ANS_score", "Math_score","Memory_score", "SR_score"])
main_filtered = main_result[~main_result['id'].isin(outliers)]

In [11]:
# Plot histogram illustrating the distribution of participant scores across four cognitive tests.
fig6, axs6 = plt.subplots(1, 4, figsize=(30,7.5))

ans_hist=axs6[0].hist(main_filtered["ANS_score"], edgecolor='black', linewidth=1.5, bins=np.arange(42,64,2))
axs6[0].set_xticks(range(42,65))
math_hist=axs6[1].hist(main_filtered["Math_score"], edgecolor='black', linewidth=1.5, bins=np.arange(8,16,1))
axs6[1].set_xticks(range(8,16))
memory_hist=axs6[2].hist(main_filtered["Memory_score"], edgecolor='black', linewidth=1.5, bins=np.arange(0,21,2))
axs6[2].set_xticks(range(0,21))
sr_hist=axs6[3].hist(main_filtered["SR_score"], edgecolor='black', linewidth=1.5, bins=np.arange(0,10,1))
axs6[3].set_xticks(range(0,10))

axs6[0].set_title("ANS Test Distribution")
axs6[0].set_xlabel("Score")
axs6[0].set_ylabel("Number of Participants")
axs6[1].set_title("Math Ability Test Distribution")
axs6[1].set_xlabel("Score")
axs6[1].set_ylabel("Number of Participants")
axs6[2].set_title("Memory Test Distribution")
axs6[2].set_xlabel("Score")
axs6[2].set_ylabel("Number of Participants")
axs6[3].set_title("Spatial Reasoning Test Distribution")
axs6[3].set_xlabel("Score")
axs6[3].set_ylabel("Number of Participants")

caption = "Figure 6: Histogram illustrating the distribution of participant scores across four cognitive tests."
display_figure(fig6, "fig6", caption, w=0.8)

"""
Figure 6 shows the distribution of four cognitive tests. Among the four tests, the distribution of the math test is left-skewed while the spatial reasoning test has a right skewness. The remaining two tests, the ANS  test and the memory test, both have normal distributions.
""";

[Figure 6](#fig6) shows the distribution of four cognitive tests. Among the four tests, the distribution of the math test is left-skewed while the spatial reasoning test has a right skewness. The remaining two tests, the ANS  test and the memory test, both have normal distributions.

In [12]:
# Plot scatter plots illustrating the relationship between ANS Test results and performances on the three other cognitive assessments.
fig7, axs7 = plt.subplots(1, 3, figsize=(20,5))
ans_math_scatter, ANSvsMath = linear_regression(main_filtered["ANS_score"], main_filtered["Math_score"], axs7[0])
axs7[0].set_xticks(range(42,65))
ans_memory_scatter, ANSvsMemory = linear_regression(main_filtered["ANS_score"], main_filtered["Memory_score"], axs7[1])
axs7[1].set_xticks(range(42,65))
ans_sr_scatter, ANSvsSR = linear_regression(main_filtered["ANS_score"], main_filtered["SR_score"], axs7[2])
axs7[2].set_xticks(range(42,65))

axs7[0].set_title("ANS Test vs Math Ability Test")
axs7[0].set_xlabel("ANS Score")
axs7[0].set_ylabel("Math Score")
axs7[1].set_title("ANS Test vs Memory Test")
axs7[1].set_xlabel("ANS Score")
axs7[1].set_ylabel("Memory Score")
axs7[2].set_title("ANS Test vs Spatial Reasoning Test")
axs7[2].set_xlabel("ANS Score")
axs7[2].set_ylabel("Spatial Reasoning Score")

caption = "Figure 7: Scatter plots illustrating the relationship between ANS Test results and performances on the three other cognitive assessments."
display_figure(fig7, "fig7", caption, w=0.8)

"""
In Figure 7, the relationship between the ANS test and the rest of the three tests is depicted. Generally, the ANS test is positively related to all other three tests based on figures. However, the spread of dots does not follow a positive relation trend.
""";

In [Figure 7](#fig7), the relationship between the ANS test and the rest of the three tests is depicted. Generally, the ANS test is positively related to all other three tests based on figures. However, the spread of dots does not follow a positive relation trend.

In [13]:
# Display Pearson correlation result for Figure 7.
table2_data = {
    "Test Type":["ANS vs Math Ability","ANS vs Memory", "ANS vs Spatial Reasoning"],
    "R-value":[ANSvsMath[2], ANSvsMemory[2], ANSvsSR[2]], 
    "P-value":[ANSvsMath[3], ANSvsMemory[3], ANSvsSR[3]]
}

caption = "Table 2: Pearson R-values and P-values comparing the correlation of ANS performance with math ability, memory, and spatial reasoning scores."
table2 = pd.DataFrame(table2_data).set_index("Test Type")
display_table(table2, "table2", caption, w=0.8)

Unnamed: 0_level_0,R-value,P-value
Test Type,Unnamed: 1_level_1,Unnamed: 2_level_1
ANS vs Math Ability,0.186,0.353
ANS vs Memory,0.217,0.277
ANS vs Spatial Reasoning,0.261,0.189


In [Table 2](#table2), with the Pearson hypothesis test, we can see the comparison results of the relation between the ANS test and the other three tests. All calculated p-values are larger than 0.05, indicating that the ANS test does not have a significant relation with any of the other three tests. 

In [14]:
# Display Spearman correlation result for Figure 7.
ans_math_corr, ans_math_p_value = spearmanr(main_filtered["ANS_score"], main_filtered["Math_score"])
ans_memory_corr, ans_memory_p_value = spearmanr(main_filtered["ANS_score"], main_filtered["Memory_score"])
ans_sr_corr, ans_sr_p_value = spearmanr(main_filtered["ANS_score"], main_filtered["SR_score"])

table3_data = {
    "Test Type":["ANS vs Math Ability","ANS vs Memory", "ANS vs Spatial Reasoning"],
    "ρ-value": [ans_math_corr, ans_memory_corr, ans_sr_corr],
    "P-value": [ans_math_p_value, ans_memory_p_value, ans_sr_p_value]
}

caption = "Table 3: Spearman R-values and P-values comparing the correlation of ANS performance with math ability, memory, and spatial reasoning scores."
table3 = pd.DataFrame(table3_data).set_index("Test Type")
display_table(table3, "table3", caption, w=0.8)

Unnamed: 0_level_0,ρ-value,P-value
Test Type,Unnamed: 1_level_1,Unnamed: 2_level_1
ANS vs Math Ability,0.136,0.499
ANS vs Memory,0.184,0.36
ANS vs Spatial Reasoning,0.215,0.281


[Table 3](#table3) is a summary table for another hypothesis test, Spearman's rank-order correlation, to evaluate the relation between the ANS test and the other three tests. The p-values are still larger than 0.05, failing to prove a significant relation.

### Hypothesis B

In [15]:
# Get individuals are binary gender, and remove outliers based on performance across genders.
genders = ["MALE","FEMALE"]

ans_outliers = []
for gender in genders:
    ans_outliers += get_outliers(ANS_result[ANS_result["gender"] == gender], ["ANS_score"])
ANS_filtered = ANS_result[~ANS_result['id'].isin(ans_outliers) & ANS_result['gender'].isin(genders)]
ANS_by_gender = [ANS_filtered[ANS_filtered['gender'] == gender]['ANS_score'] for gender in genders]

math_outliers = []
for gender in genders:
    math_outliers += get_outliers(Math_result[Math_result["gender"] == gender], ["Math_score"])
Math_filtered = Math_result[~Math_result['id'].isin(math_outliers) & Math_result['gender'].isin(genders)]
Math_by_gender = [Math_filtered[Math_filtered['gender'] == gender]['Math_score'] for gender in genders]

memory_outliers = []
for gender in genders:
    memory_outliers += get_outliers(Memory_result[Memory_result["gender"] == gender], ["Memory_score"])
Memory_filtered = Memory_result[~Memory_result['id'].isin(memory_outliers) & Memory_result['gender'].isin(genders)]
Memory_by_gender = [Memory_filtered[Memory_filtered['gender'] == gender]['Memory_score'] for gender in genders]

sr_outliers = []
for gender in genders:
    sr_outliers += get_outliers(SR_result[SR_result["gender"] == gender], ["SR_score"])
SR_filtered = SR_result[~SR_result['id'].isin(sr_outliers) & SR_result['gender'].isin(genders)]
SR_by_gender = [SR_filtered[SR_filtered['gender'] == gender]['SR_score'] for gender in genders]

results_by_gender = [ANS_by_gender, Math_by_gender, Memory_by_gender, SR_by_gender]

In [16]:
# Plot boxplots illustrating scores for each test across genders.
fig8, axs8 = plt.subplots(1,4, figsize=(20,5))

axs8[0].boxplot(ANS_by_gender)
axs8[0].set_xticklabels(genders)
axs8[1].boxplot(Math_by_gender)
axs8[1].set_xticklabels(genders)
axs8[2].boxplot(Memory_by_gender)
axs8[2].set_xticklabels(genders)
axs8[3].boxplot(SR_by_gender)
axs8[3].set_xticklabels(genders)

axs8[0].set_title("ANS Test")
axs8[0].set_xlabel("Gender")
axs8[0].set_ylabel("Score")
axs8[1].set_title("Math Ability Test")
axs8[1].set_xlabel("Gender")
axs8[1].set_ylabel("Score")
axs8[2].set_title("Memory Test")
axs8[2].set_xlabel("Gender")
axs8[2].set_ylabel("Score")
axs8[3].set_title("Spatial Reasoning Test")
axs8[3].set_xlabel("Gender")
axs8[3].set_ylabel("Score")

caption = "Figure 8: Boxplots illustrating scores for each test across genders."
display_figure(fig8, "fig8", caption, w=0.8)

"""
Figure 8 includes four subplots to illustrate the performance of males and females in each cognitive test. For the ANS test, the female sample shows a wider range than the male sample and also has a larger median score. In the math test, the male sample has a larger median while the female sample still has a wider range. The range of male and female samples is approximately equal, and the male sample shows a larger median in both the memory and the spatial reasoning test. 
""";

[Figure 8](#fig8) includes four subplots to illustrate the performance of males and females in each cognitive test. For the ANS test, the female sample shows a wider range than the male sample and also has a larger median score. In the math test, the male sample has a larger median while the female sample still has a wider range. The range of male and female samples is approximately equal, and the male sample shows a larger median in both the memory and the spatial reasoning test. 

In [17]:
# Plot dot plots comparing test scores across genders.
fig9, axs9 = plt.subplots(1,4, figsize=(20,5))

axs9[0].plot(ANS_filtered['gender'], ANS_filtered['ANS_score'],'.')
axs9[0].set_xlim(-1,2)
axs9[1].plot(Math_filtered['gender'], Math_filtered['Math_score'],'.')
axs9[1].set_xlim(-1,2)
axs9[2].plot(Memory_filtered['gender'], Memory_filtered['Memory_score'],'.')
axs9[2].set_xlim(-1,2)
axs9[3].plot(SR_filtered['gender'], SR_filtered['SR_score'],'.')
axs9[3].set_xlim(-1,2)

for i, result_by_gender in enumerate(results_by_gender):
    for j, gender in enumerate(genders):
        mean, std_err = get_error_bar(result_by_gender[j])
        axs9[i].errorbar(gender, mean, yerr=std_err, fmt='o', color='red', markersize=5) 

axs9[0].set_title(f"ANS Test(p-value:{ttest_ind(*ANS_by_gender)[1]:.3g})")
axs9[0].set_xlabel("Gender")
axs9[0].set_ylabel("Score")
axs9[1].set_title(f"Math Ability Test(p-value:{ttest_ind(*Math_by_gender)[1]:.3g})")
axs9[1].set_xlabel("Gender")
axs9[1].set_ylabel("Score")
axs9[2].set_title(f"Memory Test(p-value:{ttest_ind(*Memory_by_gender)[1]:.3g})")
axs9[2].set_xlabel("Gender")
axs9[2].set_ylabel("Score")
axs9[3].set_title(f"Spatial Reasoning Test(p-value:{ttest_ind(*SR_by_gender)[1]:.3g})")
axs9[3].set_xlabel("Gender")
axs9[3].set_ylabel("Score")

caption = "Figure 9: Dot plots comparing test scores across genders."
display_figure(fig9, "fig9", caption, w=0.8)

"""
We can see the results of the comparison of performances by gender in Figure 9. Among the four cognitive tests, only the math test has a significant p-value, smaller than 0.05, indicating that the differences in math test performances resulting from different genders are significant. 
""";

We can see the results of the comparison of performances by gender in [Figure 9](#fig9). Among the four cognitive tests, only the math test has a significant p-value, smaller than 0.05, indicating that the differences in math test performances resulting from different genders are significant. 

In [18]:
# Display summary of data for Figure 8 & 9.
table4_data = {
    "Test Type": ["ANS Test", "Math Ability Test", "Memory Test", "Spatial Reasoning Test"],
    "Mean Male": [], 
    "SD Male": [],
    "Mean Female": [],
    "SD Female": [],
    "Mean Difference": [],
    "P-value": [],
    "T-statistic": []
}

for result_by_gender in results_by_gender:
    t_stat, p_value = ttest_ind(result_by_gender[0], result_by_gender[1])
    table4_data["Mean Male"].append(result_by_gender[0].mean())
    table4_data["SD Male"].append(result_by_gender[0].mean())
    table4_data["Mean Female"].append(result_by_gender[1].mean())
    table4_data["SD Female"].append(result_by_gender[1].mean())
    table4_data["Mean Difference"].append(result_by_gender[0].mean()-result_by_gender[1].mean())
    table4_data["P-value"].append(p_value)
    table4_data["T-statistic"].append(t_stat)
    
table4 = pd.DataFrame(table4_data).set_index("Test Type")
caption = "Table 4: Summary of mean scores, standard deviations, score differences, and p-values for each test, segmented by gender."
display_table(table4, "table4", caption, w=1)

Unnamed: 0_level_0,Mean Male,SD Male,Mean Female,SD Female,Mean Difference,P-value,T-statistic
Test Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ANS Test,51.941,51.941,51.778,51.778,0.163,0.907,0.118
Math Ability Test,13.08,13.08,12.0,12.0,1.08,0.023,2.347
Memory Test,10.643,10.643,10.273,10.273,0.37,0.697,0.392
Spatial Reasoning Test,2.833,2.833,2.667,2.667,0.167,0.799,0.257


[Table 4](#table4) summarises the data required for a two-sample t-test and the hypothesis test results. 

### Hypothesis C

In [19]:
# Get tiredness levels with entries, and remove outliers based on performance of ANS test across tiredness levels.
tiredness_levels = sorted(ANS_result['tiredness'].unique())
outliers = []
for level in tiredness_levels:
    outliers += get_outliers(ANS_result[ANS_result["tiredness"] == level], ["ANS_score"])
ANS_filtered = ANS_result[~ANS_result['id'].isin(outliers)]

In [20]:
# Plot boxplot and dot plot comparing score distribution and central tendencies in ANS tests across tiredness levels。
fig10, axs10 = plt.subplots(1, 2,figsize=(15, 5))

ANS_by_tiredness = [ANS_filtered[ANS_filtered['tiredness'] == level]['ANS_score'] for level in tiredness_levels]

axs10[0].boxplot(ANS_by_tiredness)
axs10[0].set_xticklabels(tiredness_levels)

axs10[1].plot(ANS_filtered['tiredness'], ANS_filtered['ANS_score'],'.')
for i, level in enumerate(tiredness_levels):
    mean, std_err = get_error_bar(ANS_by_tiredness[i])
    axs10[1].errorbar(i+1, mean, yerr=std_err, fmt='o', color='red', markersize=3)    

axs10[0].set_title("Boxplot of ANS performance according to tiredness level")
axs10[0].set_xlabel("Tiredness Level")
axs10[0].set_ylabel("Score")
axs10[1].set_title(f"Dot plot of ANS performance according to tiredness level(p-value:{f_oneway(*ANS_by_tiredness)[1]:.3g})")
axs10[1].set_xlabel("Tiredness Level")
axs10[1].set_ylabel("Score")

caption = "Figure 10: Analysis of score distribution and central tendencies in ANS tests by tiredness levels, featuring a boxplot for dispersion on the left and a dot plot showing means and error bars on the right."
display_figure(fig10, "fig10", caption, w=0.8)

"""
Hypothesis C is designed to find the relation between different tiredness levels and participants' performance on the ANS test. Figure 10 contains two subplots, a and b, to show the results. From the subplot a, we can see that participants have similar medians from level 1 to level 6, but the scores median drop significantly for level 7 and 8. Subplot b is a dot plot with error bars showing the same comparison.
""";

Hypothesis C is designed to find the relation between different tiredness levels and participants' performance on the ANS test. [Figure 10](#fig10) contains two subplots, a and b, to show the results. From the subplot a, we can see that participants have similar medians from level 1 to level 6, but the scores median drop significantly for level 7 and 8. Subplot b is a dot plot with error bars showing the same comparison.

In [21]:
# Display ANOVA test results for Figure 10
ans_f_statistic, ans_p_value = f_oneway(*ANS_by_tiredness)

table5_data = {
    "Test Type":["ANOVA Test"],
    "F-statistic": [ans_f_statistic],
    "P-value": [ans_p_value]
}

table5 = pd.DataFrame(table5_data).set_index("Test Type")
caption = "Table 5: ANOVA test results showing F-statistic and P-value for differences in ANS performance across varying levels of tiredness"
display_table(table5, "table5", caption, w=0.8)

Unnamed: 0_level_0,F-statistic,P-value
Test Type,Unnamed: 1_level_1,Unnamed: 2_level_1
ANOVA Test,7.654,0.0


Because hypothesis c is a comparison between more than two items, the ANOVA test is used rather than the normal t-test. [Table 5](#table5) demonstrating the final result of the ANOVA test. The p-value is approximately 0.0, indicating that we can reject the null hypothesis and conclude that differences in tiredness levels have significant effects on participants' performance on the ANS test.

## Discussion

The study investigates the correlation of individuals’ ANS performance with math, memory and spatial reasoning tests. Contrary to earlier hypotheses, analysis shows no statistically significant correlation between ANS and the cognitive abilities under the conditions of the study. The lack of correlation with mathematical ability contrasts with previous study where ANS is causally related to math performance determined by standardized math tests, after accounting for general intelligence and verbal ability [Park, & Brannon. (2013)](#park_2013),[Libertus, Feigenson, & Halberda. (2011)](#libertus_2011). The link is suggested to stem from visual form perception as measured by visual short-term memory tasks [Zhang, Liu, Chen, & Zhou. (2019)](#zhang_2019). However, spatial processing measured by spatial short-term memory holds little association [Zhang, Liu, Chen, & Zhou. (2019)](#zhang_2019). However, other studies have found that ANS acuity does not necessarily correlate with greater mathematical achievement in adults, justifying that ANS acuity likely reaches peak in early adulthood [Castronovo, & Göbel. (2012)](#castronovo_2012). Exact number system (ENS) which is a  formal, symbolic, linear, and later-acquired numerical system is hypothesized to play a greater role in symbolic numerical abilities associated with mathematical ability among adults compared to ANS [Castronovo, & Göbel. (2012)](#castronovo_2012).

Investigation on the gender-difference in the test scores shows that only the math test scores show statistically significant differences in genders, suggesting that males may show greater aptitude in arithmetic fluency. This finding is supported by studies that show men having higher scores for spatial cognition and visuo-spatial working memory which contributes to mathematical performanc[Geary, Saults, Liu, & Hoard. (2000)](#geary_2000).  However, there is a discourse on how far the difference is intrinsic in nature. Research shows that math anxiety and gender-related stereotypes relate to the underperformance of females in mathematical test[Vos, Marinova, De Léon, Sasanguie, & Reynvoet. (2023)](#vos_2023). Moreover, it is found that differences are not present in ealry childhood, suggesting that there is no definite evidence to difference in cognitive ability is due to intrinsic gender differenc[Hutchison, Lyons, & Ansari. (2019)](#huchison_2019). 

Furthermore, the investigation reveals statistically significant differences in performance of the ANS test between tiredness levels. The finding indicates that participants with higher tiredness level will perform worse in the tests is consistent with previous studies that have shown a decline in performance among individuals displaying signs of fati[Slimani, Znazen, Bragazzi, Zguira, & Tod. (2018)](#slimani_2018). They are more prone to committing more errors during the test which may arise from the cognitive strain associated with the rapid analysis of images, which limits the brain processing ability [Faber, L. G., Maurits, N. M., & Lorist, M. M. (2012)](#faber_2012).

Limitation and flaws were recognized in this study such as evaluating tiredness through the subjective KSS scale which presents certain constraints in accurately capturing the participants actual fatigue levels. The accuracy of the scales is restricted to a distinct rating category that is incapable of capturing subtle changes in the level of tiredness of the participants during the tests. One can enhance the comprehensiveness and validity of data by integrating subjective tiredness scales with objective measures, such as actigraphy or physiological assessments. 

Moreover, insufficient sample sizes also pose certain constraints in delivering precise interpretations of the results. These constraints are evident in Hypotheses B, wherein an outlier emerges after the elimination of outliers from the initial dataset of the ANS test. This is due to the small number of data (~40) which makes the emergence of new outliers more pronounced after reducing data points.While the presence of a new outlier does not have a substantial impact on the results of this report, it is advisable to  ensure a big sample size is used in future investigations.

Furthermore, the lack of reliability in Math and ANS tests present significant limitations undermine the validity of data-analysis conclusions, making it difficult to accurately evaluate hypotheses. Since this issue stems from an imbalance in difficulty level of the questions, the ability of the participants would not be properly assessed. Future tests could be made to have a balanced difficulty range to increase reliability of the tests


## Summary and Outlook
The findings from this study offers insights into ANS association with cognitive abilities, and how far it is influenced by difference in gender and fatigue. This helps us to understand how ANS may operate with different people under different circumstances Moreover, by focusing on a demographic of individuals aged 19-25, this study offers a perspective on the characteristics of fully developed ANS ability and cognitive maturity. This study also provides a foundation for more comprehensive research to be conducted. Among them is longitudinal studies on ANS alongside ENS development. Investigating the evolution of ANS and ENS with age and education level may provide essential information about how numerical approximation skills develop. Besides that, studies should be done on adults with developmental dyscalculia, a math learning disorder, to investigate the disorder’s links with ANS impairment. This could lead to better intervention strategies to help adults overcome dyscalculia.

The importance of ANS in everyday decision making and educational development makes this area of research vital. Adding to our understanding of ANS results in better educational strategies to approach STEM education in all levels with keeping the variability of ANS acuity of different individuals in mind.

## References

1. <a name="park_2013" href="https://doi.org/10.1177/0956797613482944">
Park, J., & Brannon, E. M. (2013)</a>
Training the approximate number system improves math proficiency.
Psychological science, 24(10).
<br>

2. <a name="libertus_2011" href=" https://doi.org/10.1111/j.1467-7687.2011.01080.x">
Libertus, M. E., Feigenson, L., & Halberda, J. (2011)</a>
Preschool acuity of the approximate number system correlates with school math ability.
Developmental science, 14(6), 1292-1300.
<br>

3. <a name="zhang_2019" href="https://doi.org/10.1016/j.lindif.2019.02.008">
Zhang, Y., Liu, T., Chen, C., & Zhou, X. (2019)</a>
Visual form perception supports approximate number system acuity and arithmetic fluency.
Learning and Individual Differences, 71, 1-12.
<br>

4. <a name="castronovo_2012" href="https://doi.org/10.1371/journal.pone.0033832">
Castronovo, J., & Göbel, S. M. (2012)</a>
Impact of high mathematics education on the number sense.
PloS one, 7(4), e33832.
<br>

5. <a name="geary_2000" href="https://doi.org/10.1006/jecp.2000.2594">
Geary, D. C., Saults, S. J., Liu, F., & Hoard, M. K. (2000)</a>
Sex differences in spatial cognition, computational fluency, and arithmetical reasoning.
Journal of Experimental child psychology, 77(4), 337-353.
<br>

6. <a name="vos_2023" href="https://doi.org/10.1016/j.lindif.2022.102255">
Vos, H., Marinova, M., De Léon, S. C., Sasanguie, D., & Reynvoet, B. (2023)</a>
Gender differences in young adults' mathematical performance: Examining the contribution of working memory, math anxiety and gender-related stereotypes.
Learning and Individual Differences, 102, 102255.
<br>

7. <a name="hutchison_2019" href=" https://doi.org/10.1111/cdev.13044">
Hutchison, J. E., Lyons, I. M., & Ansari, D. (2019)</a>
More similar than different: Gender differences in children's basic numerical skills are the exception not the rule.
Child development, 90(1), e66-e79.
<br>

8. <a name="slimani_2018" href="https://doi.org/10.3390/jcm7120510">
Slimani, M., Znazen, H., Bragazzi, N. L., Zguira, M. S., & Tod, D. (2018)</a>
The effect of mental fatigue on cognitive and aerobic performance in adolescent active endurance athletes: insights from a randomized counterbalanced, cross-over trial.
Journal of clinical medicine, 7(12), 510.
<br>

9. <a name="faber_2012" href="https://doi.org/10.1371/journal.pone.0048073">
Faber, L. G., Maurits, N. M., & Lorist, M. M. (2012)</a>
Mental fatigue affects visual selective attention.
PloS one, 7(10), e48073.
<br>

10. <a name="kss" href="https://www.google.com/books/edition/STOP_THAT_and_One_Hundred_Other_Sleep_Sc/GbFRmzJ92NMC?hl=en&gbpv=0">
Shahid, A., Wilkinson, K., Marcu, S., & Shapiro, C. M. (Eds.). (2012)</a>
STOP, THAT and one hundred other sleep scales.
Springer Science & Business Media.
<br>