In [None]:
# Import packages

import os

import numpy as np
import pandas as pd
from scipy import stats

import matplotlib.pyplot as plt
import seaborn as sns

# Customize Pandas
# (disabled) Uncomment to render floats with two decimals and thousands separators:
# pd.options.display.float_format = '{:,.2f}'.format

# Customize Seaborn: select the "darkgrid" style and the "paper" plotting
# context with fonts scaled by a factor of 1.2.
sns.set_style("darkgrid")
sns.set_context("paper", font_scale=1.2)

### Data preprocessing

In [None]:
# List the .txt data files stored in the "HEBBdataDivided" folder of the
# current working directory.
# The path is built with os.path.join: the former "\HEBBdataDivided" literal
# relied on the invalid escape sequence "\H" and on the Windows separator, so
# it did not resolve on other platforms.
data_dir = os.path.join(os.getcwd(), "HEBBdataDivided")
# os.listdir returns entries in arbitrary order; sort them so that every run
# processes the files in the same sequence.
files = sorted(file for file in os.listdir(data_dir) if ".txt" in file)

In [None]:
# Tally tables shared by the cells below. Every table maps a running count
# (keys 0-11) to a list of zeroed counters: 9 slots for the serial positions,
# 8 slots for the pairs of adjacent positions.
correct_occurrences = {count: [0] * 9 for count in range(12)}
correct_opportunities = {count: [0] * 9 for count in range(12)}

minus_occurrences = {count: [0] * 8 for count in range(12)}
minus_opportunities = {count: [0] * 8 for count in range(12)}

plus_occurrences = {count: [0] * 8 for count in range(12)}
plus_opportunities = {count: [0] * 8 for count in range(12)}

# Slope lists, filled in by later cells.
correct_slopes = []
error_slopes = []

# Trial numbers 4, 8, ..., 48 (every fourth trial).
repeated = list(range(4, 49, 4))

In [None]:
# Tally, file by file, how often each position of the reference sequence is
# reproduced in place ("correct") or shifted by one position ("plus"/"minus")
# on the repeated trials. Results accumulate in the *_occurrences and
# *_opportunities tables, keyed by the number of earlier matches in that file.

# Portable path to the data folder (the previous "\HEBBdataDivided\\" literal
# contained an invalid "\H" escape and only resolved on Windows).
data_dir = os.path.join(os.getcwd(), "HEBBdataDivided")

for file in files:
    # Running number of earlier matches per position within this file; it
    # selects the table row the next repeated trial contributes to.
    correct_count = [0] * 9
    minus_count = [0] * 8
    plus_count = [0] * 8
    # Reference sequence, read from this file's "Said" line for trial 4.
    # Reset for every file so a sequence left over from a previous file (or a
    # previous run of this cell) is never silently reused.
    correct = None
    with open(os.path.join(data_dir, file)) as f:
        for line in f:
            splitted = line.strip().split(" ")
            if splitted[0] == "Clicked" and int(splitted[1].strip("():")) in repeated:
                if correct is None:
                    raise ValueError(
                        "{}: 'Clicked' line of a repeated trial found before "
                        "the 'Said' line of trial 4".format(file)
                    )
                answers = splitted[2:]
                # In-place matches: response i equals reference item i.
                for i in range(9):
                    correct_opportunities[correct_count[i]][i] += 1
                    if correct[i] == answers[i]:
                        correct_occurrences[correct_count[i]][i] += 1
                        correct_count[i] += 1
                # Adjacent transpositions between positions i and i + 1.
                for i in range(8):
                    minus_opportunities[minus_count[i]][i] += 1
                    plus_opportunities[plus_count[i]][i] += 1
                    # Reference item i given one position later.
                    if correct[i] == answers[i + 1]:
                        minus_occurrences[minus_count[i]][i] += 1
                        minus_count[i] += 1
                    # Reference item i + 1 given one position earlier.
                    if correct[i + 1] == answers[i]:
                        plus_occurrences[plus_count[i]][i] += 1
                        plus_count[i] += 1
            elif splitted[0] == "Said" and int(splitted[1].strip("():")) == 4:
                correct = splitted[2:]

In [None]:
# Write "correct.txt": one row per serial position (1-9) and one column per
# count 1-4, each cell holding occurrences / opportunities from the tally
# tables. Then fit a straight line through every row to get its slope.
x = [1, 2, 3, 4]
probabilities = []   # per row: the probabilities that could be computed
observed_counts = []  # per row: the counts (x values) those probabilities belong to

with open("correct.txt", mode="w") as f:
    f.write("\t".join(["Serial_position"] + [str(j) for j in x]) + "\n")
    for i in range(9):
        cells = [str(i + 1)]
        row_probability = []
        row_counts = []
        for j in x:
            opportunities = correct_opportunities[j][i]
            if opportunities:
                probability = correct_occurrences[j][i] / opportunities
                row_probability.append(probability)
                row_counts.append(j)
                cells.append(str(probability))
            else:
                # No opportunity was recorded for this cell: write "NA" (as the
                # error table does) instead of dividing by zero.
                cells.append("NA")
        f.write("\t".join(cells) + "\n")
        probabilities.append(row_probability)
        observed_counts.append(row_counts)

# Slope of the least-squares line per serial position. Rebuilt from scratch so
# the cell can be re-run safely.
correct_slopes = []
for i, (row_counts, row_probability) in enumerate(zip(observed_counts, probabilities)):
    if len(row_probability) >= 2:
        # np.polyfit returns coefficients highest degree first: [slope, intercept].
        slope = np.polyfit(row_counts, row_probability, 1)[0]
    else:
        # A line cannot be fitted through fewer than two points.
        slope = float("nan")
    correct_slopes.append(slope)
    print("Correct Slope " + str(i + 1) + ": " + str(slope))

In [None]:
# Write "error.txt": for every pair of adjacent positions two rows are emitted,
# "T{n} + 1" (from the plus_* tables) followed by "T{n+1} - 1" (from the
# minus_* tables), with one column per count 1-4. Cells without any recorded
# opportunity are written as "NA". Then fit a line through every row.
recall_counts = [1, 2, 3, 4]
probabilities = []    # per row: the probabilities that could be computed
observed_counts = []  # per row: the counts (x values) those probabilities belong to

with open("error.txt", mode="w") as f:
    f.write("\t".join(["Transposition"] + [str(j) for j in recall_counts]) + "\n")

    for i in range(8):
        row_specs = (
            ("T{} + 1".format(i + 1), plus_occurrences, plus_opportunities),
            ("T{} - 1".format(i + 2), minus_occurrences, minus_opportunities),
        )
        for row_label, occurrences, opportunities in row_specs:
            cells = [row_label]
            row_probability = []
            row_counts = []
            for j in recall_counts:
                if opportunities[j][i]:
                    probability = occurrences[j][i] / opportunities[j][i]
                    row_probability.append(probability)
                    row_counts.append(j)
                    cells.append(str(probability))
                else:
                    cells.append("NA")
            f.write("\t".join(cells) + "\n")
            probabilities.append(row_probability)
            observed_counts.append(row_counts)

# Slope of the least-squares line per row. The list is rebuilt from scratch:
# appending to the list created in an earlier cell duplicated every entry each
# time this cell was re-run.
error_slopes = []
for i, (row_counts, row_probability) in enumerate(zip(observed_counts, probabilities)):
    if len(row_probability) >= 2:
        # Fit against the counts the values actually belong to, so a skipped
        # "NA" cell does not shift the remaining points along the x axis.
        slope = np.polyfit(row_counts, row_probability, 1)[0]
    else:
        # A line cannot be fitted through fewer than two points.
        slope = float("nan")
    error_slopes.append(slope)
    print("Error Slope " + str(i + 1) + ": " + str(slope))

In [None]:
# Copy "correct.txt" into "correctwslopes.txt" with one extra trailing column:
# the header row receives the name "Slope", every following row receives the
# matching entry of correct_slopes.
with open("correct.txt", "r") as src, open("correctwslopes.txt", "w") as dest:
    for row_number, line in enumerate(src):
        if row_number == 0:
            extra_cell = "\tSlope"
        else:
            extra_cell = "\t" + str(correct_slopes[row_number - 1])
        dest.write(line.rstrip("\n") + extra_cell + "\n")

In [None]:
# Copy "error.txt" into "errorwslopes.txt" with one extra trailing column:
# "Slope" on the header row, the matching entry of error_slopes on data rows.
with open("error.txt", "r") as src, open("errorwslopes.txt", "w") as dest:
    for row_number, line in enumerate(src):
        is_header = row_number == 0
        extra_cell = "\tSlope" if is_header else "\t" + str(error_slopes[row_number - 1])
        dest.write(line.rstrip("\n") + extra_cell + "\n")

### Correct Answers

In [None]:
# Load the tab-separated table stored in correct.txt into a DataFrame
df_correct = pd.read_csv(
    "correct.txt",
    delimiter="\t",
)
df_correct

In [None]:
# Unpivot the DataFrame from wide to long format: one row per
# (serial position, count column) pair, tagged with the label "Correct".
# value_vars names the four count columns explicitly so that a "Slope" column
# attached to df_correct by a later cell is not melted in as a fifth category
# when this cell is re-run.
df_correct_long = df_correct.melt(
    id_vars=["Serial_position"],
    value_vars=[str(count) for count in range(1, 5)],
)
df_correct_long["label"] = "Correct"
df_correct_long

### Errors

In [None]:
# Load the tab-separated table stored in error.txt into a DataFrame
df_error = pd.read_csv(
    "error.txt",
    delimiter="\t",
)
df_error

In [None]:
# Unpivot the DataFrame from wide to long format: one row per
# (transposition, count column) pair, tagged with the label "Errors".
# value_vars names the four count columns explicitly so that a "Slope" column
# attached to df_error by a later cell is not melted in as a fifth category
# when this cell is re-run.
df_error_long = df_error.melt(
    id_vars=["Transposition"],
    value_vars=[str(count) for count in range(1, 5)],
)
df_error_long["label"] = "Errors"
df_error_long

### Visualization

In [None]:
# Stack the long-format correct and error tables on top of each other, then
# discard the two identifier columns so only variable / value / label remain.
stacked_long = pd.concat([df_correct_long, df_error_long], axis=0)
df_correct_error = stacked_long.drop(columns=["Serial_position", "Transposition"])
df_correct_error

In [None]:
# Linear regression of the mean probability of a correct recall on the count
# index. Despite its name, df_correct_slopes holds the mean of each count
# column (averaged over all rows), re-indexed 0-3.
# The count columns are selected by name: a positional slice (iloc[:, 1:])
# would also average the "Slope" column once a later cell has attached it to
# df_correct and this cell is re-run.
count_columns = [str(count) for count in range(1, 5)]
df_correct_slopes = df_correct[count_columns].mean().reset_index(drop=True)
x1 = df_correct_slopes.index
y1 = df_correct_slopes
res1 = stats.linregress(x=x1, y=y1)
res1

In [None]:
# Linear regression of the mean error probability on the count index.
# Despite its name, df_error_slopes holds the mean of each count column
# (averaged over all rows, missing values skipped), re-indexed 0-3.
# The count columns are selected by name: a positional slice (iloc[:, 1:])
# would also average the "Slope" column once a later cell has attached it to
# df_error and this cell is re-run.
count_columns = [str(count) for count in range(1, 5)]
df_error_slopes = df_error[count_columns].mean().reset_index(drop=True)
x2 = df_error_slopes.index
y2 = df_error_slopes
res2 = stats.linregress(x=x2, y=y2)
res2

In [None]:
# Figure: strip plot of every probability (correct answers vs. errors) per
# count column, overlaid with the two regression lines fitted above.
fig, ax = plt.subplots(figsize=(6, 7))

# Plot strip plot of both correct answers and errors. The axes are passed
# explicitly instead of relying on pyplot's implicit "current axes".
sns.stripplot(
    data=df_correct_error,
    y="value",
    x="variable",
    hue="label",
    jitter=False,
    palette=["chocolate", "midnightblue"],
    marker="s",
    ax=ax,
)

# Plot the regression lines and keep their handles. Looking them up afterwards
# through ax.lines[...] is fragile because the artists seaborn registers for
# its own legend differ between seaborn versions.
(correct_line,) = ax.plot(
    x1,
    res1.intercept + res1.slope * x1,
    color="chocolate",
    label="Average Slope (Correct)",
)
(error_line,) = ax.plot(
    x2,
    res2.intercept + res2.slope * x2,
    color="darkblue",
    linestyle="--",
    label="Average Slope (Errors)",
)

# Customize ticks of y axis
ax.set_ylim(0, 1)
ax.set_yticks(np.arange(0.0, 1.1, 0.1))

# Customize labels of x axis and y axis
ax.set_xlabel("Number of times was previously recalled")
ax.set_ylabel("Probability of recalling an item")

# Customize legend: strip-plot entries first, then the two regression lines.
# Entries are classified by identity rather than by hard-coded positions in
# the handle list, whose order is not guaranteed.
handles, labels = ax.get_legend_handles_labels()
trend_lines = (correct_line, error_line)
legend_entries = [
    (handle, label)
    for handle, label in zip(handles, labels)
    if not any(handle is line for line in trend_lines)
]
legend_entries += [(line, line.get_label()) for line in trend_lines]
ax.legend(
    [handle for handle, _ in legend_entries],
    [label for _, label in legend_entries],
    loc="upper left",
)

ax.margins(0.3)
fig.tight_layout()
plt.show()

### Table 2

In [None]:
# Display the Series built in the regression cell for correct answers: the
# per-column means of df_correct (despite the name, these are not slopes).
df_correct_slopes

In [None]:
# Attach the fitted slopes (one entry of correct_slopes per row) as a "Slope"
# column and display the table.
# Note: this modifies df_correct in place, so any cell executed after this one
# sees the extra column.
df_correct["Slope"] = correct_slopes
df_correct

### Table 3

In [None]:
# Attach the fitted slopes (one entry of error_slopes per row) as a "Slope"
# column and display the table.
# Note: this modifies df_error in place, so any cell executed after this one
# sees the extra column; the assignment requires error_slopes to have exactly
# one entry per row of df_error.
df_error["Slope"] = error_slopes
df_error

### Data Analysis

#### 