# In [None]:
# setup
import sys
import os
from scipy.stats import linregress
import matplotlib.pyplot as plt
from multiple_regression import format_title

# Add project root to path (if needed)
sys.path.append("/Users/gilanorup/Desktop/Studium/MSc/MA/code/masters_thesis_gn/src")

from config.constants import GIT_DIRECTORY
from regression.multiple_regression import run_multiple_regression

# Set task name; it selects the feature file results/features/<task_name>.csv
# below and is also passed through to the regression as its task label.
task_name = "cookieTheft"

# Run regression and get all relevant variables.
# The call returns seven values, unpacked here into the fitted model, the
# feature matrix / target (X_scaled, y) and their train/test partitions.
# NOTE(review): names suggest X_scaled is standardized and the split is a
# train/test split — confirm against run_multiple_regression.
# All paths are resolved relative to GIT_DIRECTORY.
model, X_scaled, y, X_train, X_test, y_train, y_test = run_multiple_regression(
    features_path=os.path.join(GIT_DIRECTORY, f"results/features/{task_name}.csv"),
    scores_path=os.path.join(GIT_DIRECTORY, "resources/language_scores_all_subjects.csv"),
    target="PhonemicFluencyScore",
    output_dir=os.path.join(GIT_DIRECTORY, "results/regression"),
    task_name=task_name,
    # presumably keeps the run from writing into output_dir — TODO confirm
    save_outputs=False
)



def plot_single_feature(df, feature, target, output_dir=None, task_name=None):
    """
    Creates a scatter plot for a single feature vs. target score, with a
    least-squares regression line whose R^2 is shown in the legend.

    Arguments:
    - df: DataFrame with your data
    - feature: name of the feature column (x-axis)
    - target: name of the target column (e.g., language score; y-axis)
    - output_dir: where to save the plot (optional). When given, the
      directory is created if it is missing and the figure is written there
      as a 300-dpi PNG named "<task_name>_<feature>_vs_<target>.png" (the
      task prefix is omitted when task_name is not set). When None, nothing
      is written and the figure is displayed with plt.show() instead.
    - task_name: optional name of the task (for the title and the filename)

    Rows in which the feature or the target is missing are left out of the
    fit. The regression line and the legend are skipped when fewer than two
    distinct feature values remain, because no line can be fitted then.
    """
    # labels
    feature_label = format_title(feature)
    target_label = format_title(target)
    task_label = format_title(task_name) if task_name else ""

    # A single missing value would turn slope, intercept and R^2 into NaN,
    # so keep only the rows where both columns are present.
    x = df[feature]
    y = df[target]
    complete = x.notna() & y.notna()
    x = x[complete]
    y = y[complete]

    fig = plt.figure(figsize=(6, 6))
    plt.scatter(x, y, alpha=0.7, color="steelblue", s=20)

    # linregress cannot fit a line through fewer than two distinct x values.
    has_fit = x.nunique() >= 2
    if has_fit:
        slope, intercept, r_value, _p_value, _std_err = linregress(x, y)
        plt.plot(
            x,
            slope * x + intercept,
            color="darkred",
            linewidth=2,
            linestyle="-",
            # \u00b2 is the superscript two; written as an escape so the label
            # survives the file being re-read with the wrong encoding.
            label=f"Regression line (R\u00b2={r_value ** 2:.2f})",
        )

    plt.xlabel(feature_label, fontsize=12, fontweight="bold", labelpad=10)
    plt.ylabel(target_label, fontsize=12, fontweight="bold", labelpad=10)
    if task_label:
        title = f"{task_label}: {feature_label} vs. {target_label}"
    else:
        title = f"{feature_label} vs. {target_label}"
    plt.title(title, fontsize=14, fontweight="bold", pad=15)
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10)
    plt.grid(True)
    if has_fit:
        # Only call legend() when the regression line supplied a labelled artist.
        plt.legend(fontsize=10)

    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)
        if task_name:
            filename = f"{task_name}_{feature}_vs_{target}.png"
        else:
            filename = f"{feature}_vs_{target}.png"
        plt.savefig(os.path.join(output_dir, filename), dpi=300)
    else:
        # No target directory: display the figure instead of failing on None.
        plt.show()
    plt.close(fig)