In [7]:
import sys
sys.path.append("../")
from calculate_error import *
from IPython.display import Markdown, display
from sklearn.linear_model import Ridge

In [8]:
collect_model_vicon_csvs()
movements = next(os.walk(GEN_PATH))[1]

for _ in range(20):
    alpha = round(random.uniform(0, 5), 2)
    display(Markdown(f"# Trying ridge regression with alpha {alpha}"))
    for movement in movements:
        assert movement in MOVEMENTS
        movement_path = f"{GEN_PATH}/{movement}/"
        # display(Markdown(f"## Evaluating error for {movement}\n"))
        model_output_path = movement_path + "model/"
        vicon_output_path = movement_path + "vicon/"
        filtered_trial_names = get_filtered_trials(movement=movement)
        num_trials = len(filtered_trial_names)

        random.shuffle(filtered_trial_names)
        num_train_trials = round(
            0.80 * num_trials
        )  # assuming 80-20 split for train and test
        train_trials = filtered_trial_names[:num_train_trials]
        test_trials = filtered_trial_names[num_train_trials:]
        train_trials.sort()
        test_trials.sort()
        xtrain, ytrain = [], []
        reg = None
        filtered_trial_names = train_trials + test_trials

        summary = {"AvgRMSE": 0, "AvgMAE": 0}
        for trial_num, trial_name in enumerate(filtered_trial_names):
            # print(f"\n===== Trial {trial_name} =====")
            # This trial_name should be present in both model and vicon sub-dirs.
            model_csv = model_output_path + f"{trial_name}.csv"
            vicon_csv = vicon_output_path + f"{trial_name}.csv"
            # Frame number for vicon csv files start from 1, and frame number for
            # model csv files start from 0. We need to make adjustments. Frame 0 of
            # model refers to frame 1 of vicon.
            model_df = pd.read_csv(model_csv, index_col=0)
            model_df.index = pd.Index(range(1, len(model_df.index) + 1))

            # If frame number x has NaN for model, then we should drop frame number
            # x from both model and vicon dataframes.
            joint_name = [col for col in model_df.columns if movement[:3] in col][0]
            joint_angles_from_model = model_df[joint_name]
            joint_angles_from_model.dropna(
                inplace=True
            )  # drops rows (frames) with NaN in joint_name column from model dataframe

            # pick index (frames or rows) without NaN in model AND present in vicon csv
            vicon_df = pd.read_csv(
                vicon_csv, header=None, index_col=0, names=[joint_name]
            )
            vicon_df.dropna(inplace=True)

            if vicon_df.empty:
                print(
                    f"Trial {trial_name} has empty vicon data. Please check. Skipping."
                )
                continue

            frames_to_consider = joint_angles_from_model.index.intersection(
                vicon_df.index
            )

            ytrue = vicon_df[joint_name][frames_to_consider].values
            ypred = joint_angles_from_model[frames_to_consider].values

            metrics = None
            if trial_name in train_trials:
                ytrain = np.append(ytrain, ytrue)
                xtrain = np.append(xtrain, ypred)
            else:
                assert trial_name in test_trials
                assert trial_num >= num_train_trials
                if reg is None:
                    # Train a linear regression model for post-processing.
                    # reg = LinearRegression().fit(xtrain.reshape(-1, 1), ytrain)
                    reg = Ridge(alpha=alpha).fit(xtrain.reshape(-1, 1), ytrain)
                    # print(
                    #     f"\nRegression score after training on {num_train_trials} trials from {movement} is {reg.score(xtrain.reshape(-1, 1), ytrain)}.\n\tTotal trials = {num_trials}.\n\tTraining trials = {len(train_trials)}.\n\tTesting trials = {len(test_trials)}.\n"
                    # )
                ypred_reg = reg.predict(ypred.reshape(-1, 1))
                metrics = get_stats(ytrue=ytrue, ypred=ypred_reg)

            if metrics is not None:
                # print(f"Metrics for trial {trial_name}, movement {movement} is {metrics}.")
                summary["AvgRMSE"] += metrics["RMSE"]
                summary["AvgMAE"] += metrics["MAE"]

        if test_trials:
            summary["AvgRMSE"] = round(summary["AvgRMSE"] / len(test_trials), 2)
            summary["AvgMAE"]  = round(summary["AvgMAE"] / len(test_trials), 2)
            print(f"Summary metrics for {movement}: {summary}")

# Trying ridge regression with alpha 2.2166497623220467

Summary metrics for shoabd: {'AvgRMSE': 3.71, 'AvgMAE': 2.94}
Summary metrics for shoflex: {'AvgRMSE': 6.69, 'AvgMAE': 5.1}
Summary metrics for shoext: {'AvgRMSE': 2.81, 'AvgMAE': 2.41}
Summary metrics for elbflex: {'AvgRMSE': 7.17, 'AvgMAE': 6.11}


# Trying ridge regression with alpha 4.826679883012554

Summary metrics for shoabd: {'AvgRMSE': 3.35, 'AvgMAE': 2.74}
Summary metrics for shoflex: {'AvgRMSE': 6.22, 'AvgMAE': 5.15}


KeyboardInterrupt: 