In [1]:
import os
import shutil
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics.pairwise import cosine_similarity


# Fitting the PTOA vector to the "mean" knee (linear regression on group-mean latents)

## Bone-only (B-only) NSM

In [None]:
# Compare the bone-only B-score direction with the direction in which each knee
# group's mean latent moves over time (slope of a per-dimension linear fit).
knees = ['aclr', 'clat', 'ctrl']
timepoints = np.array([0.75, 3, 9, 18, 30]) # Timepoints in months

# The B-score vector does not depend on the knee group, so it is loaded once
# here instead of being re-read from disk on every loop iteration.
b_vector_path = '/dataNAS/people/anoopai/KneePipeline/BSCORE_MODELS/NSM_Orig_BScore_Bone_Only_Jan_2025/bscore_vector.npy'
bvector = np.load(b_vector_path)

for knee in knees:
    latent_paths = f'/dataNAS/people/anoopai/DESS_ACL_study/results/BScore_and_FC_metrics/mean_shape_recon/BO_model/shape_change_between_knees_raw_by_visit/mean_latent_{knee}.npz'

    # np.load on an .npz returns a lazily-read NpzFile that keeps the archive
    # open; the context manager closes it once every visit has been read.
    with np.load(latent_paths) as visit_latents:
        # One entry per timepoint, keyed 'Visit-1' ... 'Visit-5'.
        latents = [visit_latents[f'Visit-{visit}'] for visit in range(1, len(timepoints) + 1)]

    Y = np.stack(latents).squeeze()          # one row per visit; (5, 512) per the original note
    X = timepoints.reshape(-1, 1)            # (5, 1)

    # Multi-output least squares: each latent dimension is regressed on time.
    reg = LinearRegression()
    reg.fit(X, Y)

    # coef_ holds one slope per latent dimension (change per month); flattened
    # it is the direction the mean latent moves in over time.
    ptoa_vector = reg.coef_.reshape(-1)

    similarity = cosine_similarity(bvector.reshape(1, -1), ptoa_vector.reshape(1, -1))[0, 0]
    print('Bone-only NSM')
    print(knee)
    print('Cosine Similarity:', similarity)

## Bone-and-cartilage (B+C) NSM

In [None]:
# Compare the bone-and-cartilage B-score direction with the direction in which
# each knee group's mean latent moves over time (slope of a per-dimension linear fit).
knees = ['aclr', 'clat', 'ctrl']
timepoints = np.array([0.75, 3, 9, 18, 30]) # Timepoints in months

# The B-score vector does not depend on the knee group, so it is loaded once
# here instead of being re-read from disk on every loop iteration.
b_vector_path = '/dataNAS/people/anoopai/KneePipeline/BSCORE_MODELS/NSM_Orig_BScore_Nov_2024/bscore_vector.npy'
bvector = np.load(b_vector_path)

for knee in knees:
    latent_paths = f'/dataNAS/people/anoopai/DESS_ACL_study/results/BScore_and_FC_metrics/mean_shape_recon/BC_model/shape_change_between_knees_raw_by_visit/mean_latent_{knee}.npz'

    # np.load on an .npz returns a lazily-read NpzFile that keeps the archive
    # open; the context manager closes it once every visit has been read.
    with np.load(latent_paths) as visit_latents:
        # One entry per timepoint, keyed 'Visit-1' ... 'Visit-5'.
        latents = [visit_latents[f'Visit-{visit}'] for visit in range(1, len(timepoints) + 1)]

    Y = np.stack(latents).squeeze()          # one row per visit; (5, 512) per the original note
    X = timepoints.reshape(-1, 1)            # (5, 1)

    # Multi-output least squares: each latent dimension is regressed on time.
    reg = LinearRegression()
    reg.fit(X, Y)

    # coef_ holds one slope per latent dimension (change per month); flattened
    # it is the direction the mean latent moves in over time.
    ptoa_vector = reg.coef_.reshape(-1)

    similarity = cosine_similarity(bvector.reshape(1, -1), ptoa_vector.reshape(1, -1))[0, 0]
    print('Bone-and-cartilage NSM')
    print(knee)
    print('Cosine Similarity:', similarity)

# Fitting a line to each subject: linear mixed-effects model with a random intercept and slope per knee

## Bone-only (B-only) NSM

In [5]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from tqdm import tqdm
import matplotlib.pyplot as plt
import warnings
import os
from scipy.spatial.distance import cosine

data_path = '/dataNAS/people/anoopai/DESS_ACL_study/results/BScore_and_FC_metrics/mean_shape_recon/BO_model/shape_change_between_knees_raw_by_visit'
latent_path = os.path.join(data_path, 'latents_subjects.npz')
b_vector_path = '/dataNAS/people/anoopai/KneePipeline/BSCORE_MODELS/NSM_Orig_BScore_Bone_Only_Jan_2025/bscore_vector.npy'

# Load your OAI b-vector
b_vector = np.load(b_vector_path)

# ------ 1. Load data ------
# NOTE(review): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code if the archive is not trusted. Keep only for files produced by
# this pipeline.
# The context manager closes the archive once the four arrays have been read.
with np.load(latent_path, allow_pickle=True) as npz:
    subjects = npz['subjects']
    visits = npz['visits']
    knees = npz['knees']
    latents = npz['latents']  # one row per scan; squeezed to 2-D below

# Visit number -> months (same schedule as the mean-knee analysis above).
visit_to_month = {1: 0.75, 2: 3, 3: 9, 4: 18, 5: 30}

# ------ 2. Dataframe build ------
df = pd.DataFrame({
    "Subject": subjects.astype(str),
    "Visit": visits,
    "Knee": knees.astype(str),
})
latents = latents.squeeze()  # drop singleton axes -> (N, n_latents)
latent_colnames = [f"latent_{i}" for i in range(latents.shape[1])]
latent_df = pd.DataFrame(latents, columns=latent_colnames)
df = pd.concat([df.reset_index(drop=True), latent_df.reset_index(drop=True)], axis=1)

df['Visit'] = df['Visit'].astype(int)
df['Time'] = df['Visit'].map(visit_to_month)

# .map() turns any visit number missing from visit_to_month into NaN without
# complaint; surface that here rather than letting it reach the model fits.
unmapped_visits = sorted(df.loc[df['Time'].isna(), 'Visit'].unique())
if unmapped_visits:
    warnings.warn(f"Visit numbers without a month mapping (Time is NaN): {unmapped_visits}")

# Grouping key for the mixed model: one group per individual knee.
df['Subject_Knee'] = df['Subject'] + "_" + df['Knee']

# ------ 3. Label Knee Types ------
# Set of knee labels observed for each subject across all visits.
df_temp = pd.DataFrame({'Subject': subjects.astype(str), 'Knee': knees.astype(str)})
df_counts = df_temp.groupby('Subject')['Knee'].unique()
# ACLR subjects: both an 'aclr' and a 'clat' knee are present.
aclr_subjects = set(df_counts[df_counts.apply(lambda arr: (('aclr' in arr) and ('clat' in arr)))].index)
# Control subjects: 'ctrl' is the only knee label present.
control_subjects = set(df_counts[df_counts.apply(lambda arr: (len(arr)==1 and arr[0]=='ctrl'))].index)

def get_knee_type(row):
    """Return the knee-type label for one dataframe row.

    Reads ``row['Subject']`` and ``row['Knee']`` and consults the module-level
    ``aclr_subjects`` / ``control_subjects`` sets:

    * subject in ``aclr_subjects``: 'aclr' when the knee label (compared
      case-insensitively) is 'aclr', otherwise 'clat';
    * subject in ``control_subjects``: 'ctrl';
    * anything else: 'unknown'.
    """
    subject = row['Subject']

    if subject in aclr_subjects:
        # Any non-'aclr' knee of an ACLR subject is treated as contralateral.
        return 'aclr' if row['Knee'].lower() == 'aclr' else 'clat'

    if subject in control_subjects:
        return 'ctrl'

    return 'unknown'

df['KneeType'] = df.apply(get_knee_type, axis=1)
df = df[df['KneeType'] != 'unknown'].copy()
# Rows per knee type; the recorded output of this cell begins with this table.
print(df['KneeType'].value_counts())

# ------ 4. Calculate mean latent trajectory per knee type ------
# For every knee type and every latent dimension, fit
#     latent_i ~ Time, random intercept + random Time slope per Subject_Knee
# and keep the fixed-effect Time slope. The vector of slopes is then compared
# with the B-score vector via cosine similarity.
kneetypes = ['aclr', 'clat', 'ctrl']
mean_response_dict = {}
n_latents = latents.shape[1]

for kneetype in kneetypes:
    print(f'Fitting model for kneetype: {kneetype}')
    df_sub = df[df['KneeType'] == kneetype].copy()
    mean_responses = np.full(n_latents, np.nan)  # stays NaN where a fit raises
    converged_latents = []   # optimizer reported convergence
    warned_latents = []      # fit returned, but raised warnings or did not converge
    failed_latents = []      # fit raised an exception
    log_details = []

    for i in tqdm(range(n_latents), desc=f"{kneetype} latents"):
        latent_name = f'latent_{i}'
        try:
            # record=True captures warnings instead of printing them, so they
            # must be logged and counted explicitly below.
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter("always")
                md = smf.mixedlm(f"{latent_name} ~ Time", df_sub, groups='Subject_Knee', re_formula="~Time")
                mdf = md.fit()
                warn_msgs = [str(ww.message) for ww in w]

            # A fit that returns without raising has not necessarily converged;
            # take the optimizer's own flag rather than assuming success.
            optimizer_converged = bool(mdf.converged)
            if warn_msgs or not optimizer_converged:
                status = 'warning'
                warned_latents.append(i)
            else:
                status = 'converged'

            mean_responses[i] = mdf.params['Time']
            if optimizer_converged:
                converged_latents.append(i)
            log_details.append({'latent_index': i,
                               'latent_name': latent_name,
                               'status': status,
                               'optimizer_converged': optimizer_converged,
                               'warning_msgs': "|".join(warn_msgs)})
        except Exception as e:
            failed_latents.append(i)
            log_details.append({'latent_index': i,
                               'latent_name': latent_name,
                               'status': 'failed',
                               'optimizer_converged': False,
                               'warning_msgs': str(e)})

    mean_response_dict[kneetype] = mean_responses

    if failed_latents:
        # Failed fits leave NaN slopes, which propagate into the similarity.
        print(f"WARNING: {len(failed_latents)} latent slopes are NaN; "
              f"the cosine similarity for {kneetype} is not meaningful.")

    # scipy's cosine() returns a distance and expects 1-D inputs; ravel guards
    # against a B-score vector stored with a leading singleton axis.
    similarity = 1 - cosine(np.ravel(b_vector), mean_response_dict[kneetype])
    print(f"Cosine similarity for {kneetype}: ", similarity)

    save_path = os.path.join(data_path, f'mean_PTOA_vector_{kneetype}.npy')
    np.save(save_path, mean_responses)

    # Save the log as a CSV for this knee type
    log_df = pd.DataFrame(log_details)
    log_csv_path = os.path.join(data_path, f'fit_log_{kneetype}.csv')
    log_df.to_csv(log_csv_path, index=False)
    print(f"--- {kneetype.upper()} SUMMARY ---")
    print(f"Converged: {len(converged_latents)} / {n_latents}")
    print(f"With warnings: {len(warned_latents)} / {n_latents}")
    print(f"Failed: {len(failed_latents)} / {n_latents}")
    print(f"Log written to {log_csv_path}")
    if failed_latents:
        print("Failed indices:", failed_latents)


KneeType
ctrl    85
aclr    74
clat    74
Name: count, dtype: int64
Fitting model for kneetype: aclr


aclr latents: 100%|██████████| 512/512 [04:19<00:00,  1.98it/s]


Cosine similarity for aclr:  0.2541557645027448
--- ACLR SUMMARY ---
Converged: 512 / 512
Failed: 0 / 512
Log written to /dataNAS/people/anoopai/DESS_ACL_study/results/BScore_and_FC_metrics/mean_shape_recon/BO_model/shape_change_between_knees_raw_by_visit/fit_log_aclr.csv
Fitting model for kneetype: clat


clat latents: 100%|██████████| 512/512 [05:04<00:00,  1.68it/s]


Cosine similarity for clat:  -0.030837180062404768
--- CLAT SUMMARY ---
Converged: 512 / 512
Failed: 0 / 512
Log written to /dataNAS/people/anoopai/DESS_ACL_study/results/BScore_and_FC_metrics/mean_shape_recon/BO_model/shape_change_between_knees_raw_by_visit/fit_log_clat.csv
Fitting model for kneetype: ctrl


ctrl latents: 100%|██████████| 512/512 [06:13<00:00,  1.37it/s]

Cosine similarity for ctrl:  -0.093994685571398
--- CTRL SUMMARY ---
Converged: 512 / 512
Failed: 0 / 512
Log written to /dataNAS/people/anoopai/DESS_ACL_study/results/BScore_and_FC_metrics/mean_shape_recon/BO_model/shape_change_between_knees_raw_by_visit/fit_log_ctrl.csv





## Bone-and-cartilage (B+C) NSM

In [None]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from tqdm import tqdm
import matplotlib.pyplot as plt
import warnings
import os
from scipy.spatial.distance import cosine

data_path = '/dataNAS/people/anoopai/DESS_ACL_study/results/BScore_and_FC_metrics/mean_shape_recon/BC_model/shape_change_between_knees_raw_by_visit'
latent_path = os.path.join(data_path, 'latents_subjects.npz')
b_vector_path = '/dataNAS/people/anoopai/KneePipeline/BSCORE_MODELS/NSM_Orig_BScore_Nov_2024/bscore_vector.npy'

# Load your OAI b-vector
b_vector = np.load(b_vector_path)

# ------ 1. Load data ------
# NOTE(review): allow_pickle=True unpickles object arrays, which can execute
# arbitrary code if the archive is not trusted. Keep only for files produced by
# this pipeline.
# The context manager closes the archive once the four arrays have been read.
with np.load(latent_path, allow_pickle=True) as npz:
    subjects = npz['subjects']
    visits = npz['visits']
    knees = npz['knees']
    latents = npz['latents']  # one row per scan; squeezed to 2-D below

# Visit number -> months (same schedule as the mean-knee analysis above).
visit_to_month = {1: 0.75, 2: 3, 3: 9, 4: 18, 5: 30}

# ------ 2. Dataframe build ------
df = pd.DataFrame({
    "Subject": subjects.astype(str),
    "Visit": visits,
    "Knee": knees.astype(str),
})
latents = latents.squeeze()  # drop singleton axes -> (N, n_latents)
latent_colnames = [f"latent_{i}" for i in range(latents.shape[1])]
latent_df = pd.DataFrame(latents, columns=latent_colnames)
df = pd.concat([df.reset_index(drop=True), latent_df.reset_index(drop=True)], axis=1)

df['Visit'] = df['Visit'].astype(int)
df['Time'] = df['Visit'].map(visit_to_month)

# .map() turns any visit number missing from visit_to_month into NaN without
# complaint; surface that here rather than letting it reach the model fits.
unmapped_visits = sorted(df.loc[df['Time'].isna(), 'Visit'].unique())
if unmapped_visits:
    warnings.warn(f"Visit numbers without a month mapping (Time is NaN): {unmapped_visits}")

# Grouping key for the mixed model: one group per individual knee.
df['Subject_Knee'] = df['Subject'] + "_" + df['Knee']

# ------ 3. Label Knee Types ------
# Set of knee labels observed for each subject across all visits.
df_temp = pd.DataFrame({'Subject': subjects.astype(str), 'Knee': knees.astype(str)})
df_counts = df_temp.groupby('Subject')['Knee'].unique()
# ACLR subjects: both an 'aclr' and a 'clat' knee are present.
aclr_subjects = set(df_counts[df_counts.apply(lambda arr: (('aclr' in arr) and ('clat' in arr)))].index)
# Control subjects: 'ctrl' is the only knee label present.
control_subjects = set(df_counts[df_counts.apply(lambda arr: (len(arr)==1 and arr[0]=='ctrl'))].index)

def get_knee_type(row):
    """Return the knee-type label for one dataframe row.

    Reads ``row['Subject']`` and ``row['Knee']`` and consults the module-level
    ``aclr_subjects`` / ``control_subjects`` sets:

    * subject in ``aclr_subjects``: 'aclr' when the knee label (compared
      case-insensitively) is 'aclr', otherwise 'clat';
    * subject in ``control_subjects``: 'ctrl';
    * anything else: 'unknown'.
    """
    subject = row['Subject']

    if subject in aclr_subjects:
        # Any non-'aclr' knee of an ACLR subject is treated as contralateral.
        return 'aclr' if row['Knee'].lower() == 'aclr' else 'clat'

    if subject in control_subjects:
        return 'ctrl'

    return 'unknown'

df['KneeType'] = df.apply(get_knee_type, axis=1)
df = df[df['KneeType'] != 'unknown'].copy()
# Rows per knee type; the recorded output of this cell begins with this table.
print(df['KneeType'].value_counts())

# ------ 4. Calculate mean latent trajectory per knee type ------
# For every knee type and every latent dimension, fit
#     latent_i ~ Time, random intercept + random Time slope per Subject_Knee
# and keep the fixed-effect Time slope. The vector of slopes is then compared
# with the B-score vector via cosine similarity.
kneetypes = ['aclr', 'clat', 'ctrl']
mean_response_dict = {}
n_latents = latents.shape[1]

for kneetype in kneetypes:
    print(f'Fitting model for kneetype: {kneetype}')
    df_sub = df[df['KneeType'] == kneetype].copy()
    mean_responses = np.full(n_latents, np.nan)  # stays NaN where a fit raises
    converged_latents = []   # optimizer reported convergence
    warned_latents = []      # fit returned, but raised warnings or did not converge
    failed_latents = []      # fit raised an exception
    log_details = []

    for i in tqdm(range(n_latents), desc=f"{kneetype} latents"):
        latent_name = f'latent_{i}'
        try:
            # record=True captures warnings instead of printing them, so they
            # must be logged and counted explicitly below.
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter("always")
                md = smf.mixedlm(f"{latent_name} ~ Time", df_sub, groups='Subject_Knee', re_formula="~Time")
                mdf = md.fit()
                warn_msgs = [str(ww.message) for ww in w]

            # A fit that returns without raising has not necessarily converged;
            # take the optimizer's own flag rather than assuming success.
            optimizer_converged = bool(mdf.converged)
            if warn_msgs or not optimizer_converged:
                status = 'warning'
                warned_latents.append(i)
            else:
                status = 'converged'

            mean_responses[i] = mdf.params['Time']
            if optimizer_converged:
                converged_latents.append(i)
            log_details.append({'latent_index': i,
                               'latent_name': latent_name,
                               'status': status,
                               'optimizer_converged': optimizer_converged,
                               'warning_msgs': "|".join(warn_msgs)})
        except Exception as e:
            failed_latents.append(i)
            log_details.append({'latent_index': i,
                               'latent_name': latent_name,
                               'status': 'failed',
                               'optimizer_converged': False,
                               'warning_msgs': str(e)})

    mean_response_dict[kneetype] = mean_responses

    if failed_latents:
        # Failed fits leave NaN slopes, which propagate into the similarity.
        print(f"WARNING: {len(failed_latents)} latent slopes are NaN; "
              f"the cosine similarity for {kneetype} is not meaningful.")

    # scipy's cosine() returns a distance and expects 1-D inputs; ravel guards
    # against a B-score vector stored with a leading singleton axis.
    similarity = 1 - cosine(np.ravel(b_vector), mean_response_dict[kneetype])
    print(f"Cosine similarity for {kneetype}: ", similarity)

    save_path = os.path.join(data_path, f'mean_PTOA_vector_{kneetype}.npy')
    np.save(save_path, mean_responses)

    # Save the log as a CSV for this knee type
    log_df = pd.DataFrame(log_details)
    log_csv_path = os.path.join(data_path, f'fit_log_{kneetype}.csv')
    log_df.to_csv(log_csv_path, index=False)
    print(f"--- {kneetype.upper()} SUMMARY ---")
    print(f"Converged: {len(converged_latents)} / {n_latents}")
    print(f"With warnings: {len(warned_latents)} / {n_latents}")
    print(f"Failed: {len(failed_latents)} / {n_latents}")
    print(f"Log written to {log_csv_path}")
    if failed_latents:
        print("Failed indices:", failed_latents)


KneeType
ctrl    85
aclr    74
clat    74
Name: count, dtype: int64
Fitting model for kneetype: aclr


aclr latents: 100%|██████████| 512/512 [04:14<00:00,  2.01it/s]


Cosine similarity for aclr:  0.09099428408169186
--- ACLR SUMMARY ---
Converged: 512 / 512
Failed: 0 / 512
Log written to /dataNAS/people/anoopai/DESS_ACL_study/results/BScore_and_FC_metrics/mean_shape_recon/BC_model/shape_change_between_knees_raw_by_visit/fit_log_aclr.csv
Fitting model for kneetype: clat


clat latents: 100%|██████████| 512/512 [05:35<00:00,  1.52it/s]


Cosine similarity for clat:  0.038511052231120724
--- CLAT SUMMARY ---
Converged: 512 / 512
Failed: 0 / 512
Log written to /dataNAS/people/anoopai/DESS_ACL_study/results/BScore_and_FC_metrics/mean_shape_recon/BC_model/shape_change_between_knees_raw_by_visit/fit_log_clat.csv
Fitting model for kneetype: ctrl


ctrl latents: 100%|██████████| 512/512 [06:23<00:00,  1.34it/s]

Cosine similarity for ctrl:  -0.04128280054298261
--- CTRL SUMMARY ---
Converged: 512 / 512
Failed: 0 / 512
Log written to /dataNAS/people/anoopai/DESS_ACL_study/results/BScore_and_FC_metrics/mean_shape_recon/BC_model/shape_change_between_knees_raw_by_visit/fit_log_ctrl.csv



