In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import os
import matplotlib.pyplot as plt
import json

print(sns.__version__)

# Apply the theme first: sns.set() re-initialises the colour cycle with its own
# default palette, so a palette selected *before* this call would be discarded.
sns.set(style="whitegrid", font_scale=1.5)

# Save a palette to a variable and make it the active colour cycle.
palette = sns.color_palette("bright")
sns.set_palette(palette)

# corona


<div class="alert alert-block alert-info">
<h2> Baseline similarity </h2>
</div>

In [None]:
CORONA_PATH = "/m/cs/work/luongn1/digirhythm/data/processed/corona/similarity_matrix/"

study = "corona"
uid = "3539009742"
freq = "4epochs"

# Behaviour vectors for the chosen frequency (read with the default integer index).
behavior_vector = pd.read_csv(
    f"/m/cs/work/luongn1/digirhythm/data/processed/corona/vector_corona_{freq}.csv"
)
# Take a sample: the similarity matrix stored for the single subject `uid`.
sample = pd.read_csv(CORONA_PATH + f"{freq}/similarity_{uid}.csv", index_col=0)

# Similarity-to-baseline tables, one file per baseline definition.
similarity_baseline_si = pd.read_csv(
    CORONA_PATH + f"si/similarity_baseline_{freq}.csv", index_col=0
)
similarity_baseline_cluster = pd.read_csv(
    CORONA_PATH + f"cluster/similarity_baseline_{freq}.csv", index_col=0
)
similarity_baseline_average = pd.read_csv(
    CORONA_PATH + f"average/similarity_baseline_{freq}.csv", index_col=0
)

# `uid` is a string, so the row labels are cast to strings to allow `.loc[uid]`.
# Each table converts its OWN index; the "average" table previously received the
# "cluster" table's index, which mislabels rows if the two files differ in order
# (and fails outright if they differ in length).
similarity_baseline_si.index = similarity_baseline_si.index.astype("str")
similarity_baseline_cluster.index = similarity_baseline_cluster.index.astype("str")
similarity_baseline_average.index = similarity_baseline_average.index.astype("str")
behavior_vector.subject_id = behavior_vector.subject_id.astype("str")

sample.shape

# Feature names are looked up per study and per frequency in the JSON config.
with open("../config/features.txt") as f:
    features_dict = json.load(f)
    FEATURES = features_dict[study][freq]

In [None]:
def recurrent_plot(df):
    """Render ``df`` as a seaborn heatmap using the "Blues" colour map.

    Parameters
    ----------
    df : rectangular dataset accepted by ``sns.heatmap``.

    Returns
    -------
    None
        The function only draws; nothing is returned.
    """
    sns.heatmap(data=df, cmap="Blues")


# Similarity-to-baseline values of the selected subject for each baseline
# definition, with missing entries removed.
s_si = similarity_baseline_si.loc[uid].dropna()
s_cluster = similarity_baseline_cluster.loc[uid].dropna()
s_avg = similarity_baseline_average.loc[uid].dropna()

print(f"UID: {uid}. Frequency: {freq}")

# One histogram with a KDE overlay per baseline definition, each in its own figure.
baseline_histograms = (
    (s_avg, "Similarity to baseline - Average"),
    (s_si, "Similarity to baseline - Stability Index"),
    (s_cluster, "Similarity to baseline - Cluster"),
)
for series, title in baseline_histograms:
    plt.figure(figsize=(16, 4))
    sns.histplot(series.values, bins=20, kde=True)
    plt.title(title)


# Heatmap of the subject's similarity matrix, restricted to low-similarity cells.
plt.figure(figsize=(8, 8))

# Per-column cut-off: 1.85 standard deviations below the column mean.
threshold = sample.mean() - 1.85 * sample.std()

# Keep only the cells strictly below the cut-off; everything else becomes NaN
# and is therefore left blank in the heatmap.
sample_masked = sample.where(sample < threshold)
sns.heatmap(sample_masked, cmap="Blues", vmin=0, vmax=1)

if freq == "4epochs":
    xlabel = "Day"
else:
    xlabel = "Week"
plt.xlabel(xlabel)
plt.title("Outliers from baseline - 1.85 standard deviation from the mean")

plt.tight_layout()
plt.show()

In [None]:
# Outliers - Days deviating the most from baseline
def outliers(similarity, thres=1.85):
    """Return the index labels of unusually low values in ``similarity``.

    An entry is flagged when its value is less than or equal to
    ``mean - thres * std`` of the series; only the lower tail is inspected.

    Parameters
    ----------
    similarity : pandas.Series
        Values to screen.
    thres : float, default 1.85
        Number of standard deviations below the mean used as the cut-off.

    Returns
    -------
    pandas.Index
        Labels (not positions) of the flagged entries; empty if none qualify.
    """
    cutoff = similarity.mean() - thres * similarity.std()
    is_low = similarity <= cutoff
    return similarity.loc[is_low].index


def _jaccard(*index_sets):
    """Return |intersection| / |union| of the given pandas Index objects.

    Returns NaN instead of raising ZeroDivisionError when the union is empty,
    i.e. when none of the compared methods flagged any outlier.
    """
    common = index_sets[0]
    combined = index_sets[0]
    for other in index_sets[1:]:
        common = common.intersection(other)
        combined = combined.union(other)
    if len(combined) == 0:
        return float("nan")
    return len(common) / len(combined)


# Outlier labels of the selected subject under each baseline definition.
outlier_si = outliers(s_si)
outlier_avg = outliers(s_avg)
outlier_cluster = outliers(s_cluster)

print("Outlier SI: ", outlier_si)
print("Outlier AVG: ", outlier_avg)
print("Outlier Cluster: ", outlier_cluster)

# Pairwise agreements
si_avg_agreement = _jaccard(outlier_si, outlier_avg)
avg_cluster_agreement = _jaccard(outlier_avg, outlier_cluster)
cluster_si_agreement = _jaccard(outlier_cluster, outlier_si)

# Three-way agreement
three_way_agreement = _jaccard(outlier_si, outlier_avg, outlier_cluster)

print(f"SI and AVG Agreement: {si_avg_agreement}")
print(f"AVG and Cluster Agreement: {avg_cluster_agreement}")
print(f"Cluster and SI Agreement: {cluster_si_agreement}")
print(f"Three-way Agreement: {three_way_agreement}")

In [None]:
# Print values in outliers
# NOTE(review): outliers() returns index *labels*, whereas .iloc expects integer
# positions. The lookups below presumably rely on the similarity labels being
# usable as row positions of `sample_behaviour` — confirm against the data.
if freq == "4epochs":
    sample_behaviour = behavior_vector.reset_index()[
        behavior_vector.subject_id == uid
    ].reset_index()
    # Computed once; a bare expression inside an `if` body is never displayed,
    # so the former stand-alone lookup only duplicated this work.
    outlier_rows = sample_behaviour.iloc[outliers(s_cluster)][FEATURES]

    print("Difference from average behaviour")
    print((outlier_rows - sample_behaviour[FEATURES].mean()).to_string())
elif freq == "7ds":
    # NOTE(review): FEATURES_7DS is only assigned in a later cell of this
    # notebook, so this branch fails on a fresh kernel run top to bottom.
    # Keep every 7th row of the subject's records.
    sample_behaviour = behavior_vector.reset_index()[behavior_vector.subject_id == uid][
        0::7
    ].reset_index()
    sample_behaviour = sample_behaviour.dropna(subset=FEATURES_7DS)
    sns.lineplot(x=sample_behaviour.index, y=sample_behaviour["midsleep:mean"])

    outlier_rows = sample_behaviour.iloc[outliers(s_cluster)][FEATURES_7DS]

    print(
        "Average behaviour: \n",
        sample_behaviour[FEATURES_7DS].agg(["mean", "std"]).to_string(),
    )
    print("Difference from average behaviour")
    print((outlier_rows - sample_behaviour[FEATURES_7DS].mean()).to_string())


else:
    # Any other frequency is not handled by this cell.
    print("Dis")


<div class="alert alert-block alert-info">
<h2> Collective behaviour of outliers </h2>
</div>

In [None]:
CORONA_PATH = "/m/cs/work/luongn1/digirhythm/data/processed/corona/similarity_matrix/"

# Feature columns of the "4epochs" behaviour vectors.
FEATURES = [
    "heart_rate_variability_avg",
    "stepsx1000:total",
    "steps:night:norm",
    "steps:morning:norm",
    "steps:afternoon:norm",
    "steps:evening:norm",
    "tst",
    "midsleep",
]
# Feature columns of the "7ds" behaviour vectors.
FEATURES_7DS = [
    "heart_rate_variability_avg:mean",
    "steps:night:7ds:sum:norm",
    "steps:morning:7ds:sum:norm",
    "steps:afternoon:7ds:sum:norm",
    "steps:evening:7ds:sum:norm",
    "stepsx1000:total",
    "tst:mean",
    "midsleep:mean",
]

freq = "4epochs"

# Behaviour vectors: subject ids as strings, rows ordered by subject and date.
behavior_vector = (
    pd.read_csv(
        f"/m/cs/work/luongn1/digirhythm/data/processed/corona/vector_corona_{freq}.csv",
        index_col=0,
    )
    .astype({"subject_id": "str"})
    .sort_values(["subject_id", "date"])
)

# Similarity to the stability-index baseline, with string row labels.
similarity_baseline_si = pd.read_csv(
    CORONA_PATH + f"si/similarity_baseline_{freq}.csv", index_col=0
)
similarity_baseline_si.index = similarity_baseline_si.index.astype("str")

In [None]:
# Gather the feature values of every subject's outlier observations.
# Frames are collected in a list and concatenated once; calling pd.concat inside
# the loop re-copies all previously gathered rows on every iteration.
# NOTE(review): outliers() returns index *labels* while .iloc expects integer
# positions — presumably the similarity labels double as row positions within a
# subject's (date-sorted) behaviour rows; confirm against the data.
outlier_frames = []
for uid in similarity_baseline_si.index:
    s_si = similarity_baseline_si.loc[uid].dropna()
    outlier_labels = outliers(s_si)  # computed once per subject

    subject_behaviour = behavior_vector[behavior_vector.subject_id == uid]
    outlier_frames.append(subject_behaviour.iloc[outlier_labels][FEATURES])

# pd.concat raises on an empty list, so fall back to an empty frame.
outliers_df = pd.concat(outlier_frames) if outlier_frames else pd.DataFrame()

outliers_df

# MOMO

In [3]:
MOMO_PATH = "/m/cs/work/luongn1/digirhythm/data/processed/momo/similarity_matrix/"

study = "momo"
freq = "4epochs"


# Behaviour vectors of the MOMO study for the chosen frequency.
behavior_vector = pd.read_csv(
    f"/m/cs/work/luongn1/digirhythm/data/processed/momo/vector_momo_{freq}.csv",
    index_col=0,
)
# sample = pd.read_csv(MOMO_PATH + f"{freq}/similarity_{uid}.csv", index_col=0)

# Similarity-to-baseline tables, one file per baseline definition.
similarity_baseline_si = pd.read_csv(
    MOMO_PATH + f"si/similarity_baseline_{freq}.csv", index_col=0
)
similarity_baseline_cluster = pd.read_csv(
    MOMO_PATH + f"cluster/similarity_baseline_{freq}.csv", index_col=0
)
similarity_baseline_average = pd.read_csv(
    MOMO_PATH + f"average/similarity_baseline_{freq}.csv", index_col=0
)

# Row labels are cast to strings. Each table converts its OWN index; the
# "average" table previously received the "cluster" table's index, which
# mislabels rows if the two files differ in order.
similarity_baseline_si.index = similarity_baseline_si.index.astype("str")
similarity_baseline_cluster.index = similarity_baseline_cluster.index.astype("str")
similarity_baseline_average.index = similarity_baseline_average.index.astype("str")


similarity_baseline_si

NameError: name 'pd' is not defined