In [None]:
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Read the sleep-interval table; each row is counted as one night below.
df = pd.read_csv("../working_data/sleep_intervals.csv")
print(f"Number of nights: {len(df)}")

# Join the demographics onto the interval rows. The inner join keeps only
# rows whose USER_ID is present in both tables.
df_demo = pd.read_csv("../original_data/mhs_demographics_sorted.csv")
df_total = df_demo.merge(df, how="inner", on="USER_ID")

# Restrict to participants whose COUNTRY code is "CH" (Swiss participants).
is_swiss = df_total["COUNTRY"] == "CH"
df_ch = df_total[is_swiss]
print(f"Number of nights of Swiss participants: {len(df_ch)}")

In [None]:
# Set to True if only the Swiss dataset should be used; when False, the full
# sleep-interval table loaded above is analysed unchanged.
only_CH = True

if only_CH:
    # df_ch is a boolean-mask selection of df_total. The following cell adds
    # columns to df, so take an explicit copy here; assigning into the bare
    # selection triggers pandas' SettingWithCopyWarning.
    df = df_ch.copy()

In [None]:
# For every candidate interval length i (7..14), add two columns to df:
#   interval_i - how many complete blocks of length i fit into interval_length
#   short_i    - 1 if the row is shorter than i or leaves a remainder, else 0
interval_length = df["interval_length"]

for i in range(7, 15):
    complete_blocks = interval_length // i
    shorter_than_block = interval_length < i
    leaves_remainder = interval_length % i != 0

    df[f"interval_{i}"] = complete_blocks
    df[f"short_{i}"] = (shorter_than_block | leaves_remainder).astype(int)

print(df.head())

In [None]:
# Summarise, for each candidate interval length, the total number of complete
# intervals and the total number of rows flagged as short, then show the
# result as a table and as a grouped bar chart.
interval_lengths = list(range(7, 15))

data = {
    "Interval Length (i)": interval_lengths,
    "Sum of interval_i": [df[f"interval_{i}"].sum() for i in interval_lengths],
    "Sum of short_i": [df[f"short_{i}"].sum() for i in interval_lengths],
}

df_table = pd.DataFrame(data)

# Short/Interval ratio. A zero denominator would yield inf (or NaN for 0/0),
# so those rows are explicitly set to 0 instead of patching the whole table.
interval_sums = df_table["Sum of interval_i"]
short_sums = df_table["Sum of short_i"]
df_table["Short/Interval Ratio"] = (short_sums / interval_sums).where(interval_sums != 0, 0.0)

# Print the table
print(df_table.to_string(index=False))

# Plot the grouped bar chart
labels = df_table["Interval Length (i)"].astype(str)

x = np.arange(len(labels))  # Bar positions
width = 0.4  # Bar width

fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(x - width / 2, interval_sums, width=width, label="Sum of interval_i", edgecolor="black")
ax.bar(x + width / 2, short_sums, width=width, label="Sum of short_i", edgecolor="black")

ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.set_xlabel("Interval Length (i)")
ax.set_ylabel("Count")
ax.set_title("Number of usable intervals and too short ones based on interval length")
ax.legend()

# savefig does not create missing directories, so make sure the target exists.
plot_dir = Path("descriptive_statistics_plots")
plot_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(plot_dir / "number_of_intervals_based_on_length.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Find how many intervals of length 7 each user has and plot the distribution.

user_sums = df.groupby("USER_ID")["interval_7"].sum()

# Use a dedicated figure so the histogram never draws onto a figure left
# open by an earlier cell.
fig, ax = plt.subplots()
ax.hist(user_sums, bins=10, edgecolor="black")
ax.set_title("Histogram of intervals of length 7 days per user")
ax.set_xlabel("Number of intervals")
ax.set_ylabel("Number of Users")

# savefig does not create missing directories, so make sure the target exists.
plot_dir = Path("descriptive_statistics_plots")
plot_dir.mkdir(parents=True, exist_ok=True)
fig.savefig(plot_dir / "hist_number_of_intervals_per_user.png", dpi=300)
plt.show()