In [None]:
# Work 14: Histogram of HFRS Scores per Patient: Distribution Analysis and Risk Classification [W14.HFRS.8.Plot_Pivot_HFRS.ipynb]

# "This notebook script loads HFRS data, calculates statistics, and plots a histogram to visualize the distribution and risk classification of 
#  HFRS scores per patient."

########################################################################################################
#  Sequence list
########################################################################################################

# 1: Load the transformed data from the CSV file.
# 2: Calculate the average HFRS score per patient and the standard deviation.
# 3: Draw a histogram showing the distribution of HFRS scores per patient.
# 4: Add the mean and standard deviation to the plot with a red dashed line and text.
# 5: Add the boundary for the intermediate risk class at 5 points and label it "Intermediate risk (5–15)".
# 6: Set the title and the x- and y-axis labels, and enable horizontal grid lines.
# 7: Save and display the plot.

########################################################################################################
########################################################################################################

import pandas as pd
import matplotlib.pyplot as plt

# 1: Load the transformed data with HFRS scores
transformed_data_with_hfrs_path = '/home/HUSTIETOALLAS/ext13144568/mounts/research/Tomi_K/ICDsummary/kesätyö/transformed_all_data_with_HFRS.csv'
data_with_hfrs_df = pd.read_csv(transformed_data_with_hfrs_path)

print("1: Data loaded successfully.")

# Plot settings gathered in one place so they are easy to find and tune.
HFRS_COLUMN = 'HFRS'
HISTOGRAM_BINS = 30
# x-position of the green marker that is labelled "Intermediate risk (5–15)".
INTERMEDIATE_RISK_LOWER_BOUND = 5

# Drop missing scores once, up front, so that the statistics and the histogram
# are computed from exactly the same values (pandas' mean()/std() already skip
# NaN by default; this just makes the shared sample explicit).
hfrs_scores = data_with_hfrs_df[HFRS_COLUMN].dropna()

# 2: Calculate the average HFRS score per patient and the standard deviation
# NOTE(review): this is the mean over all rows of the file; it is a per-patient
# average only if the file holds one row per patient — confirm upstream.
mean_hfrs = hfrs_scores.mean()
std_hfrs = hfrs_scores.std()  # pandas default: sample standard deviation (ddof=1)

print(f"2A: Average HFRS score per patient: {mean_hfrs:.2f}")
print(f"2B: Standard deviation of HFRS scores: {std_hfrs:.2f}")

# 3: Draw a histogram of HFRS scores per patient
# Use an explicit figure/axes pair instead of the implicit pyplot "current
# figure", so every call below is guaranteed to target this plot.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(hfrs_scores, bins=HISTOGRAM_BINS, edgecolor='black', alpha=0.7)

print("3: Printed histogram successfully.")

# 4: Add the mean and standard deviation to the plot
ax.axvline(mean_hfrs, color='r', linestyle='dashed', linewidth=1)

# Annotations sit slightly to the right of their marker line (a tenth of a
# standard deviation) and at fixed fractions of the upper y-limit.
label_x_offset = std_hfrs / 10
y_top = ax.get_ylim()[1]
ax.text(mean_hfrs + label_x_offset, y_top * 0.9, f'Mean: {mean_hfrs:.2f}', color='r')
ax.text(mean_hfrs + label_x_offset, y_top * 0.85, f'Std: {std_hfrs:.2f}', color='r')

print("4: Added mean and std to histogram successfully.")

# 5: Add the boundary for the intermediate risk class
ax.axvline(INTERMEDIATE_RISK_LOWER_BOUND, color='g', linestyle='dashed', linewidth=1)
ax.text(INTERMEDIATE_RISK_LOWER_BOUND + label_x_offset, y_top * 0.8, 'Intermediate risk (5–15)', color='g')

print("5: Added intermediate risk marker to histogram successfully.")

# 6: Set the title and other labels for the plot
ax.set_title('Distribution of HFRS Scores per Patient')
ax.set_xlabel('HFRS Score')
ax.set_ylabel('Number of Patients')
ax.grid(axis='y')

print("6: Added title and texts to histogram successfully.")

# 7: Save and display the plot
plot_path = '/home/HUSTIETOALLAS/ext13144568/mounts/research/Tomi_K/plots/k13k.HFRS_score_distribution.png'
fig.savefig(plot_path)

print(f"7: Histogram saved to {plot_path}")

# Show the figure before closing it: closing without showing (as was done
# previously) meant the plot was written to disk but never rendered here.
plt.show()
plt.close(fig)
