In [None]:
# Work 8: Comprehensive Patient Data Integration and Hospital Frailty Risk Score (HFRS) Scoring in Python 
# [W8.HFRS.2.Calculate_HFRS_Points.ipynb]

# This notebook loads multiple patient data sources, calculates HFRS points from their ICD-10 codes,
# and writes the resulting per-patient frailty scores to CSV files for further analysis.

########################################################################################################
#  Sequence list
########################################################################################################

# 1: Define file paths
# 2: Load data (only the patient ID and ICD code columns)
# 3: Load HFRS ICD-10 code points data
# 4: Convert ICD-10 code points to dictionary and fix points to float
# 5: Print a few rows to ensure the data is correct
# 6: Create function to calculate HFRS
# 7-9: Calculate HFRS for each dataset and save the results

########################################################################################################
########################################################################################################

import pandas as pd

# 1: Define file paths
# All local inputs and outputs share one working directory; the ICD-10 points
# table is fetched from a remote URL.
work_dir = '/home/work'

all_data_path = f'{work_dir}/all_data.csv'
dgn_path = f'{work_dir}/dataset1.csv'
kertomus_path = f'{work_dir}/dataset2.csv'
til_tap_path = f'{work_dir}/dataset3.csv'
output_path = f'{work_dir}/hfrs_scores.csv'

icd_points_path = (
    'https://raw.githubusercontent.com/Tupatuko2023/Python-R-Scripts/'
    'main/tables/W8.HFRS.ICD-10_points.xlsx'
)

print("1: Defined file paths ")

# 2: Load data
# Only the patient identifier and the ICD code column(s) are read from each file.
id_and_code = ['Potilas_ID', 'ICD_code']

# The three source extracts are read as pipe-delimited files.
dgn_df = pd.read_csv(dgn_path, usecols=id_and_code, sep='|')
kertomus_df = pd.read_csv(kertomus_path, usecols=id_and_code, sep='|')
til_tap_df = pd.read_csv(
    til_tap_path,
    usecols=['Potilas_ID', 'ICD_code1', 'ICD_code2'],
    sep='|',
)

# The combined file is read with the default comma separator and every column as text.
# NOTE(review): unlike the extracts above, no sep='|' is given here - confirm that
# all_data.csv really is comma-delimited.
all_data_df = pd.read_csv(all_data_path, usecols=id_and_code, dtype=str)

print("2: Loaded the data ")

# 3: Load ICD-10 code points
icd_points_df = pd.read_excel(icd_points_path)

print("3: Loaded the ICD-10 code points ")

# 4: Convert ICD-10 code points to dictionary and fix points to float
# The 'Points' column may hold decimal-comma text ("1,5") or real numbers, depending on
# how the Excel cells are typed. Casting to str first keeps `.str.replace` from raising
# on an all-numeric column and from silently turning the numeric cells of a mixed
# column into NaN (which would later be scored as 0 points).
points_as_float = (
    icd_points_df['Points']
    .astype(str)
    .str.replace(',', '.', regex=False)  # literal comma -> dot, not a regex
    .astype(float)                       # still raises on text that is not a number
)
icd_points = dict(zip(icd_points_df['ICD-10 Code'], points_as_float))

print("4: Converted the ICD-10 code points to dictionary ")

# 5: Print a few rows to ensure the data is correct
# Each heading is followed by its sample on the next line.
print("All Data Sample:", all_data_df.head(), sep="\n")
print("\nICD Points Sample:", icd_points_df.head(), sep="\n")
print("\nICD Points Dictionary Sample:")
# First five (code, points) pairs in the dictionary's insertion order.
print(dict(list(icd_points.items())[:5]))

# 6: Create function to calculate HFRS
def calculate_hfrs(df, icd_points, icd_columns, output_path, count_each_code_once=True):
    """Sum HFRS points per patient and write the totals to a CSV file.

    The frame is reshaped so that every ICD column listed in ``icd_columns``
    contributes one row per (patient, code). Missing codes are dropped, each
    remaining code is looked up in ``icd_points`` (codes without an entry score
    0), and the points are summed per ``Potilas_ID``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Potilas_ID`` and every column named in ``icd_columns``.
        The frame itself is not modified.
    icd_points : dict
        Maps ICD-10 code -> points. Anything else accepted by ``Series.map``
        is passed through unchanged.
    icd_columns : list of str
        Columns of ``df`` that hold ICD codes.
    output_path : str
        Destination CSV; written with columns ``Potilas_ID`` and ``HFRS``.
    count_each_code_once : bool, default True
        When True, a code that occurs several times for the same patient
        (repeated rows, or the same code in more than one ICD column) adds its
        points only once, as an HFRS code contributes its points once per
        patient. Pass False to sum once per occurrence (the previous behaviour).

    Notes
    -----
    Patients whose ICD columns are all missing do not appear in the output.
    Leading/trailing whitespace is stripped from the codes and from string
    lookup keys so that padded values still match.
    """
    # Normalise padded lookup keys; non-dict mappers are handed to .map() untouched.
    if isinstance(icd_points, dict):
        lookup = {
            (code.strip() if isinstance(code, str) else code): points
            for code, points in icd_points.items()
        }
    else:
        lookup = icd_points

    # Long format: one row per patient and non-missing ICD code.
    diagnoses = (
        df.melt(id_vars=['Potilas_ID'], value_vars=icd_columns,
                var_name='Diagnosis_Type', value_name='ICD_code')
        .dropna(subset=['ICD_code'])
        .assign(ICD_code=lambda d: d['ICD_code'].astype(str).str.strip())
    )

    if count_each_code_once:
        # Without this, a diagnosis recorded on several visits/columns would be
        # added repeatedly and inflate the patient's score.
        diagnoses = diagnoses.drop_duplicates(subset=['Potilas_ID', 'ICD_code'])

    # Codes that are not in the points table contribute 0.
    diagnoses = diagnoses.assign(
        HFRS=diagnoses['ICD_code'].map(lookup).fillna(0).astype(float)
    )

    hfrs_scores = diagnoses.groupby('Potilas_ID')['HFRS'].sum().reset_index()
    hfrs_scores.to_csv(output_path, index=False)
    print(f"6: HFRS points were calculated and saved to '{output_path}'.")

# 7-9: Calculate HFRS for each dataset and save the results.
# Each entry is (source frame, ICD code columns to score, destination CSV);
# the datasets are processed in the order dgn_df, kertomus_df, til_tap_df.
for source_df, code_columns, scores_csv in (
    (dgn_df, ['ICD_code'], '/home/work/dataset1_hfrs.csv'),
    (kertomus_df, ['ICD_code'], '/home/work/dataset2_hfrs.csv'),
    (til_tap_df, ['ICD_code1', 'ICD_code2'], '/home/work/dataset3_hfrs.csv'),
):
    calculate_hfrs(source_df, icd_points, code_columns, scores_csv)


