#### Count distribution of episodes and patients: across all episodes, in the first episode, and in the last episode

In [None]:
from dotenv import load_dotenv
import os
import pandas as pd
import ast
# Load environment variables from .env file
load_dotenv()

##### In all episodes (1)

In [None]:
## In all episodes before encoding of F90 - part 1

# The CSV location is taken from the `trajectory_df_path` environment variable.
trajectory_csv_path = os.environ.get('trajectory_df_path')
trajectory_df = pd.read_csv(trajectory_csv_path)

# Classify a single `diagnosis` cell by whether it holds an 'F90' code
def check_F90(diagnosis):
    """Label one raw ``diagnosis`` value.

    Returns:
        "NaN Only" when the value is missing (``pd.isna``),
        "F90 Present" when the value, parsed with ``ast.literal_eval``,
        contains at least one code beginning with "F90",
        "Other Diagnosis" otherwise.
    """
    if pd.isna(diagnosis):
        return "NaN Only"
    # The cell holds the textual form of a Python literal; parse it and
    # stop at the first code that begins with 'F90'.
    for code in ast.literal_eval(diagnosis):
        if code.startswith("F90"):
            return "F90 Present"
    return "Other Diagnosis"


# Label every episode row with the outcome of check_F90
diagnosis_labels = trajectory_df["diagnosis"].apply(check_F90)
trajectory_df["diagnosis_check"] = diagnosis_labels

# Per-patient table: one row per patient, one column per label, cell = number
# of rows carrying that label (0 where a patient never has the label)
rows_per_patient_and_label = trajectory_df.groupby(
    ["pasient", "diagnosis_check"]
).size()
diagnosis_counts = rows_per_patient_and_label.unstack(fill_value=0)

# Show the episode table, which now includes the diagnosis_check column
display(trajectory_df)

# Patients per label: a patient counts towards a label when they have at
# least one row with it, so each column's positive cells are summed
has_label = diagnosis_counts > 0
patients_summary = has_label.sum()
print("Number of patients with each diagnosis type:")
print(patients_summary)

# Episodes per label: column totals of the per-patient table
episodes_summary = diagnosis_counts.sum()
print("\nNumber of episodes with each diagnosis type:")
print(episodes_summary)

In [None]:
## In all episodes before encoding of F90 - part 2

trajectory_df = pd.read_csv(
    os.getenv('trajectory_df_path')
)

# Unfiltered table, used for the patient-level counts further down.
patient_trajectory_df = trajectory_df

# Row masks on the unfiltered table. `na=False` makes a missing diagnosis
# count as "no F90", so a single row is never flagged by both masks.
missing_diagnosis = patient_trajectory_df["diagnosis"].isna()
has_f90 = patient_trajectory_df["diagnosis"].str.contains("F90", na=False)

print(f"Total Episodes: {trajectory_df['episode_id'].nunique()}")
print(f"Episode with NaN: {missing_diagnosis.sum()}")
print(
    f"Episode with F90: {trajectory_df.loc[has_f90, 'episode_id'].nunique()}"
)
# Count episodes without F90, excluding NaN
print(
    f"Episode without F90, and NaN: {trajectory_df.loc[~missing_diagnosis & ~has_f90, 'episode_id'].nunique()}"
)

print("\n")
totalPatients = patient_trajectory_df["pasient"].nunique()
print(f"Total unique Patients: {totalPatients}")
patientWithNaN = patient_trajectory_df.loc[missing_diagnosis, "pasient"].nunique()
print(f"Unique Patients with NaN: {patientWithNaN}")

patientWithF90 = patient_trajectory_df.loc[has_f90, "pasient"].nunique()
print(f"Unique Patients with F90: {patientWithF90}")

# A patient can have both a NaN episode and an F90 episode. Subtracting the
# two patient counts separately would remove such a patient twice, so the
# size of the union (patients with at least one NaN or F90 row) is subtracted.
patientWithNaNOrF90 = patient_trajectory_df.loc[
    missing_diagnosis | has_f90, "pasient"
].nunique()
print(
    f"Unique Patients without NaN and F90: {totalPatients - patientWithNaNOrF90}"
)

# Leave both names bound to the NaN-free table, as this cell has always done.
trajectory_df = trajectory_df[~missing_diagnosis]
patient_trajectory_df = trajectory_df

In [None]:
## In all episodes after encoding of F90
import pandas as pd
import ast

trajectory_df = pd.read_csv(
    os.getenv('trajectory_df_path')
)

# Unfiltered table, kept for the patient-level counts that include NaN rows.
forPatient_Trajectory_df = trajectory_df

# Count the total episodes and the rows with a missing diagnosis
print(f"Total Episodes: {trajectory_df['episode_id'].nunique()}")
episodeNaNCount = trajectory_df["diagnosis"].isna().sum()
print(f"Episode with NaN: {episodeNaNCount}")

# .copy() makes this an independent frame, so adding the "ADHD" column below
# does not assign into a slice of trajectory_df (SettingWithCopyWarning).
noNanTrajectory_df = trajectory_df[~trajectory_df["diagnosis"].isna()].copy()
print(f"Total Episodes without NaN: {noNanTrajectory_df['episode_id'].nunique()}")

# Encode F90 as a binary flag: 1 when the diagnosis text contains "F90", else 0
noNanTrajectory_df["ADHD"] = noNanTrajectory_df["diagnosis"].apply(
    lambda x: 1 if "F90" in str(x) else 0
)

print(
    f"Episode with ADHD, excluding NaN: {noNanTrajectory_df[noNanTrajectory_df['ADHD'] == 1]['episode_id'].nunique()}"
)
print(
    f"Episode without ADHD, excluding NaN: {noNanTrajectory_df[noNanTrajectory_df['ADHD'] == 0]['episode_id'].nunique()}"
)
print("\n")
# Display the count of patients with ADHD and without ADHD
print(f"Total Patient including NaN: {forPatient_Trajectory_df['pasient'].nunique()}")

# Unique patients having at least one episode with a diagnosis
unique_patients = noNanTrajectory_df["pasient"].unique()
print(f"Total Patient excluding NaN: {len(unique_patients)}")

print(
    f"Total Patient with NaN: {forPatient_Trajectory_df[forPatient_Trajectory_df['diagnosis'].isna()]['pasient'].nunique()}"
)

# Unique patients with ADHD == 1
adhd_patients = noNanTrajectory_df[noNanTrajectory_df["ADHD"] == 1][
    "pasient"
].unique()
print(f"Total Patient, excluding NaN with F90: {len(adhd_patients)}")

# A patient can have both a NaN episode and an F90 episode. Subtracting the
# two patient counts separately would remove such a patient twice, so the
# patients with at least one NaN or F90 row are counted once and subtracted.
nan_or_f90_rows = forPatient_Trajectory_df["diagnosis"].isna() | (
    forPatient_Trajectory_df["diagnosis"].str.contains("F90", na=False)
)
patients_with_nan_or_f90 = forPatient_Trajectory_df.loc[
    nan_or_f90_rows, "pasient"
].nunique()
print(
    f"Total Patient excluding NaN and F90: {forPatient_Trajectory_df['pasient'].nunique() - patients_with_nan_or_f90}"
)

##### In the first episode of each patient

In [None]:
import pandas as pd
import ast

trajectory_df = pd.read_csv(os.getenv('trajectory_df_path'))

# Order the rows by patient and episode start date, then take row 0 of every
# patient group, i.e. the first row per patient in that ordering.
# NOTE(review): read_csv is called without date parsing here, so the order
# follows the raw CSV values of episode_start_date — confirm they sort
# chronologically.
episodes_by_patient = trajectory_df.sort_values(
    by=["pasient", "episode_start_date"], ascending=True
).groupby("pasient")
first_episode_df = episodes_by_patient.nth(0)
display(first_episode_df)

print(f"Total Unique Patients including NaN: {first_episode_df['pasient'].nunique()}")

# Rows whose diagnosis is missing
diagnosis_missing = first_episode_df["diagnosis"].isna()
episodeNaNCount = diagnosis_missing.sum()
print(f"Unique Patients with NaN: {episodeNaNCount}")

# Rows whose diagnosis text contains "F90"; na=False treats missing as "no F90"
diagnosis_has_f90 = first_episode_df["diagnosis"].str.contains("F90", na=False)
print(
    f"Unique Patients with F90: {first_episode_df.loc[diagnosis_has_f90, 'episode_id'].nunique()}"
)
print(
    f"Unique Patients without F90, include NaN: {first_episode_df.loc[~diagnosis_has_f90, 'episode_id'].nunique()}"
)

# From here on first_episode_df only holds rows with a diagnosis
first_episode_df = first_episode_df[~diagnosis_missing]
remaining_has_f90 = first_episode_df["diagnosis"].str.contains("F90", na=False)
print(
    f"Unique Patients without F90, and NaN: {first_episode_df.loc[~remaining_has_f90, 'episode_id'].nunique()}"
)

##### In the last episode of each patient

In [None]:
import pandas as pd
import ast

trajectory_df = pd.read_csv(
    os.getenv('trajectory_df_path')
)
# From trajectory_df keep only the last episode of each patient: after sorting
# by patient and start date, nth(-1) picks the final row of every patient group.
# (A duplicate of this selection used to be stored in `first_episode_df`,
# overwriting the first-episode frame with last-episode rows; it was removed.)
# NOTE(review): read_csv is called without date parsing here, so the order
# follows the raw CSV values of episode_start_date — confirm they sort
# chronologically.
last_episode_df = (
    trajectory_df.sort_values(by=["pasient", "episode_start_date"], ascending=True)
    .groupby("pasient")
    .nth(-1)
)
display(last_episode_df)
print(f"Total Patient including NaN: {last_episode_df['pasient'].nunique()}")
# Number of last-episode rows whose diagnosis is missing
episodeNaNCount = last_episode_df["diagnosis"].isna().sum()
print(f"Patients with NaN: {episodeNaNCount}")
# na=False treats a missing diagnosis as "no F90"
print(
    f"Patients with F90: {last_episode_df[last_episode_df['diagnosis'].str.contains('F90', na=False)]['episode_id'].nunique()}"
)
print(
    f"Patients without F90, include NaN: {last_episode_df[~last_episode_df['diagnosis'].str.contains('F90', na=False)]['episode_id'].nunique()}"
)

# From here on last_episode_df only holds rows with a diagnosis
last_episode_df = last_episode_df[~last_episode_df["diagnosis"].isna()]
print(
    f"Patients without F90, and NaN: {last_episode_df[~last_episode_df['diagnosis'].str.contains('F90', na=False)]['episode_id'].nunique()}"
)