# Introduction
In this notebook, I will describe all the steps that I have taken to make a new dataset for Weekly CTs.

Basically, the process contains five different steps:

1. Navigation of the folder in which one think there maybe any weeklyCTs. These folders can be on this computer or a user can just make these folders by downloading new patients from MIRADA or other UMCG datasets.

2. Extracting only weeklyCTs from these folders and make an excel file from them.

3. Transferring the new-founded weeklyCTs into a destination folder (it can be an existing folder for the weeklyCTs or a new folder).

4. Making a report excel file of some information about the weeklyCTs in the destination file and some clinical information from the patients who have these weeklyCTs.

5. Making a pannel that contains different information about the WeeklyCT dataset.

6. A Watchdog is keep the track of all the additions to the destination folder, and save them in a log file.

In [1268]:
# General Libraries
import os
import re
import glob
import math
import shutil
import panel as pn
import numpy as np
import pandas as pd
from random import randint
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from datetime import time, datetime, date

# DICOM Libraries
import pydicom as pdcm
from pydicom.tag import Tag


# Bokeh libraries
from bokeh.layouts import gridplot
from bokeh.plotting import ColumnDataSource
from bokeh.models import MultiChoice, LabelSet
from bokeh.io import output_notebook, output_file
from bokeh.plotting import figure, show, row, reset_output

# Activate bokeh output and panel extension
output_notebook()
pn.extension()

# 1. Navigation Phase
### DICOM Files
All kinds of CTs were stored in the form of DICOM files. DICOM, which stands for Digital Imaging and Communications in Medicine, is a standard for transmitting, storing, and sharing medical images. DICOM files contain information about medical images, such as X-rays, CT scans, MRIs, and ultrasound. This standard ensures the interoperability of medical imaging equipment from different manufacturers. Some key features are:

**Metadata:** DICOM files store not only the pixel data of the medical images but also a wealth of metadata. This metadata includes patient information, imaging device details, acquisition parameters, and more.

**Interoperability:** DICOM enables the exchange of medical images and related information between different devices and systems. This interoperability is crucial in healthcare settings where various imaging modalities and equipment are used.

**Structured Data:** DICOM files use a structured format for information, allowing for consistency and ease of interpretation by different systems. This makes it possible for healthcare professionals to access and understand the data regardless of the equipment used to capture or generate the images.

For information of different tags and the definitions one can use the following links: [Wiki](https://en.wikipedia.org/wiki/DICOM), [link](https://dicom.innolitics.com/ciods)


In [1216]:
def get_folder_name(image, subf):

    # find the name of the folder
    try:
        folder_name = image[Tag(0x0008103e)].value

    except:
        study = image[Tag(0x00081030)].value
        patient_id = image[Tag(0x00100020)].value
        print(f'Warning: folder {study} with {patient_id} ID does NOT have Series Description')
        folder_name = subf.split('\\')[-1]  

    return folder_name

def get_patient_id(image):

    # Extract the patient ID
    try:
        patient_id = int(image[Tag(0x00100020)].value)

    except:
        print(f'Warning: There is NO patient ID')
        patient_id = None

    return patient_id

def get_probable_weklyct_name(name, number, names_list, saver):

    lowercase_name = name.lower()

    # Search to find 'rct' or 'w' with a number
    if ('rct' in lowercase_name or 'w' in lowercase_name) and re.search(r'\d', name):
        saver = name

    elif 'wk..' in lowercase_name and not re.search(r'\d', name):
        saver = name

    # Check if 'w' is in 'j' and the next element in 'sep_names' is an integer
    elif 'w' in lowercase_name and number + 1 < len(names_list) and not re.search(r'\d', name):

        if '2.0' not in names_list[number + 1] and '2,' not in names_list[number + 1]:
            saver = name + str(names_list[number + 1])

    elif re.search('rct.*[..]|rct.*[#]', lowercase_name) and not re.search(r'\d', name):
        saver = name
    
    else:
        pass

    return saver    
    
def get_hd_fov(name, hd_fov):

    lowercase_name = name.lower()
    # Search whether there is 'hd' or 'fov' in j
    if 'hd' in lowercase_name or 'fov' in lowercase_name:
        hd_fov = 1 
    
    else:
        pass
    
    return hd_fov

def get_fraction(name, fraction):

    lowercase_name = name.lower()

    # Find the fraction number
    if 'rct' in lowercase_name and re.search(r'\d', name):
        fraction = int(re.findall(r'\d+', name)[0])
    
    else:
        pass
    
    return fraction

def get_date_information(image):

    # Extract the date, the week day, and the week number from study date time
    try:
        study_datetime_CT = datetime.strptime(image[Tag(0x00080020)].value ,"%Y%m%d")
        date_info = study_datetime_CT.date()
        weekday = study_datetime_CT.weekday() + 1
        week_num = study_datetime_CT.isocalendar()[1] #week

    except:
        date_info = None
        weekday = None
        week_num = None 
    
    return date_info, weekday, week_num

def get_slice_thickness(image):
    
    # Extract slice thickness
    try:
        slice_thickness = image['00180050'].value
    except:
        slice_thickness = None
    
    return slice_thickness

def get_contrast(image):
    
    # Extract contrast information
    try:
        image[Tag(0x00180010)].value
        contrast=1

    except:
        contrast=0
    
    return contrast

def get_pixel_spacing(image):

    # Extract pixel spacing
    try:
        pixel_spacing = image[Tag(0x00280030)].value
    except:
        pixel_spacing = None
    
    return pixel_spacing

def get_ref_uid(image):

    # Extract UID
    try:
        uid = image['00200052'].value
    except:
        uid = None
    
    return uid

In [1217]:
def navigate_folder(path_folder, output_path, file_name):

    # Add in config
    exclusion_set = {'detail', 'ac_ct', 'ld_ct', 'ld ct', 'ac ct'} # CONFIG File
    min_slice_num = 50 # CONFIG File
    modality = 'CT' # CONFIG File

    # Make a group to save all the information
    group = list()

    for r, d, f in os.walk(path_folder):
        # make a list from all the directories 
        subfolders = [os.path.join(r, folder) for folder in d]

        for subf in subfolders:
            # number of slices (images) in each DICOM folder, and the name of the folders
            slice_num = len(glob.glob(subf+"/*.DCM"))

            # find whether subf is a path and the number of .DCM images is more than 50
            if slice_num > min_slice_num:

                # Extract the information of the image 
                image=pdcm.dcmread(glob.glob(subf+"/*.DCM")[0],force=True)
                folder_name = get_folder_name(image, subf)
    
                # Extract the CTs
                if image.Modality == modality and all(keyword not in folder_name.lower() for keyword in exclusion_set):
   
                    patient_id = get_patient_id(image)

                    # split the name of the folder into strings of information
                    names_list = folder_name.split()

                    # Initialize the following three patameters
                    saver = None
                    hd_fov = 0
                    fraction = None

                    for number, name in enumerate(names_list):
                        saver = get_probable_weklyct_name(name, number, names_list, saver) 
                        hd_fov = get_hd_fov(name, hd_fov)
                        fraction = get_fraction(name, fraction)

                    # Find different information
                    date_info, weekday, week_num = get_date_information(image)
                    slice_thickness = get_slice_thickness(image)
                    contrast = get_contrast(image)
                    pixel_spacing = get_pixel_spacing(image)
                    uid = get_ref_uid(image)

                    # Add the information of this group to the total dataset
                    group.append({
                                'ID': patient_id, 'folder_name': folder_name, 'date': date_info,
                                'week_day': weekday, 'week_num': week_num, 'info_header': saver,
                                'fraction': fraction, 'HD_FoV': hd_fov, 'slice_thickness': slice_thickness,
                                'num_slices': slice_num, 'pixel_spacing': pixel_spacing, 'contrast': contrast,
                                'UID': uid, 'path': subf
                                })
    
    # Make a datafrme from the main folder
    df = pd.DataFrame(group)

    # Save the dataframe
    df.to_excel(os.path.join(output_path,file_name), index=False)

    return df

In [1218]:
path_folder = '//zkh/appdata/RTDicom/Projectline_HNC_modelling/OPC_data/ART_DATA1'
output_path = '//zkh/appdata/RTDicom/Projectline_HNC_modelling/OPC_data/ART_DATA1'

# Correct this one in the main code, this folder have a name tha follow the following structure: 'General_information_{folder_name}.xlsx'
file_name = 'General_information_ART_DATA1.xlsx' 
df = navigate_folder(path_folder, output_path, file_name)

Based on our knowledge about weeklyCTs, we know that they are only available after 2014, so we can just remove the patients before this specific time. Moreover, since this program just navigate all the folders, there may be some duplicated data in those folders, so I need to erase them from the dataset.

In [1219]:
def clean_dataframe(df):
    """
    clean the dataset
    """
    df_copy = df.copy()

    # Slice the part of the dataset after the mentioned time.
    time_limit = pd.Timestamp('2014-01-01') # CONFIG File
    df_copy = df_copy[pd.to_datetime(df_copy.date) > time_limit]

    # Drop the doplicated folders
    df_copy = df_copy.drop_duplicates(subset=['ID', 'folder_name', 'date'],
                                       keep='first', inplace=False, ignore_index=True)

    return df_copy

In [1220]:
df = clean_dataframe(df)

In this stage, I will drop all the remained CTs that are not WeeklyCTs.

In [1221]:
def get_firstday(df, date_list):
    try:
        first_day = df[df.date == date_list[1]].iloc[0].week_day
    except:
        first_day = None
    
    return first_day

def find_matching_header(info_headers):
    for header in info_headers:
        try:
            lowercase_header = header.lower()

            if any(keyword in lowercase_header for keyword in ['rct', 'w']) and re.search(r'\d', header):
                return header

            elif 'wk..' in lowercase_header and not re.search(r'\d', header):
                return header

            elif re.search(r'rct.*[..]|rct.*[#]', lowercase_header) and not re.search(r'\d', header):
                return header

        except Exception as e:
            print(f"An exception occurred: {e}")

    return None

def get_weeklycts_names(df, date_list):

    header_list = list()

    # Find the headers
    for session in date_list[1:]:
        info_headers = df[df.date == session].info_header.tolist()
        header = find_matching_header(info_headers)

        header_list.append(header)

    # Ensure the header_list has 9 elements
    header_list += [None] * (9 - len(header_list))

    return header_list

def get_accelerated_rt(patient_id, clinical_df):
    try:
        accelerated_rt = clinical_df[clinical_df.UMCG==int(patient_id)].Modality_adjusted.values[0]
    
    except:
        accelerated_rt = 'Not Mentioned'
    
    return accelerated_rt


In [1222]:
def extract_weeklyct_folders(df, output_path):
    """
    This function finds weeklyCTs and drops other types of CTs
    """
    # Call clinical df to extract Accelerated program for each patient
    clinical_df_path = '//zkh/appdata/RTDicom/Projectline_HNC_modelling/OPC_data/ART Hooman/Xerostomia_dataset.xlsx' # CONFIG File
    clinical_df = pd.read_excel(clinical_df_path)

    group = list()

    # Separate each ID dataframe
    id_df = pd.DataFrame(df.groupby(['ID']))

    for counter, id_num in enumerate(id_df[0]):

        df = id_df[1][counter]

        # Extract the parts suspected to contain weeklyCTs
        df = df[(df['folder_name'].str.lower().str.contains('rct') & (df['date'] != df['date'].min())) \
                | ((df['date'] == df['date'].min()))]
       
        date_list = sorted(list(df.date.unique())) # Find the list of dates
        rtstart = date_list[0] # Extract RTSTART  
        first_day = get_firstday(df, date_list) # the week day of the first treatment

        # Extract the weeklyCTs names and first day of the treatment
        header_list= get_weeklycts_names(df, date_list)

        # Extract other parameters
        durations = date_list[1:]
        weekly_ct_num = len(durations)       
        durations += [None] * (9 - len(durations)) # Ensure it has 9 elements
        Modality_adjusted = get_accelerated_rt(id_num, clinical_df)

        group.append({'ID': int(id_num), 'Baseline': rtstart, 'Session1': durations[0],
                        'Session2': durations[1], 'Session3': durations[2],'Session4': durations[3],
                        'Session5': durations[4], 'Session6': durations[5],'Session7': durations[6],
                        'Session8': durations[7],'Session9': durations[8], 'Fraction1': header_list[0],
                        'Fraction2': header_list[1], 'Fraction3': header_list[2],'Fraction4': header_list[3],
                        'Fraction5': header_list[4], 'Fraction6': header_list[5], 'Fraction7': header_list[6],
                        'Fraction8': header_list[7],'Fraction9': header_list[8], 'First_day': first_day,
                        'Number_of_CTs': df.shape[0], 'Number_of_weeklyCTs': weekly_ct_num, 'modality_adjusted':Modality_adjusted})
        
    # Make a datafrme from the main folder
    df_final = pd.DataFrame(group)

    # Drop the patients who does not have weeklyCTs
    df_final = df_final[~(df_final.Number_of_weeklyCTs == 0)]
    df_final = df_final.reset_index().drop(columns=['index'])

    # Save the dataframe
    # df_final.to_csv(os.path.join(output_path, file_name), index=False)

    return df_final

In [1223]:
# weekly_file_name = 'weeklyct_output.csv' DO NOT NEED THIS ONE, IT IS A MIDDLE PROCESS
weeklyct_df = extract_weeklyct_folders(df, output_path)

In the last step of Navigation Phase, I will replace all the strings in the header part of the dataframe into fraction numbers. There are multiple conditions here. some patients have their own fractions in their headers e.g. 'rct13', but some others have week number like 'wk3' or have a part of the repeated CT name such as 'rct..', 'wk', 'wk..', and so on. for the first group, I just use the number of fractions in the header. However, for the second and third group, I calculate the probable numeber of fractions using the following criteria.
if the patient has accelarated RT plan, I assume that they should get 1.2 fraction per day (only in working days), so it mean 6 fractions per week.Ans, for patients with other types of the treatment, I suppose that they  should get 1 fraction per working day, so in total 5 per week.

In [1224]:
# Define a custom function to extract numbers only if 'wk' is not present
def extract_numbers(text):
    if isinstance(text, str) and 'wk' not in text and re.search(r'\d', text):
        
        return  float(''.join(filter(str.isdigit, text)))       
    else:
        return text

def get_existing_fractions(df):
    """
    This function extract all the fractions exist in the data itself.
    """
    for header in df.iloc[:, 11:20].columns:
        df[header] = df[header].apply(extract_numbers)

    return df

def get_coef(Modality_adjusted):
    """
    Get the coefficient of the fractions
    """
    accelerated_list = ['Accelerated RT', 'Bioradiation'] # CONFIG File
    not_accelerated_list = ['Chemoradiation', 'Conventional RT'] # CONFIG File
    
    if Modality_adjusted in not_accelerated_list:
        coef = 1.0
    
    elif Modality_adjusted in accelerated_list:
        coef = 1.2

    else:
        coef = 0.0

    return coef

def calculate_fraction(raw, fraction, fraction_num, coef, counter):
    try:
    
        if isinstance(fraction, str) and 'wk' in fraction and  counter == 0:
            fraction_num = (len(pd.bdate_range( raw[f'Baseline'], raw[f'Session{1}'])) - 1) * coef + 1

        elif isinstance(fraction, str) and 'wk' in fraction and  counter != 0:
            fraction_num += (len(pd.bdate_range( raw[f'Session{counter}'], raw[f'Session{counter+1}'])) - 1) * coef
                
        elif isinstance(fraction, str) and 'wk' not in fraction and not re.search(r'\d', fraction) and counter==0:
            fraction_num += (len(pd.bdate_range( raw[f'Baseline'], raw[f'Session{1}'])) - 1) * coef + 1

        # This part does not work  if the rct.. or rct# is seperated from other part
        elif isinstance(fraction, str) and 'wk' not in fraction and not re.search(r'\d', fraction) and counter!=0:
            fraction_num += (len(pd.bdate_range( raw[f'Session{counter}'], raw[f'Session{counter+1}'])) - 1) * coef

        elif fraction is np.nan and counter < raw.Number_of_weeklyCTs and counter==0:
            fraction_num = (len(pd.bdate_range( raw[f'Baseline'], raw[f'Session{1}'])) - 1) * coef + 1

        elif fraction is np.nan and counter < raw.Number_of_weeklyCTs and counter!=0:
            fraction_num += (len(pd.bdate_range( raw[f'Session{counter}'], raw[f'Session{counter+1}'])) - 1) * coef              

        elif isinstance(fraction, int) or isinstance(fraction, float):
            fraction_num = fraction

        else:
            fraction_num = None
        return fraction_num 

    except:
        return fraction_num

In [1227]:
def add_fractions(df, output_path, file_name):
    """
    This function finds or calculates all the fractions
    """
    # Make a copy of the dataset
    df_copy = df.copy()
    coef_list = list()
    # Find all the existing fractions in the dataset
    df_copy = get_existing_fractions(df_copy)

    # Iterate through patients
    for index, raw in df_copy.iterrows():

        fraction_list = list()
        fraction_num = 0

        # Calculate the coefficient
        coef = get_coef(raw.modality_adjusted)

        # Iterate through fractions
        for counter, fraction in enumerate(raw.iloc[11:20]):

            # Calculate and add different fractions to the list of fractions
            fraction_num = calculate_fraction(raw, fraction, fraction_num, coef, counter)
            fraction_list.append(fraction_num)

        df_copy.iloc[index, 11:20] = fraction_list
        coef_list.append(coef)
 
    df_copy['Coefficient'] = coef_list

    # Save the dataframe
    df_copy.to_excel(os.path.join(output_path, file_name), index=False)

    return df_copy


In [1228]:
# Correct this one in the main code, this folder have a name tha follow the following structure: 'WeeklyCTs_fraction_{folder_name}.xlsx'
file_name = 'WeeklyCTs_fraction_ART_DATA1.xlsx'
weeklyct_df = add_fractions(weeklyct_df, output_path, file_name)

The last part of the first phase can be extracting the information of a specific week e.g. week3. To achieve this aim, I will make a function, that can be call and return an excel file for patients who have a specific week fraction. 

In [1261]:
def get_a_week_information(main_df, weeklyct_df, week_name):

    accelerated_list = ['Accelerated RT', 'Bioradiation'] # CONFIG File
    not_accelerated_list = ['Chemoradiation', 'Conventional RT'] # CONFIG File
    fraction_range_dict = {'week1': {'not_accelerated':[0.0, 5.0], 'accelerated': [0.0, 6.0]}, # Config File
                           'week2': {'not_accelerated':[5.0, 10.0], 'accelerated': [6.0, 12.0]},
                           'week3': {'not_accelerated':[10.0, 15.0], 'accelerated': [12.0, 18.0]},
                           'week4': {'not_accelerated':[15.0, 20.0], 'accelerated': [18.0, 24.0]},
                           'week5': {'not_accelerated':[20.0, 25.0], 'accelerated': [24.0, 30.0]},
                           'week6': {'not_accelerated':[25.0, 30.0], 'accelerated': [30.0, 36.0]},
                           'week7': {'not_accelerated':[30.0, 35.0], 'accelerated': [36.0, 42.0]},
                           'week8': {'not_accelerated':[35.0, 40.0], 'accelerated': [42.0, 48.0]}}
    week_list = list()

    # Iterate through patients
    for _, raw in weeklyct_df.iterrows():
        matching_list = []
        fraction_seri = raw.iloc[11:20]
        #print(raw.modality_adjusted)
        # Find any columns that have values inside the range of a a specific week
        if raw.modality_adjusted in not_accelerated_list:
            matching_list = [column for column in fraction_seri.index \
            if (raw[column]is not None and raw[column] > fraction_range_dict[week_name]['not_accelerated'][0] \
                and raw[column] <= fraction_range_dict[week_name]['not_accelerated'][1])]

        elif raw.modality_adjusted in accelerated_list:
            matching_list = [column for column in fraction_seri.index \
            if (raw[column]is not None and raw[column] > fraction_range_dict[week_name]['accelerated'][0] \
                and raw[column] <= fraction_range_dict[week_name]['accelerated'][1])]
        # print(matching_list)
        # If finds a column, add some information of  that patient to the dictionary
        if len(matching_list) > 0:
            for matched_fraction in matching_list:
                week_num = matched_fraction[-1]
                week_list.append({'ID': raw.ID,
                                 'date': raw[f'Session{week_num}'],
                                 'treatment_week': week_name,
                                 'Fraction_num': matched_fraction, 
                                 'Fraction_magnitude': raw[matched_fraction], 
                                 'modality_adjusted': raw.modality_adjusted})
            # print(week_list)
    # Make a datafrme from the main folder
    week_df = pd.DataFrame(week_list)
    
    try:
        final_df = week_df.merge(main_df, on=['ID', 'date']).drop(columns=['fraction'])
    
    except KeyError:
        print(f'Warning: this week dataset has {week_df.shape} shape')
        final_df = pd.DataFrame()
    
    return final_df

In [1231]:
week_df = get_a_week_information(df, weeklyct_df, 'week6')

# Report Phase
In this phase, dataframes from different folders (it can be one or more folders) gather to gether to make a total dataframe for all the dataset in different folders.

In [1232]:
def read_dataframe(name):

    output_path = '//zkh/appdata/RTDicom/Projectline_HNC_modelling/OPC_data/ART_DATA1' # CONFIG File

    try:
        # If the file is an excel file
        if '.xlsx' in name:
            df = pd.read_excel(os.path.join(output_path, name))
            
        # If the file is a csv file
        elif '.csv' in name:
            df = pd.read_csv(os.path.join(output_path, name)) # Comma seperated

            # If the csv file is semi-colon seperated
            if ';' in df.columns[0]:
                df = pd.read_csv(os.path.join(output_path, name), sep=';')

        # Erase the index columns if there is any
        if any('unnamed' in col_name.lower() for col_name in df.columns):
            excess_column_names = [col_name for col_name in df.columns if 'unnamed' in col_name.lower()]
            df = df.drop(columns=excess_column_names)

        return df

    except FileNotFoundError:
        print(f'Warning: file {name} was not found')
    
    except ValueError:
        print(f'File {name} is not supported by this program.')


def concat_dataframes(df_name_list):
    """
    This function accepts excel and csv files. csvs can be comma-seperated or semicolon-seperated
    """
    # Make an empty df to gather all of the dataframes here.
    final_df = pd.DataFrame()

    for name in df_name_list:
        df = read_dataframe(name)

        try:
           final_df = pd.concat([final_df, df], ignore_index=True)

        except Exception as e:
            print(f'ERROR:error {e} ocurs for {name} folder')
            pass

    # Drop duplicated patients
    if 'weeklyct' in df_name_list[0].lower(): 
        final_df = final_df.drop_duplicates(subset=['ID'])

    # Reset the index
    final_df = final_df.sort_values('ID').reset_index().drop(columns=['index'])

    return final_df

In the next step, I will use the above functions to make two final datasets: WeeklyCT_dataset and General_dataset.

### WeeklyCT Final Dataframe
This dataset contains clinical and some technical information about the patients who have WeeklyCTs. This dataset will be used further in plotting phase.

In [1243]:
def find_dataframes(desired_file):
    """
    This function makes the list of the desired file names. It can be weeklyCT files or General files
    """
    output_path = '//zkh/appdata/RTDicom/Projectline_HNC_modelling/OPC_data/ART_DATA1' # CONFIG File

    # Find all the relavant dataframes
    file_list = os.listdir(output_path)
    desired_file_list = [file_name for file_name in file_list if desired_file in file_name.lower()]

    return desired_file_list

def call_clinical_dataframe():
    clinical_df_name = 'Xerostomia_dataset.xlsx' # CONFIG File

    # Define a mapping between source and target column names
    column_mapping = {'UMCG': 'ID', # CONFIG File
                      'GESLACHT': 'gender', 
                      'LEEFTIJD': 'age',
                      'Loctum2': 'tumor_location',
                      'N_stage': 'n_stage',
                      'TSTAD_DEF': 't_stage',
                      'HN35_Xerostomia_M06': 'xer_06',
                      'HN35_Xerostomia_M12': 'xer_12'}   
    
    clinical_df = read_dataframe(clinical_df_name)
    desired_column_list = list(column_mapping.keys())

    # Slice the desired part
    clinical_df = clinical_df.loc[:,desired_column_list]

    # Map the name of the columns to the desired names
    clinical_df = clinical_df.rename(columns=column_mapping)

    return clinical_df


def make_weeklyct_dataframe():
    """
    This function makes the final weeklyCT dataframe
    """
    make_label_df = True # Config File
    label_list = ['xer_06', 'xer_12'] # Config File

    file_names = find_dataframes('weeklyct')
    df = concat_dataframes(file_names)
    clinical_df = call_clinical_dataframe()
    final_weeklyct_df = df.merge(clinical_df, on='ID')

    # Save the dataframe
    final_weeklyct_df.to_excel(os.path.join(output_path, 'Overview_weeklyCT_patients.xlsx'), index=False)

    # If dataframe based on labels is needed
    if make_label_df:
        for label in label_list:
            label_df = final_weeklyct_df[final_weeklyct_df[label].notnull()]
            label_df.to_excel(os.path.join(output_path, f'Overview_weeklyCT_patients_{label}.xlsx'), index=False)

    return final_weeklyct_df


In [1245]:
weekly_df = make_weeklyct_dataframe()

### General Dataframe
This dataframe contains the information of the available weeklyCT folder. This dataframe will be used further in transferring phase.

In [1262]:
def make_general_dataframe():
    week_list = ['week1', 'week2', 'week3', 'week4', 'week5', 'week6', 'week7', 'week8'] # CONFIG File (It can be week dictionary key list)
    output_path = '//zkh/appdata/RTDicom/Projectline_HNC_modelling/OPC_data/ART_DATA1' # CONFIG File

    # Make a dataframe from all the general files
    file_names = find_dataframes('general')
    general_df = concat_dataframes(file_names)
    weekly_df = pd.read_excel(os.path.join(output_path, 'Overview_weeklyCT_patients.xlsx'))

    final_general_df = pd.DataFrame()

    # Make the datframe for each week and concat all of them to make a dataset
    for week_name in week_list:
        week_df = get_a_week_information(general_df, weekly_df, week_name)
        final_general_df = pd.concat([final_general_df, week_df], ignore_index=True)
    
    # Sort the dataset based on ID
    final_general_df = final_general_df.sort_values('ID').reset_index().drop(columns=['index'])
    # Save the dataframe
    final_general_df.to_excel(os.path.join(output_path, 'General_information.xlsx'), index=False)

    return final_general_df

In [1263]:
final_general_df = make_general_dataframe()



In [1264]:
final_general_df.head()

Unnamed: 0,ID,date,treatment_week,Fraction_num,Fraction_magnitude,modality_adjusted,folder_name,week_day,week_num,info_header,HD_FoV,slice_thickness,num_slices,pixel_spacing,contrast,UID,path
0,20715,2018-04-18,week1,Fraction1,3.0,Chemoradiation,rCT3 protonen 2.0 HD_FoV imar iMAR,3,16,rCT3,1,2,206,"[1.0546875, 1.0546875]",0,1.3.12.2.1107.5.1.4.95434.30000018041806221769...,//zkh/appdata/RTDicom/Projectline_HNC_modellin...
1,20715,2018-05-02,week3,Fraction2,13.0,Chemoradiation,rCT13 protonen 2.0 I40s 3 imar iMAR,3,18,rCT13,0,2,206,"[0.9765625, 0.9765625]",0,1.3.12.2.1107.5.1.4.95434.30000018050206052379...,//zkh/appdata/RTDicom/Projectline_HNC_modellin...
2,20715,2018-05-02,week3,Fraction2,13.0,Chemoradiation,w3CT_reg_bsl,3,18,w3CT_reg_bsl,0,2,206,"[0.9765625, 0.9765625]",0,1.3.12.2.1107.5.1.4.95434.30000018032706004286...,//zkh/appdata/RTDicom/Projectline_HNC_modellin...
3,20715,2018-05-09,week4,Fraction3,18.0,Chemoradiation,rCT18 protonen 2.0 I40s 3 imar iMAR,3,19,rCT18,0,2,206,"[0.9765625, 0.9765625]",0,1.3.12.2.1107.5.1.4.95434.30000018050905575678...,//zkh/appdata/RTDicom/Projectline_HNC_modellin...
4,303865,2015-12-23,week5,Fraction1,25.0,Chemoradiation,HerCTHH wk 3 2.0 I40s 3,3,52,wk3,0,2,195,"[0.9765625, 0.9765625]",0,1.3.12.2.1107.5.1.4.95434.30000015122307471636...,//zkh/appdata/RTDicom/Projectline_HNC_modellin...


# Transferring Phase
In this phase, all the new weeklyCTs will be transferred into the determined and final folder. If there is the same weeklyCT with the same Patient ID, fraction and week number, this program skips that folder.

In [1265]:
def transfering_weeklycts(directory_df):

    destination_main = '//zkh/appdata/RTDicom/Projectline_HNC_modelling/OPC_data/ART_DATA1' # CONFIG File

    # Keep track of the patients
    previous_patient_id = None

    # For each CT scan, iterate through the information
    for index, row in directory_df.iterrows():
        current_patient_id = row.ID

        if current_patient_id != previous_patient_id:
            print(f'Transferring data for patient {current_patient_id} is started')

        # List the direction to the DICOM files
        dicom_files = os.listdir(row.path)

        # Make the destination directory
        final_destination_path = os.path.join(destination_main, str(row.ID), str(f'{row.treatment_week}_{row.Fraction_magnitude}'))

        # Try to make the destination directory
        try:
            os.makedirs(final_destination_path, exist_ok=True)
        except Exception as e:
            print(f"Error creating directory: {e}")

        # Loop through all the CT images
        for file in dicom_files:
            src_path = os.path.join(row.path, file)
            dst_path = os.path.join(final_destination_path, file)

            # Transfer the data to the destination directory
            try:
                shutil.copy(src_path, dst_path)  # Use shutil.copy to copy the file

            except Exception as e:
                print(f"Error copying file: {e}")
    
        if current_patient_id != previous_patient_id :
            previous_patient_id  = current_patient_id
            print(f'Transferring data for patient {current_patient_id} is ended {index}')


In [1266]:
transfering_weeklycts(final_general_df)

Transferring data for patient 20715 is started
Transferring data for patient 20715 is ended 0
Transferring data for patient 303865 is started
Transferring data for patient 303865 is ended 4
Transferring data for patient 1320543 is started
Transferring data for patient 1320543 is ended 7


# Plotting Phase
This phase is the last phase in this analyzing notebook. The watching dog program will be added to the main program. 

To present a summary and also some information about my dataset, I decided to make a representative panel that can be used to transfer much information to the user. In this panel. I used the base structure designed with one of my friends, and then I added the plots and datasets to it. I made this plotting class based on two endpoints (xerostomia 6-month and zerostomia 12-month); however, it is adjustable for other endponts as well.

In [None]:
class BarChart:
    def __init__(self, df, columns_names, param):
        self.df = df
        self.columns_names = columns_names
        self.param = param
    
    def bar_chart_panel_maker(self):
        """
        Type: Instance method

        Input: ---

        Explanation: This method assembel all the elements of the pannel and also uses 
                     _update_choices to update the choices of each parameter.

        Output: 1. interactive bar plot pannel
        """

        # Initialize the widgets
        select_param, multi_choice_param = self.widgit_maker()

        # Update the choices
        @pn.depends(select_param[1].param.value, watch=True)
        def _update_choices(select_param):
            """
            Type: dependent function

            Input: 1. the parameter selected by a user

            Explanation: This function is responsible for updating the choices for each parameter.

            Output: ---
            """
            self.param = select_param
            # Extract the new dataset for sketching
            sketch_series = self.data_maker()

            # Change the options and values of the multi_choice widgit for each parameter.
            multi_choice_param[1].options = list(sketch_series.index)
            multi_choice_param[1].value = list(sketch_series.index)

        # Bind all the elements of the interactive plot
        inter_plot = pn.bind(self.render_plot, parameter=select_param[1], choices=multi_choice_param[1])

        # encapsulating the iter_plot to link the plot with the widgits
        final_plot = pn.Row(pn.Column(select_param, multi_choice_param), pn.pane.Bokeh(inter_plot)) 

        return final_plot
    
    def widgit_maker(self):
        """
        Type: instance method

        Input: ---

        Explanation: This method makes selection bar and multichoice box.

        Output: 1. Selection bar.
                2. multi-choice box.
        """

        # Extract the initial dataset
        multi_initial_items = self.data_maker()

        # Make the select bar and assign a name to it
        param_name = pn.pane.Markdown(f'**Parameters**')
        select_param = pn.widgets.Select(value=self.param, options=self.columns_names)
        select_param_layout = pn.Column(param_name, select_param)

        # Make the multi-choice box and assign a name to it 
        item_name = pn.pane.Markdown(f'**Items**')
        multi_choice_param = pn.widgets.MultiChoice( value = list(multi_initial_items.index),
                                                    options= list(multi_initial_items.index))
        choice_item_layout = pn.Column(item_name, multi_choice_param)

        return select_param_layout, choice_item_layout

    
    def render_plot(self, parameter, choices):
        """
        Type: instance method

        Input: 1. parameter: is the parameter like 'year' that we want to sketch its bar plot.
               2. choices: is the group of available items for each parameter.

        Explanation: This metehod sketch the bar plot based on the parameter of interest and return it.

        Output: 1. bar plot.
        """

        # Extract the dataset
        sketch_series = self.data_maker()

        # Extract the dataset for x and y axis
        xaxis = [item for item in sketch_series.index if item in choices]
        yaxis = [sketch_series[value] for value in xaxis]

        # Make  the dataset in ColumnDataSource format
        source = ColumnDataSource(dict(x = xaxis, y = yaxis))

        # Design the plot infrestructure 
        fig = self.fig_maker(parameter, source, xaxis)

        # Add bar plots to the main plot
        fig.vbar(x=xaxis, top=yaxis, width=0.8, color='#EE8262')

        return fig

    def fig_maker(self, parameter, source, xaxis):
        """
        Type: instance method

        Input: 1. parameter: is the parameter like 'year' that we want to sketch its bar plot.
               2. choices: is the group of available items for each parameter.

        Explanation: This metehod sketch the bar plot based on the parameter of interest and return it.

        Output: 1. bar plot.
        """

        # Make labels
        y_label = 'number of patients'
        title = f'number of patients per {parameter}'

        # If the dataset has a list of string as the xaxis make the following figure
        try:
            fig = figure(width=800, height=600,        
                            x_axis_label = parameter,
                            y_axis_label = y_label,
                            title=title,
                            x_range=xaxis)

        # Otherwise, make the following figure
        except:
            fig = figure(width=800, height=600,        
                            x_axis_label = parameter,
                            y_axis_label = y_label,
                            title=title)

        # Add labels to the main figure
        labels = LabelSet(x='x', y='y', text='y', level='glyph',
                        text_align='center', y_offset=5, source=source)
        
        fig.title.align = 'center'
        fig.add_layout(labels)
        return fig

    def data_maker(self):
        """
        Type: instance method

        Input: ---

        Explanation: This metehod makes the dataset that should be sketch as a barplot.

        Output: 1. figure dataset.
        """
        if self.param == 'Year':
            sketch_series = self.df.RTSTART.dt.year.value_counts()

        elif self.param == 'Count_of_weeks':
            sketch_series = self.week_count_maker().sort_index()

        elif self.param == 'age':
            sketch_series = self.age_count_maker().sort_index()

        elif self.param == 'First_day':
            sketch_series = self.fday_count_maker()

        elif self.param == 'n_stage' or self.param == 't_stage':
            sketch_series = self.stage_count_maker().sort_index()

        elif self.param == 'xer_06' or self.param == 'xer_12':
            sketch_series = self.xer_count_maker().sort_index()  

        elif self.param == 'xer_trend':
            sketch_series = self.trend_count_maker().sort_index()   

        else:
            sketch_series = self.df[self.param].value_counts()
        
        return sketch_series

    def trend_count_maker(self):
        """
        Type: instance method

        Input: ---

        Explanation: This metehod counts the number of different trend for patient who are
                     diagnosed with xerostomia.

        Output: 1. a series that contains number of patients per different trends.
        """        
        trend_df = self.df[['xer_06', 'xer_12']]
        stage_list = [self.map_element_to_trend(element) for _, element in trend_df.iterrows()]
        stage_df = pd.DataFrame(stage_list).transpose()
        return stage_df.stack().value_counts()

    def xer_count_maker(self):
        """
        Type: instance method

        Input: ---

        Explanation: This metehod counts the number of labels for xerostomia endpoints.

        Output: 1. a series that contains number of patients per label.
        """
        xer_series = self.df[self.param]
        xer_list = [self.map_element_to_xer(element) for element in xer_series]
        xer_df = pd.DataFrame(xer_list).transpose()
        return xer_df.stack().value_counts()        

    def stage_count_maker(self):
        """
        Type: instance method

        Input: ---

        Explanation: This metehod counts the number of patients in different t or n stages.

        Output: 1. a series that contains number of patients per different t or n stages.
        """
        stage_series = self.df[self.param]
        stage_list = [self.map_element_to_stage(element, self.param) for element in stage_series]
        stage_df = pd.DataFrame(stage_list).transpose()
        return stage_df.stack().value_counts()

    def fday_count_maker(self):
        """
        Type: instance method

        Input: ---

        Explanation: This metehod counts the number of patients per the day of the week that day start
                     their treatment.

        Output: 1. a series that contains number of patients per day of the week.
        """
        fday_series = self.df.First_day
        fday_list = [self.map_element_to_fday(element) for element in fday_series]
        fday_df = pd.DataFrame(fday_list).transpose()
        fday_df = fday_df.stack().value_counts()
        
        # Define the desired order of days of the week
        desired_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']

        # Reindex the series to match the desired order
        ordered_series = fday_df.reindex(desired_order)    
        return ordered_series

    def age_count_maker(self):
        """
        Type: instance method

        Input: ---

        Explanation: This metehod counts the number of patients per age group.

        Output: 1. a series that contains number of patients per age group.
        """
        age_series = self.df.age

        age_list = [self.map_element_to_age(element) for element in age_series]

        age_df = pd.DataFrame(age_list).transpose()
        return age_df.stack().value_counts()


    def week_count_maker(self):
        """
        Type: instance method

        Input: ---

        Explanation: This metehod counts the number of weekly CTs for each week and make a series of number
                     of patients per each week with number of patients as values and week number as the index.

        Output: 1. a series that contains number of patients per week number.
        """
        fraction_df = self.df.loc[:,'Fraction1':'Fraction9']
        fraction_df['accelerated_rt'] = self.df['accelerated_rt']
        week_list = [fraction_df.apply(lambda row: self.map_element_to_week(row[counter], row[-1]), axis=1) 
                    for counter in range(fraction_df.shape[1] - 1)]

        week_df = pd.DataFrame(week_list).transpose()
        return week_df.stack().value_counts()
    
    @staticmethod
    def map_element_to_week(element, accelerated_rt):
        """
        Type: static method

        Input: 1. element: Is the element that we want to find a week for it.
               2. accelerated_rt: is the plan type of the patient.

        Explanation: this static method get an element let's say 7 with a RT plan type, then assign a proper
                     week for this element and return the week.

        Output: 1. is a week.
        """
        if accelerated_rt == 0:
            element_ranges = {(0, 5): 'Week1', (5, 10): 'Week2', (10, 15): 'Week3', (15, 20): 'Week4', 
                              (20, 25): 'Week5', (25, 30): 'Week6', (30, 35): 'Week7'}

        else:
            element_ranges = {(0, 6): 'Week1', (6, 12): 'Week2', (12, 18): 'Week3',
                           (18, 24): 'Week4', (24, 30): 'Week5', (30, 36): 'Week6'}
        
        for key in element_ranges.keys():

            if element > key[0] and element <= key[1]:
                return element_ranges[key]

    @staticmethod
    def map_element_to_age(individual_age):
        """
        Type: static method

        Input: 1. individual_age: Is the element that we want to find a an age category for .

        Explanation: this static method get an element and find a proper age category for it.

        Output: 1. is a age category.
        """
        age_conditions_dict = {(0, 18):'Under 18', (18, 29): '20-29', (29, 39): '30-39',
                               (39, 49): '40-49', (49, 59): '50-59', (59, 69): '60-69',
                               (69, 79): '70-79', (79, 89): '80-89', (89, 99): '90-99'}

        for key in age_conditions_dict.keys():

            if individual_age > key[0] and individual_age <= key[1]:
                return age_conditions_dict[key]
    
    @staticmethod
    def map_element_to_fday(individual_day):
        """
        Type: static method

        Input: 1. individual_day: Is the element that we want to find a day in week for.

        Explanation: this static method get an element and find a proper day in the week.

        Output: 1. is a day name.
        """
        fday_conditions_dict = {1: 'Monday', 2: 'Tuesday', 3:'Wednesday',
                                4: 'Thursday', 5: 'Friday'}
        
        return fday_conditions_dict[individual_day]
    

    @staticmethod
    def map_element_to_stage(individual_stage, stage_type):
        """
        Type: static method

        Input: 1. individual_stage: Is the element that we want to find a t/n stage for.
               2. stage_type: is the type of stage (n_stage or t_stage)

        Explanation: this static method get an element and find a proper stage.

        Output: 1. is a stage.
        """
        if stage_type == 'n_stage':
            stage_conditions_dict = {('N0'): 'N0', ('N1'): 'N1', ('N2','N2a', 'N2b', 'N2c'): 'N2',
                                  ('N3'): 'N3'}

        else:
            stage_conditions_dict = {('Tis', 'T0', 'T1'): 'T1', ('T2'): 'T2', ('T3'): 'T3',
                                  ('T4a', 'T4b'): 'T4'}

        for key in stage_conditions_dict.keys():
            try:
                if individual_stage in key:
                    return stage_conditions_dict[key]
            
            except TypeError:
                return 'None'   

    @staticmethod
    def map_element_to_xer(individual_xer):
        """
        Type: static method

        Input: 1. individual_xer: Is the element that we want to find a xerostomia label for.

        Explanation: this static method get an element and find a xerostomia endpoint label.

        Output: 1. is a xerostomia endpoint label.
        """
        xer_conditions_dict = {2: 'Positive', 1: 'Negative', 0: 'Not Available'}
        return xer_conditions_dict[individual_xer]

    @ staticmethod
    def map_element_to_trend(individual_trend):
        """
        Type: static method

        Input: 1. individual_trend: Is the element that we want to find a xerostomia trend for.

        Explanation: this static method get an element and find xerostomia trend.

        Output: 1. is a xerostomia trend.
        """
        trend_conditions_dict = {(1, 1):('Negative, Negative'),
                                 (2, 1):('Positive, Negative'),
                                 (1, 2):('Negative, Positive'),
                                 (2, 2):('Positive, Positive')} 
        
        for key in trend_conditions_dict.keys():

            if individual_trend[0] == key[0] and individual_trend[1] == key[1]:
                return trend_conditions_dict[key]

In [None]:
class TextPresentor():

    def text_home(self):
        text = pn.pane.Markdown("""
                                ## **Introduction**
                                This panel is made to explain some of the features of the dataset used in my research.
                                It contains one main dataset contains **455** parients. The patients in this dataset can have
                                12- or 6- month endpoint for xerostomia. Moreover, 12 month dataset is a subset of the main
                                dataset that contains **345** patients whose patients only have 12-month endpoint for xerostomia.
                                The patients in the mentioned dataset can have endpoint for 6-month xerostomia. Moreover, the 6-month
                                dataset (contains **418** patients) only contains the patients with 6-month xerostomia endpoint who can have
                                12-month xerostomia endpoint. Finally, 12-6 month dataset (with **310** patients) contains the patients who
                                have both of the endpoints.  

                                ## **Features**
                                The following features are evaluated in this panel:

                                1. **Year**: This bar chart contains the number of patients based on the RT start year.

                                2. **First_day**: This bar chart contains the number of patients based on the RT start weekday. 

                                3. **Number_of_weeklyCTs**: This bar chart investigates for each number of weekly CTs how many patients we have.

                                4. **Modality_adjustment**: This bar chart depicts the number of patients with different treatment approaches in each dataset.

                                5. **Count_of_weeks**: This bar chart evaluate how many patient we have for each week CT.

                                6. **sex**: This bar chart shows the distribution of gender for each cohort.

                                7. **tumor_location**: This bar chart presents the distribution of different tumor locations in each dataset.

                                8. **age**: This bar chart shows the distribution of patients in different age groups in each dataset.

                                9. **n_stage**: This bar chart shows the distribution of different stages of the number of lymph nodes involved in cancer.

                                10. **t_stage**: This bar chart shows the distribution of different stages of the cancer.

                                11. **xer_06**: This bar chart depicts the number of patients with positive, negative and even without 6-month xerostomia label.

                                12. **xer_12**: This bar chart depicts the number of patients with positive, negative and even without 12-month xerostomia label.
                                
                                13. **xer_trend**: This bar chart presents the trend of the side effect in each patient from 6-month endpoint to 12-month endpoint.
                                """)
        return text

    def text_total(self):
        text = pn.pane.Markdown("""
                                 ## Explanation
                                 This dataset contains **455** patients from which **345** patients have 12-month xerostimia endpoint, and **418** patients have
                                 6-month xerostomia endpoint. The extra columns in this bar plot refers to the number of patient who are diagnosed with
                                 positive and negative xerostomia 6 months and 12 months after irradiation.
                                 """)
        return text 

    def text_12month(self):
        text = pn.pane.Markdown("""
                                ## Explanation
                                This dataset contains **345** patients with 12-month xerostimia endpoint. The most important bar chart for this dataset is **Count_of_weeks**
                                since it contains the number of available weekly CTs per week that can be used as an estimation of the number of samples in the dataset to
                                train the model.
                                 """)
        return text 

    def text_6month(self):
        text = pn.pane.Markdown("""
                                ## Explanation
                                This dataset contains **418** patients with 6-month xerostimia endpoint. This dataset is larger than 6-month dataset, and can be a good starting
                                dataset for training different models.The most important bar chart for this dataset is **Count_of_weeks** since it contains the number of available
                                weekly CTs per week that can be used as an estimation of the number of samples in the dataset to train the model.
                                 """)
        return text
    
    def text_12_6_month(self):
        text = pn.pane.Markdown("""
                                ## Explanation
                                This dataset contains **310** patients with available 6-month and 12-month xerostimia endpoints, which is the smallest dataset. The most important
                                feature of this dataset is **xer_trend** bar chart since it depicts the trend ofxerostomia in the patients who have both xerostomia endpoints. As it
                                was expected, the number of negative labels are more than positive labels in this dataset.
                                 """)
        return text 

In [None]:

class Dashboard:
    '''
    This class creates a panel dashboard to which pages can be added
    
    Arguments: 
    title (str): title of the dashboard
    header_color (str): name of a color or hex color code
    css (str): raw css
    
    Returns:    
    dashboard object
    
    '''

    def __init__(self, title: str, header_color: str, css):
        # initialise dashboard
        self.dashboard = pn.template.BootstrapTemplate(title=title, header_background=header_color, sidebar_width=200)
        self.dashboard.main.extend([pn.pane.Markdown(''), pn.Column(width=1000)]) 
        self.main_page = self.dashboard.main[1]
        pn.extension(raw_css=[css])
        
        # variable to save all the pages
        self.pages = {}
        
        
    def add_page(self, title: str, show_page: bool, *contents):
        ''' 
        Adds a page to the dashboards and create a sidebar navigation button for it 
        
        Arguments:
        title      (str): title of the page
        show_page (bool): boolean to show the page when showing the dahsboard (if more pages have this as True the last page added will be shown)
        
        Returns:
        None
        '''
        sidebar_button = pn.widgets.Button(name=title, width=150, css_classes=['sidebar_button'])  # create sidebar button
        self.dashboard.sidebar.append(sidebar_button)  # append button to sidebar
        sidebar_button.on_click(self._update_page)  # callback
        self.pages[title] = [*contents]  # add the contents to the page dictionary
        if show_page:
            self._show_page(title)
    
    
    def _update_page(self, event):
        '''
        Private callback method to update the page when a sidebar button is clicked 
          
        Arguments:
        event (object): widget cacllback event
        
        Returns:
        None
        '''
        name = event.obj.name  # extract name from event
        self.main_page.clear()  # clear the main page
        self.main_page.append(pn.pane.Markdown(f'# {name}'))  # create title
        self.main_page.extend([item for item in self.pages[name]])  # add all of the contents to the page
        
        
    def _show_page(self, title: str):
        '''
        Private method that show the page of the given page title 
        
        Arguments:
        title (str): title of the page
        
        Returns:
        None
        '''
        self.main_page.clear()
        self.main_page.append(pn.pane.Markdown(f'# {title}'))
        self.main_page.extend([item for item in self.pages[title]])
            
            
    def show(self):
        '''Shows the dashboard''' 
        self.dashboard.show()

In [None]:

os.chdir('//zkh/appdata/RTDicom/Projectline_HNC_modelling/OPC_data/ART Hooman/WeeklyCT Dataset/Program & Docs/Plotting')
# CSS styling
css = '''
.sidebar_button .bk-btn-group button {
    border-radius: 6px;
    font-weight: bolder;
}

.option_button .bk-btn-default.bk-active {
    background-color: #00dcff38;
    font-weight: bold;
    border-color: black;
}
'''
# Text
text_obj = TextPresentor()
home_text = text_obj.text_home()
total_text = text_obj.text_total()
twelve_text = text_obj.text_12month()
six_text = text_obj.text_6month()
twelve_six_text = text_obj.text_12_6_month()

print(total_text)

# Initialize the Dashboard
dataset_db = Dashboard('Dataset', '#00C5CD', css)

# Home page
dataset_db.add_page('Home', True, home_text)

## Total Dataset page
# Assign the total dataset and desired columns
total_df = pd.read_excel('wk3_12month_df.xlsx').drop(columns=['Unnamed: 0'])
column_names_total = ['Year', 'First_day', 'Number_of_weeklyCTs', 'modality_adjusted', 'Count_of_weeks',
                      'gender', 'tumor_location', 'age', 'n_stage', 't_stage','xer_06', 'xer_12']

# Make the barplot for this dataset
bar_plot_total_obj = BarChart(total_df, column_names_total, 'Year')
bar_plot_total = bar_plot_total_obj.bar_chart_panel_maker()

# Add total dataset page to the panel
dataset_db.add_page('Total Datset', False, bar_plot_total, total_text)


## 6month Dataset page
# Assign the total dataset and desired columns
six_month_df = pd.read_excel('Overview_weeklyCT_patients_6month.xlsx').drop(columns=['Unnamed: 0'])
column_names_6month = ['Year', 'First_day', 'Number_of_weeklyCTs', 'modality_adjusted', 'Count_of_weeks',
                      'gender', 'tumor_location', 'age', 'n_stage', 't_stage']

# Make the barplot for this dataset
bar_plot_6month_obj = BarChart(six_month_df, column_names_6month, 'Year')
bar_plot_6month = bar_plot_6month_obj.bar_chart_panel_maker()

# Add 6month dataset page to the panel
dataset_db.add_page('6 month Dataset', False, bar_plot_6month, six_text)

## 12month Dataset page
# Assign the total dataset and desired columns
twelve_month_df = pd.read_excel('Overview_weeklyCT_patients_12month.xlsx').drop(columns=['Unnamed: 0'])
column_names_12month = ['Year', 'First_day', 'Number_of_weeklyCTs', 'modality_adjusted', 'Count_of_weeks',
                      'gender', 'tumor_location', 'age', 'n_stage', 't_stage']

# Make the barplot for this dataset
bar_plot_12month_obj = BarChart(twelve_month_df, column_names_12month, 'Year')
bar_plot_12month = bar_plot_12month_obj.bar_chart_panel_maker()

# Add 12month dataset page to the panel
dataset_db.add_page('12 Month Dataset', False, bar_plot_12month, twelve_text)

## 12- and 6- month Dataset page
# Assign the total dataset and desired columns
twelve_six_month_df = pd.read_excel('Overview_weeklyCT_patients_12_6_month.xlsx').drop(columns=['Unnamed: 0'])
column_names_12_6_month = ['Year', 'First_day', 'Number_of_weeklyCTs', 'modality_adjusted', 'Count_of_weeks',
                      'gender', 'tumor_location', 'age', 'n_stage', 't_stage', 'xer_trend']

# Make the barplot for this dataset
bar_plot_12_6_month_obj = BarChart(twelve_six_month_df, column_names_12_6_month, 'Year')
bar_plot_12_6_month = bar_plot_12_6_month_obj.bar_chart_panel_maker()

# Add 12-6month dataset page to the panel
dataset_db.add_page('12-6 Month Dataset', False, bar_plot_12_6_month, twelve_six_text)


dataset_db.show()



---

In [1029]:
path = '//zkh/appdata/RTDicom/Projectline_HNC_modelling/Users/Hooman Bahrdo/Models/Deep_Learning/DL_NTCP_Xerostomia/datasets/dataset_old_v2/stratified_sampling_test_manual_94.csv'

dff = pd.read_csv(path, sep=';').drop(columns=['Unnamed: 0'])

In [1032]:
dff.xer_12.unique()

array([0, 1], dtype=int64)

In [None]:
path_dataset = '//zkh/appdata/RTDicom/Projectline_HNC_modelling/Users/Hooman Bahrdo/Deep_learning_datasets/Six_month_final df/datasets/dataset_old_v2/0'
