#### Task: Classification and Prediction of readmission with medication and diagnosis columns, based on ATC codes
**`TNE_BO_180` values (0 and 1) are used to classify readmission, where 1 means readmission. For prediction, only the data of readmitted patients is used, and a linear regression model is trained on it to predict the number of days until readmission.**
- Dataset Used: `/mnt/work/workbench/dipendrp/new-data/Full_ICD10_ATC.csv`
- Independent variable: `'age', 'remaining_time_countdown','var_no_dates_permonth', 'gender','closingcode', 'aftercode', 'Length_of_Episode', 
                                                   'Count_visit','Therapy_ratio', 'Examination_ratio', 'Advisory_ratio',
                                                   'TreatmentPlanning_ratio', 'Outpatient_ratio','Inpatient_daynight_ratio', 'Care_intensity', 'num_diagnoses', 'num_medications'`
- Dependent Variable: `TNE_BO_180` for classification and `TNE_NO_180` for prediction
- Other Important Info: Dataset has NaN values. NaN has been replaced with 0

<br>
Description of work done:

1. In this script, the first class `SplitMedicationDiagnosisInUniquePieces` splits the diagnosis and medication columns into unique pieces.
2. It then saves the new data frame with unique diagnoses and medications as a new `CSV` file named `Split_Full_ICD10_ATC`
3. The second class `ClassifyReadmissionWithMedicationDiagnosis` then takes the new `Split_Full_ICD10_ATC` dataset and performs classification and prediction


In [None]:
import re
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import random
from IPython.display import display
from IPython.display import Image
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score,confusion_matrix,precision_score,recall_score,f1_score
from sklearn.metrics import classification_report

from sklearn.feature_extraction.text import CountVectorizer
import matplotlib.pyplot as plt

import os
from dotenv import load_dotenv
load_dotenv()


######### Split Medication and Diagnosis In Unique Pieces ############
#                                                                    #
#                                                                    #
#                                                                    #
######################################################################

class SplitMedicationDiagnosisInUniquePieces:
    """Expand the list-valued diagnosis/medication columns into indicator columns.

    The source CSV stores ``diagnoses`` and ``actual_med_Full_ATC`` as
    stringified lists (e.g. ``"['A', 'B']"``).  This class turns every distinct
    code into its own 0/1 column and writes the result to a new CSV file.
    """

    # Columns kept from the raw CSV, in output order.
    SELECTED_COLUMNS = [
        'episode_id', 'num_diagnoses', 'num_medications', 'pasient', 'age',
        'remaining_time_countdown', 'var_no_dates_permonth', 'gender',
        'episode_order', 'islast', 'closingcode', 'aftercode',
        'episode_start_date', 'episode_end_date', 'tillnextepisode',
        'Length_of_Episode', 'Cat_LOE', 'TNE_BO_180', 'TNE_NO_180',
        'TNE_BO_365', 'TNE_NO_365', 'TNE_BO_730', 'TNE_NO_730', 'TNE_BO_1095',
        'TNE_NO_1095', 'Cat_LOE_desc', 'Count_visit', 'Cat_CV', 'Therapy_ratio',
        'Examination_ratio', 'Advisory_ratio', 'TreatmentPlanning_ratio',
        'Outpatient_ratio', 'Inpatient_ratio', 'Inpatient_day_ratio',
        'Inpatient_daynight_ratio', 'Care_intensity', 'age_group',
        'closingcode_0', 'closingcode_1', 'closingcode_2', 'closingcode_3',
        'closingcode_4', 'closingcode_5', 'closingcode_6', 'closingcode_9',
        'aftercode_1', 'aftercode_2', 'aftercode_3', 'aftercode_4',
        'aftercode_5', 'gender_0', 'F', 'M', 'MiddleChildhood', 'Preschooler',
        'Teenager', 'diagnoses', 'actual_med_Full_ATC',
    ]

    # Columns holding stringified lists that get expanded into indicators.
    LIST_COLUMNS = ['diagnoses', 'actual_med_Full_ATC']

    def __init__(self, file1):
        """Remember the path of the raw CSV; nothing is read until requested."""
        self.file1 = file1
        self.merged_df = None
        self.selected_column_merged_df = None
        self.without_diag_medic_selected_column_merged_df = None

    def load_data_to_split(self):
        """Read the raw CSV and keep only ``SELECTED_COLUMNS``.

        Returns:
            An independent (deep) copy of the selected columns, also stored on
            ``self.merged_df``.

        Raises:
            KeyError: if the CSV lacks any of the selected columns.
        """
        raw = pd.read_csv(self.file1)
        self.merged_df = raw[self.SELECTED_COLUMNS].copy(deep=True)
        return self.merged_df

    def split_diagnosis_medication_in_unique_pieces(self, output_path='Split_Full_ICD10_ATC.csv'):
        """Explode the list columns into indicator columns and save the result.

        Args:
            output_path: Destination CSV; defaults to the file name the
                classification step reads.

        NOTE(review): every list entry becomes its own row that repeats all
        other columns, so an episode with k diagnoses and m medications yields
        k * m rows, each with a single 1 among the diagnosis indicators and a
        single 1 among the medication indicators.  Confirm this row
        multiplication is intended before using the rows as independent samples.
        """
        # Use this instance's own data rather than a module-level object.
        frame = self.load_data_to_split()

        for col in self.LIST_COLUMNS:
            # Missing cells become empty strings so the .str pipeline applies
            # uniformly; the explicit regex flag keeps "[" from being read as
            # an (invalid) pattern regardless of the pandas default.
            codes = (
                frame[col]
                .fillna('')
                .astype(str)
                .str.replace(r"[\[\]' ]", '', regex=True)
                .str.split(',')
            )
            frame = frame.assign(**{col: codes}).explode(col)

            # Empty strings (missing / empty lists) produce an all-zero row.
            dummies = frame[col].str.get_dummies(sep=',')
            frame = pd.concat([frame, dummies], axis=1).drop(col, axis=1)

        frame.to_csv(output_path, index=False)
        
    
class ClassifyReadmissionWithMedicationDiagnosis:
    """Classify 180-day readmission and regress days-to-readmission.

    Reads the indicator-expanded CSV, trains a class-weighted logistic
    regression on ``TNE_BO_180`` and a linear regression on ``TNE_NO_180``
    (rows with a non-missing ``TNE_NO_180`` only), and prints the evaluation
    metrics of each model.

    NOTE(review): a few medication names below contain the letter ``O`` where
    a zero would be expected (e.g. ``'NO6BAO4'``); they are kept verbatim
    because they must match the CSV column names — presumably they mirror raw
    data values, verify against the dataset.
    """

    # Episode-level numeric and one-hot features.
    BASE_FEATURES = [
        'num_diagnoses', 'num_medications', 'remaining_time_countdown', 'var_no_dates_permonth', 'Length_of_Episode', 'Count_visit', 'Therapy_ratio', 'Examination_ratio', 'Advisory_ratio',
        'TreatmentPlanning_ratio', 'Outpatient_ratio', 'Inpatient_daynight_ratio', 'Care_intensity', 'closingcode_0', 'closingcode_1', 'closingcode_2', 'closingcode_3', 'closingcode_4', 'closingcode_5', 'closingcode_6', 'closingcode_9',
        'aftercode_1', 'aftercode_2', 'aftercode_3', 'aftercode_4', 'aftercode_5', 'gender_0', 'F', 'M', 'MiddleChildhood', 'Preschooler', 'Teenager',
    ]

    # Regression target followed by classification target.
    TARGET_COLUMNS = ['TNE_NO_180', 'TNE_BO_180']

    # Diagnosis indicator columns used as features.
    DIAGNOSIS_FEATURES = [
        '3131', '29622', '3151', '313', '3001', '304', '312', '3132', '3133', '3009', '3052', '2781', '3003', '2972', '3152', '3153', '2501', '1019', '563', '315', '495', '75021', '7981', '637', '1004', '476', '798', '939', '3034', '345',
        '339', '3004', '340', '969', '5994', '7142', '2926', '3069', '785', '3008', '3891', '781', '316', '343', '2961', '20411', '2953', '561', '3012', '2903', '915', '300', '3689', '2951', '317', '3031', '949', '19111', '1002', '3781',
        '789', '3453', '74711', '661', '301', '3401', '3678', '3679', '3671', '327', '2911', '296', '7522', '7581', '3276', '3033', '74712', '34512', '3011', '7373', '2713', '389', '3591', '1702', '3123', '3274', '75211', '2649', '2444',
        '74713', '6562', '770', '871', '819', '302', '34511', '3711', '1008', '5552', '5551', '3811', '2801', '2952', '788', '2914', '3451', '733', '7512', '656', '368', '75112', '62611', '658', '3311', '760', '7551', '1994', '2022', '277',
        '1015', '262', '745', '201', '5091', '3895', '2532', '306', '1551', '292', '7554', '5641', '3892', '7321', '1023', '759', '53011', '2445', '18911', '323', '7282', '69542', '5134', '930', '20421', '3672', '7561', '2791', '3869', '0704',
        '2442', '979', '758', '3592', '496', '2643', '3695', '736', '3381', '75121', '7061', '565', '2612', '24521', '1009', '773', '499', '357', '57181', '1031', '947', '347', '079', '3501', '136', '2924', '5965', '1028', '55521', '42711', '1701',
        '835', '1011', '830', '0792', '28731', '6964', '5611', '75561', '1000', '506', '27614', '3684', '818', '6563', '748', '3275', '1029', '53014', '3382', '5302', '472', '295', '535', '32741', '25311', '324', '3894', '2553', '4181', '817',
        '341', '6041', '7472', '739', '75011', '981', '1911', '2564', '691', '6564', '25011', '303', '756', '2441', '6941', '1033', '3782', '7491', '5772', '3462', '802', '344', '3681', '8003', '749', '558', '3484', '6264', '0703', '4332',
        '7531', '1034', '564', '3621', '2531', '365', '396', '4589', '7471', '1010', '6491', '7492', '1499', '288', '5921', '4801', '260', '7556', '1013', '4742', '4338', '805', '2701', '259', '28811', '907', '8032', '8033', '591', '532', '946',
        '2768', '2602', '5305', '2921', '010', '510', '71011', '4741', '657', '75013', '2922', '2502', '2614', '7169',
    ]

    # Medication (ATC code) indicator columns used as features.
    MEDICATION_FEATURES = [
        'N06BA04', 'N06AB06', 'N06BA09', 'A06BA04', 'N06AB03', 'R06AD01', 'N05CH01', 'N05AX08', 'N05AH04', 'N05AX12', 'N06BA12', 'N03AX09',
        'NO6BAO4', 'N06AB10', 'N05AH03', 'NO6BA04', 'N05AA02', 'N06AX03', 'N05CF01', 'A11EA', 'A12AX', 'N05BA01', 'N06AX16', 'N03AG01', 'N05AF03', 'N02CX02', 'N05CF02', 'N06AA04', 'R06AE07', 'G03AA09', 'N02CC03', 'G03AA07', 'N03AE01', 'G03AC09',
        'A10BA02', 'H01BA02', 'B03AA07', 'N06AB04', 'A02BC05', 'N06AX11', 'N05BB01', 'N06AB05', 'N05CD02', 'N05BA04', 'J01FA10', 'D07AC13', 'N01BB20', 'A06AD65', 'A03FA01', 'N06BA02', 'D10AD03', 'G03AA12', 'G03AA13', 'D07BC01', 'S01GX02', 'A02BA02',
        'G03AC06', 'NO6BAO9', 'N02CC01', 'N02AA59', 'J01CF01', 'D07AA02', 'R01AD09', 'D07AC01', 'N03AX14', 'G03FB05', 'N03AF01', 'A12BA02', 'D07AB08', 'A06AD11', 'G03AD02', 'S01AA01', 'N02BA01', 'D01AC20', 'R01AC02', 'M01AE01', 'S01GX09', 'A02BC01',
        'A12BA01', 'C02AC02', 'D07XC01', 'H02AB02', 'N02BE01', 'R05DA01', 'D06AX01', 'S01AA13', 'J05AB01', 'C09AA02', 'A07AA02', 'D06AX05', 'D10AD53', 'D10AD01', 'N07BA03', 'C07AA05', 'G03CA03', 'N06AX12', 'J01CA08', 'N07BA02', 'D06AA03', 'D07AB02',
        'R03BA05', 'G03HB01', 'B03BA03', 'D06BB03', 'R06AX27', 'N01BB02', 'R03BA02', 'R03AC03', 'J01CE02', 'D09AA02', 'N06AA09',
    ]

    # Full independent-variable list, shared by both models.
    FEATURE_COLUMNS = BASE_FEATURES + DIAGNOSIS_FEATURES + MEDICATION_FEATURES

    # Feature columns whose missing values are replaced with 0 on load.
    NAN_TO_ZERO_COLUMNS = [
        'num_diagnoses', 'num_medications', 'Outpatient_ratio', 'Inpatient_daynight_ratio',
        'Therapy_ratio', 'Examination_ratio', 'Advisory_ratio', 'TreatmentPlanning_ratio',
    ]

    def __init__(self, file2):
        """Remember the path of the indicator-expanded CSV."""
        self.file2 = file2
        self.merged_df = None
        self.selected_column_merged_df = None
        self.without_diag_medic_selected_column_merged_df = None

    @staticmethod
    def _as_1d(target):
        """Return a one-column DataFrame as a Series; pass anything else through."""
        if isinstance(target, pd.DataFrame):
            return target.iloc[:, 0]
        return target

    def load_data(self):
        """Read the CSV and build the feature matrix and classification target.

        Returns:
            Tuple ``(merged_df, dependent_variable_TNE_BO_180,
            independent_variable)``: the selected columns, the one-column
            ``TNE_BO_180`` frame, and the ``FEATURE_COLUMNS`` frame.

        Raises:
            KeyError: if the CSV lacks any of the expected columns.
        """
        original_df_1 = pd.read_csv(self.file2)
        # Same column order as before: base features, targets, indicators.
        selected = (self.BASE_FEATURES + self.TARGET_COLUMNS
                    + self.DIAGNOSIS_FEATURES + self.MEDICATION_FEATURES)
        self.merged_df = original_df_1[selected].copy()

        # Assign the filled block back instead of calling
        # ``df[col].fillna(..., inplace=True)``: that chained form acts on a
        # temporary and leaves the frame untouched under pandas copy-on-write.
        self.merged_df[self.NAN_TO_ZERO_COLUMNS] = (
            self.merged_df[self.NAN_TO_ZERO_COLUMNS].fillna(0)
        )

        self.dependent_variable_TNE_BO_180 = self.merged_df[['TNE_BO_180']]
        self.independent_variable = self.merged_df[self.FEATURE_COLUMNS]

        return self.merged_df, self.dependent_variable_TNE_BO_180, self.independent_variable

    # To train and view the result of the binary / multiclass classifier
    def train_classifier_with_medication_diagnosis(self, random_state=None):
        """Fit a class-weighted logistic regression and print its metrics.

        Requires ``load_data`` to have been called first.

        Args:
            random_state: Optional seed forwarded to ``train_test_split`` so
                the 70/30 split can be reproduced; ``None`` keeps it random.
        """
        dependent_variables = {'TNE_BO_180': self.dependent_variable_TNE_BO_180}

        # Per-class weights to counter the imbalanced output classes.
        class_weights = {'TNE_BO_180': {0: 10, 1: 270}}

        for variable_name, dependent_variable in dependent_variables.items():
            # A 1-D target avoids sklearn's column-vector conversion and makes
            # ``nunique`` return a plain integer.
            target = self._as_1d(dependent_variable)
            logistic_prediction_model = LogisticRegression(class_weight=class_weights[variable_name])
            X_train, X_test, y_train, y_test = train_test_split(
                self.independent_variable, target, train_size=0.7, random_state=random_state)
            logistic_prediction_model.fit(X_train, y_train)
            y_pred = logistic_prediction_model.predict(X_test)

            # Binary targets use the positive-class scores, anything else the
            # support-weighted average.
            is_binary = int(y_train.nunique()) == 2
            average = 'binary' if is_binary else 'weighted'
            print(f'\n****** Evaluation result {variable_name} as dependent variable ******')

            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred, average=average)
            recall = recall_score(y_test, y_pred, average=average)
            f1 = f1_score(y_test, y_pred, average=average)

            print(f"Accuracy: {accuracy}")
            print(f"Precision: {precision}")
            print(f"Recall: {recall}")
            print(f"F1score: {f1}")
            print("Confused matrix:")
            if is_binary:
                # Pin both labels so the matrix stays 2x2 (and unpackable)
                # even if the test split happens to contain a single class.
                conf_matrix = confusion_matrix(y_test, y_pred, labels=sorted(y_train.unique()))
                tn, fp, fn, tp = conf_matrix.ravel()
                print(conf_matrix)
                print(f"True Negative (TN): {tn}")
                print(f"False Positive (FP): {fp}")
                print(f"False Negative (FN): {fn}")
                print(f"True Positive (TP): {tp}")
            else:
                conf_matrix = confusion_matrix(y_test, y_pred)
                print(conf_matrix)
            print(classification_report(y_test, y_pred))

    def prediction_with_medication_diagnosis(self, random_state=None):
        """Fit a linear regression on ``TNE_NO_180`` and print R-squared and MSE.

        Only rows with a non-missing ``TNE_NO_180`` are used.  The filtered
        rows are kept local, so ``self.merged_df`` and
        ``self.independent_variable`` still describe the full dataset and the
        classifier can be trained before or after this call.

        Args:
            random_state: Optional seed forwarded to both ``train_test_split``
                calls; ``None`` keeps the splits random.
        """
        # Keep only rows that have a value in TNE_NO_180.
        readmitted = self.merged_df.dropna(subset=['TNE_NO_180'])

        self.dependent_variable_TNE_NO_180 = readmitted[['TNE_NO_180']]
        features = readmitted[self.FEATURE_COLUMNS]

        dependent_variables = {'TNE_NO_180': self.dependent_variable_TNE_NO_180}
        for variable_name, dependent_variable in dependent_variables.items():
            target = self._as_1d(dependent_variable)
            linear_prediction_model = LinearRegression()
            X_train, X_temp, y_train, y_temp = train_test_split(
                features, target, train_size=0.7, random_state=random_state)
            # The remaining 30% is split again; only the test part is scored,
            # the evaluation part is set aside and not used here.
            X_test, _X_eval, y_test, _y_eval = train_test_split(
                X_temp, y_temp, test_size=0.33, random_state=random_state)
            linear_prediction_model.fit(X_train, y_train)
            print(f'\n****** Prediction Model Evaluation result  {variable_name} as dependent variable ******')
            y_pred = linear_prediction_model.predict(X_test)
            r2 = r2_score(y_test, y_pred)
            mse = mean_squared_error(y_test, y_pred)
            print('R-squared:', r2)
            print('Mean squared error:', mse)

# Pipeline driver: first expand the raw CSV into indicator columns, then train
# and evaluate the classifier and the regression model on the expanded file.
source_csv_path = os.getenv("FILE_Full_ICD10_ATC_PATH")
if not source_csv_path:
    # Fail with an actionable message instead of handing None to pandas.
    raise ValueError(
        "Environment variable FILE_Full_ICD10_ATC_PATH is not set; "
        "define it (e.g. in the .env file) as the path to Full_ICD10_ATC.csv"
    )

SplitMedicationDiagnosisInUniquePieces_Obj = SplitMedicationDiagnosisInUniquePieces(source_csv_path)
SplitMedicationDiagnosisInUniquePieces_Obj.split_diagnosis_medication_in_unique_pieces()

ClassifyReadmissionWithMedicationDiagnosis_Obj = ClassifyReadmissionWithMedicationDiagnosis('Split_Full_ICD10_ATC.csv')
merged_df, dependent_variable_TNE_BO_180, independent_variable  = ClassifyReadmissionWithMedicationDiagnosis_Obj.load_data()
ClassifyReadmissionWithMedicationDiagnosis_Obj.train_classifier_with_medication_diagnosis()
ClassifyReadmissionWithMedicationDiagnosis_Obj.prediction_with_medication_diagnosis()