## For : Full_ICD10_ATC.csv

### Classification and Prediction result

**Without medication and diagnosis columns**

In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import random
from IPython.display import display
from IPython.display import Image
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score,confusion_matrix,precision_score,recall_score,f1_score
from sklearn.metrics import classification_report

from sklearn.feature_extraction.text import CountVectorizer
import matplotlib.pyplot as plt

import os
from dotenv import load_dotenv
load_dotenv()


################## Without Medication and Diagnosis ##################
#                                                                    #
#                                                                    #
#                                                                    #
######################################################################

class ClassifyReadmissionWithMedicationDiagnosis:
    """Fit readmission classifiers and a time-to-next-episode regression.

    The workflow is: construct with a CSV path, call :meth:`load_data`, then
    call :meth:`train_classifier_without_medication_diagnosis` and/or
    :meth:`prediction_without_medication_diagnosis`. Both training methods
    print their evaluation metrics and return ``None``.

    NOTE(review): the class name says "With" medication/diagnosis while the
    method names and the surrounding notebook section say "Without"; the
    feature set below only contains the ``num_diagnoses`` / ``num_medications``
    counts. The name is kept unchanged so existing callers keep working.
    """

    # Predictor columns fed to every model, in the order they are selected.
    FEATURE_COLUMNS = [
        'MiddleChildhood', 'Preschooler', 'Teenager', 'F', 'M',
        'num_diagnoses', 'num_medications', 'Therapy_ratio',
        'Examination_ratio', 'Advisory_ratio', 'TreatmentPlanning_ratio',
        'Outpatient_ratio', 'Inpatient_ratio', 'Inpatient_day_ratio',
        'Inpatient_daynight_ratio', 'closingcode_0', 'closingcode_1',
        'closingcode_2', 'closingcode_3', 'closingcode_4', 'closingcode_5',
        'closingcode_6', 'closingcode_9', 'aftercode_1', 'aftercode_2',
        'aftercode_3', 'aftercode_4', 'aftercode_5', 'Length_of_Episode',
        'Count_visit',
    ]

    # Outcome-related columns kept next to the features in ``merged_df``.
    OUTCOME_COLUMNS = [
        'tillnextepisode', 'Cat_TNE', 'No_more_episode', 'TNE_more180',
        'TNE_less180', 'Care_intensity', 'TNE_desc',
    ]

    # Value written over ``tillnextepisode == 0`` rows.
    # NOTE(review): presumably 0 encodes "no later episode" and 50000 is a
    # far-future sentinel -- confirm with the data owner.
    ZERO_TNE_REPLACEMENT = 50000

    # Per-target class weights handed to LogisticRegression.
    CLASS_WEIGHTS = {
        'Cat_TNE': {0: 1, 1: 18, 2: 19, 3: 20, 4: 21, 5: 22, 6: 23, 7: 24,
                    8: 25, 9: 26, 10: 27, 11: 28, 12: 29, 13: 30},
        'No_more_episode': {0: 10, 1: 10},
        'TNE_desc': {0: 1, 1: 9, 2: 10},
    }

    def __init__(self, file1):
        """Remember the CSV path; no file is read until :meth:`load_data`.

        Args:
            file1: Path (or anything ``pandas.read_csv`` accepts) of the
                episode-level CSV containing ``FEATURE_COLUMNS`` and
                ``OUTCOME_COLUMNS``.
        """
        self.file1 = file1
        self.merged_df = None
        self.selected_column_merged_df = None
        self.without_diag_medic_selected_column_merged_df = None
        # Populated by load_data(); initialised here so that "not loaded yet"
        # can be detected instead of surfacing as an AttributeError.
        self.dependent_variable_tillnextepisode = None
        self.dependent_variable_Cat_TNE = None
        self.dependent_variable_No_more_episode = None
        self.dependent_variable_TNE_desc = None
        self.independent_variable = None

    def _require_loaded(self):
        """Raise a clear error when a model method runs before load_data()."""
        if self.independent_variable is None:
            raise RuntimeError(
                'load_data() must be called before training or prediction'
            )

    def load_data(self):
        """Read the CSV and split it into feature and target frames.

        Rows whose ``tillnextepisode`` is 0 get ``ZERO_TNE_REPLACEMENT``
        written in its place before the targets are extracted.

        Returns:
            A 6-tuple ``(merged_df, tillnextepisode, Cat_TNE,
            No_more_episode, TNE_desc, independent_variable)``. Every target
            is a single-column DataFrame; ``independent_variable`` holds
            ``FEATURE_COLUMNS``.

        Raises:
            KeyError: if the CSV lacks any of the expected columns.
        """
        original_df_1 = pd.read_csv(self.file1)
        # Explicit copy: the frame is mutated just below, and writing into a
        # bare column-slice of the raw frame is chained assignment.
        self.merged_df = original_df_1[
            self.FEATURE_COLUMNS + self.OUTCOME_COLUMNS
        ].copy()

        zero_gap = self.merged_df['tillnextepisode'] == 0
        self.merged_df.loc[zero_gap, 'tillnextepisode'] = self.ZERO_TNE_REPLACEMENT

        self.dependent_variable_tillnextepisode = self.merged_df[['tillnextepisode']]
        self.dependent_variable_Cat_TNE = self.merged_df[['Cat_TNE']]
        self.dependent_variable_No_more_episode = self.merged_df[['No_more_episode']]
        self.dependent_variable_TNE_desc = self.merged_df[['TNE_desc']]
        self.independent_variable = self.merged_df[self.FEATURE_COLUMNS]

        return (
            self.merged_df,
            self.dependent_variable_tillnextepisode,
            self.dependent_variable_Cat_TNE,
            self.dependent_variable_No_more_episode,
            self.dependent_variable_TNE_desc,
            self.independent_variable,
        )

    def train_classifier_without_medication_diagnosis(self, random_state=None):
        """Fit and evaluate one weighted logistic regression per target.

        Targets are ``Cat_TNE``, ``No_more_episode`` and ``TNE_desc``. Each
        gets its own 70/30 train/test split; accuracy, precision, recall, F1,
        the confusion matrix and the classification report are printed.
        Two-class targets use ``average='binary'`` and additionally print
        TN/FP/FN/TP; all other targets use ``average='weighted'``.

        Args:
            random_state: Optional seed forwarded to ``train_test_split`` so
                a run can be reproduced. ``None`` (default) keeps the
                original unseeded behaviour.

        Raises:
            RuntimeError: if :meth:`load_data` has not been called.
        """
        self._require_loaded()
        targets = [
            ('Cat_TNE', self.dependent_variable_Cat_TNE),
            ('No_more_episode', self.dependent_variable_No_more_episode),
            ('TNE_desc', self.dependent_variable_TNE_desc),
        ]

        for variable_name, dependent_variable in targets:
            # Estimators expect a 1-D target; the stored target is a
            # single-column DataFrame, so take that column as a Series.
            target = dependent_variable.iloc[:, 0]
            logistic_prediction_model = LogisticRegression(
                class_weight=self.CLASS_WEIGHTS[variable_name]
            )
            X_train, X_test, y_train, y_test = train_test_split(
                self.independent_variable, target,
                train_size=0.7, random_state=random_state,
            )
            logistic_prediction_model.fit(X_train, y_train)
            y_pred = logistic_prediction_model.predict(X_test)

            # Binary vs. multiclass is decided from the training labels.
            is_binary = y_train.nunique() == 2
            average = 'binary' if is_binary else 'weighted'

            print(f'\n****** Evaluation result {variable_name} as dependent variable ******')
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred, average=average)
            recall = recall_score(y_test, y_pred, average=average)
            f1 = f1_score(y_test, y_pred, average=average)

            print(f"Accuracy: {accuracy}")
            print(f"Precision: {precision}")
            print(f"Recall: {recall}")
            print(f"F1score: {f1}")
            print("Confused matrix:")
            if is_binary:
                # Pin the label set so the matrix is always 2x2 and the
                # four-way unpack cannot fail when the test split or the
                # predictions happen to contain a single class.
                conf_matrix = confusion_matrix(
                    y_test, y_pred, labels=logistic_prediction_model.classes_
                )
                tn, fp, fn, tp = conf_matrix.ravel()
                print(conf_matrix)
                print(f"True Negative (TN): {tn}")
                print(f"False Positive (FP): {fp}")
                print(f"False Negative (FN): {fn}")
                print(f"True Positive (TP): {tp}")
            else:
                # Multiclass target: matrix only, no TN/FP/FN/TP breakdown.
                conf_matrix = confusion_matrix(y_test, y_pred)
                print(conf_matrix)
            print(classification_report(y_test, y_pred))

    def prediction_without_medication_diagnosis(self, random_state=None):
        """Fit a linear regression on ``tillnextepisode`` and print R2 / MSE.

        The data is split 70/30; the 30% remainder is split again (33% set
        aside) and the model is scored on the larger part of that remainder.

        Args:
            random_state: Optional seed forwarded to both
                ``train_test_split`` calls. ``None`` (default) keeps the
                original unseeded behaviour.

        Raises:
            RuntimeError: if :meth:`load_data` has not been called.
        """
        self._require_loaded()
        variable_name = 'tillnextepisode'
        # 1-D target, see train_classifier_without_medication_diagnosis.
        target = self.dependent_variable_tillnextepisode.iloc[:, 0]

        X_train, X_temp, y_train, y_temp = train_test_split(
            self.independent_variable, target,
            train_size=0.7, random_state=random_state,
        )
        # The held-out "eval" part is not used here; the split is kept so the
        # scored test subset stays the same size as before.
        X_test, _X_eval, y_test, _y_eval = train_test_split(
            X_temp, y_temp, test_size=0.33, random_state=random_state,
        )

        linear_prediction_model = LinearRegression()
        linear_prediction_model.fit(X_train, y_train)
        y_pred = linear_prediction_model.predict(X_test)

        print(f'\n****** Prediction Model Evaluation result  {variable_name} as dependent variable ******')
        r2 = r2_score(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        print('R-squared:', r2)
        print('Mean squared error:', mse)
                

# Driver cell: build the pipeline object for the episode CSV, load the data
# once, then run the classification and regression evaluations (both print
# their metrics).
ClassifyReadmissionWithMedicationDiagnosis_Obj = ClassifyReadmissionWithMedicationDiagnosis(
    '/mnt/work/workbench/dipendrp/new-data/fullHot_episodes.csv'
)

# The loaded frames are also bound at module level, in the order load_data()
# returns them.
(
    merged_df,
    dependent_variable_tillnextepisode,
    dependent_variable_Cat_TNE,
    dependent_variable_No_more_episode,
    dependent_variable_TNE_desc,
    independent_variable,
) = ClassifyReadmissionWithMedicationDiagnosis_Obj.load_data()

ClassifyReadmissionWithMedicationDiagnosis_Obj.train_classifier_without_medication_diagnosis()
ClassifyReadmissionWithMedicationDiagnosis_Obj.prediction_without_medication_diagnosis()
