# **Importing libraries**

In [1]:
# Standard libraries
import os

# Third libraries 
import numpy as np 

import pandas as pd 
import matplotlib.pyplot as plt
import gradio as gr

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score



# Loading data

In [2]:
data_training = pd.read_csv("./data/training_data.csv")
data_test = pd.read_csv("./data/test_data.csv")
data_dictionary = pd.read_csv("./data/data_dictionary.csv")
data_sample_submission = pd.read_csv("./data/sample_result.csv")
data_meta = pd.read_csv("./data/metadata.csv")


## Cleaning the data

In [3]:
def clean_data(data):
    #print(data)
    
  
    data.dropna(inplace=True)

    if data['gender'].dtype == 'object':
        print("run")
        data['gender'] = data['gender'].map({'M': 0, 'F': 1})
    if data['facid'].dtype == 'object':
        data['facid'] = data['facid'].map({'A': 0, 'B': 1, "C":2, "D":3,"E":4})
    if data['hemo'].dtype == 'object':
        data['hemo'] = data['hemo'].map({False: 0, True: 1})
    
    data['psychologicaldisordermajor'] = data['psychologicaldisordermajor'].map({False: 0, True: 1})
    data['pneum'] = data['pneum'].map({False: 0, True: 1})

    data['dialysisrenalendstage'] = data['dialysisrenalendstage'].map({False: 0, True: 1})
    
    data['asthma'] = data['asthma'].map({False: 0, True: 1})
    data['irondef'] = data['irondef'].map({False: 0, True: 1})
    data['substancedependence'] = data['substancedependence'].map({False: 0, True: 1})
    data['depress'] = data['depress'].map({False: 0, True: 1})
    data['psychother'] = data['psychother'].map({False: 0, True: 1})
    data['fibrosisandother'] = data['fibrosisandother'].map({False: 0, True: 1})
    data['malnutrition'] = data['malnutrition'].map({False: 0, True: 1})
    
    
    return data



In [4]:
pd.set_option("display.max_columns",None)
cleaned_data = clean_data(data_training);
#cleaned_data.head(2)

run


## Splitting the data training data set into two sets

In [5]:

x = cleaned_data.drop(columns=['lengthofstay',"vdate"],axis=1)
y = cleaned_data["lengthofstay"]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

In [60]:
#cleaned_data.isna().sum()

# Create the model

In [6]:
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Training the model

In [7]:
model.fit(x_train,y_train)

## Apply the model to make predictions

In [8]:
y_pred = model.predict(x_test)

In [9]:
np.mean(y_pred)

3.953900167736711

## Evaluate the model performance

In [10]:
mse = mean_squared_error(y_test,y_pred)
r2 = r2_score(y_test, y_pred)



In [11]:
performance_data = {
    'Metric': ['MER', 'R2',],
    'Score': [mse, r2]
}

performance_df = pd.DataFrame(performance_data)

print(performance_df.to_string(index=False))

Metric    Score
   MER 0.454808
    R2 0.917566


In [28]:
data_training.head()

Unnamed: 0,id,vdate,rcount,gender,dialysisrenalendstage,asthma,irondef,pneum,substancedependence,psychologicaldisordermajor,depress,psychother,fibrosisandother,malnutrition,hemo,hematocrit,neutrophils,sodium,glucose,bloodureanitro,creatinine,bmi,pulse,respiration,secondarydiagnosisnonicd9,facid,lengthofstay
0,76513,2012-07-05,4,M,False,False,False,False,False,True,False,False,False,False,False,16.6,7.0,137.150067,148.420532,23.0,0.955051,28.450268,63,6.7,1,E,7
1,60406,2012-02-26,1,M,False,False,False,False,False,False,False,False,False,False,False,9.0,7.1,140.643655,166.718526,9.0,1.385747,29.233048,81,6.5,0,B,3
2,27322,2012-11-19,1,F,False,False,False,False,False,False,False,False,False,False,False,17.0,12.6,138.459305,106.468142,12.0,0.816984,34.295535,75,6.5,1,B,5
3,53699,2012-01-30,0,F,False,False,False,False,False,False,False,False,False,False,False,17.0,19.2,138.667613,144.311236,12.0,1.082725,31.207914,75,6.5,1,B,2
4,65412,2012-09-04,3,F,False,True,False,False,False,False,False,False,False,False,False,12.3,9.2,133.463292,199.728635,12.0,1.081557,30.475797,89,6.5,7,D,8


# Create a Gradio web app

In [12]:

import warnings

# Filter out the UserWarnings from Gradio
warnings.filterwarnings("ignore", category=UserWarning, module="gradio")

import random

def generate_random_id():
   
    return random.randint(100, 3000)


#Defining default values for 20 out of 25 attributes 
default_values = {
    "id":generate_random_id(),
    "rcount":2,
    "dialysisrenalendstage":False,
    "pneum":False,
    "substancedependence": False,
    "psychologicaldisordermajor": True,
    "psychother": True,
    "fibrosisandother": False,
    "malnutrition": False,
    "hemo": False,
    "hematocrit": 16,
    "neutrophils": 7,
    "sodium": 136,
    "glucose": 147,
    "bloodureanitro": 20,
    "creatinine": 0.9,
    "pulse": 6.5,
    "respiration": 1,
    "secondarydiagnosisnonicd9": 1,
    "facid": "B",
}

# Define input components for user-specified features
user_input_features = [
    gr.inputs.Textbox(label="what is your pulse rate (bpm)?"),
    gr.inputs.Textbox(label="What is your BMI?"),
    gr.inputs.Radio(label="What is your gender?", choices=["M", "F"]),
    gr.inputs.Radio(label="Do you have depression?", choices=["True", "False"]),
    gr.inputs.Radio(label="Do you have asthma?",choices=["True", "False"]),
    gr.inputs.Radio(label="Do you have iron deficiency?",choices=["True", "False"]),
]


def predict_some(pulse,bmi, gender,depress, asthma, irondef):
    
    input_data = {
        "id": generate_random_id(),
        "rcount": None,
        "gender": gender,
        "dialysisrenalendstage": None,
        "asthma": asthma,
        "irondef": irondef,
        "pneum": None,
        "substancedependence": None,
        "psychologicaldisordermajor": None,
        "depress": depress,
        "psychother": None,
        "fibrosisandother": None,
        "malnutrition": None,
        "hemo": None,
        "hematocrit": None,
        "neutrophils": None,
        "sodium": None,
        "glucose": None,
        "bloodureanitro": None,
        "creatinine": None,
        "bmi": bmi,
        "pulse": pulse,
        "respiration": None,
        "secondarydiagnosisnonicd9": None,
        "facid": None,
    }

    input_data["asthma"] = input_data["asthma"] == "True"
    input_data["irondef"] = input_data["irondef"] == "True"
    input_data["depress"] = input_data["depress"] == "True"
    print(input_data['bmi'])
    input_data['pulse'] = float(input_data['pulse'])
    input_data['bmi'] = float(input_data['bmi'])

    for feature, default_value in default_values.items():
        if not input_data[feature]:
            input_data[feature] = default_value

            
       # Convert to pandas dataframe
    #print(input_data)
    df = pd.DataFrame(input_data,index=[0])
    
    
    df["bmi"]
    
    # Clean the input data 
    cleaned_input_data = clean_data(df)

    
    # Make predictions 
    prediction = model.predict(cleaned_input_data)
    print(prediction)
    result = str(np.round(prediction)[0])
    return result + " days"


# Create the Gradio interface with the input components
output = [
    gr.Textbox(label="Predicted number of days in hospital:"),
]

iface = gr.Interface(
   fn=predict_some,
   inputs=user_input_features,
   outputs=output,
   title="Your Assistant",
   description="Anwser some questions and get prediction on the length of your stay in the hospital.",
)

# Launch the interface
iface.launch()


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




# Applying the trained model on a new and unseen data set

In [15]:
cleaned_new_data = clean_data(data_test)
cleaned_new_data.head()

Unnamed: 0,id,rcount,gender,dialysisrenalendstage,asthma,irondef,pneum,substancedependence,psychologicaldisordermajor,depress,psychother,fibrosisandother,malnutrition,hemo,hematocrit,neutrophils,sodium,glucose,bloodureanitro,creatinine,bmi,pulse,respiration,secondarydiagnosisnonicd9,facid
0,75721,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,9.1,9.55,137.722354,141.479044,12.0,0.676542,28.702016,70,5.9,2,0.0
1,80184,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,9.5,11.6,137.861621,184.433689,12.0,1.520272,29.702723,62,6.5,1,0.0
2,19864,4,0.0,0,1,1,0,0,0,0,0,0,0,0.0,11.8,16.233333,138.586277,185.799872,8.5,1.137023,29.98478,68,5.3,1,3.0
3,76699,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,12.5,9.1,142.714836,113.951944,12.0,0.845051,32.509896,84,6.5,2,1.0
4,92991,0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,8.6,12.0,142.683468,111.78784,12.0,0.814372,29.870887,88,6.5,1,1.0


In [130]:
# cleaned_new_data.isna().sum()

In [17]:
predictions = model.predict(cleaned_new_data)
df = pd.DataFrame()
df["id"] = cleaned_new_data["id"]
df["lengthofstay"] = predictions

df.to_csv('predictions.csv', index=False) 

In [18]:
restult = pd.read_csv("./prediction.csv")
restult.head()


Unnamed: 0,id,lengthofstay
0,75721,2.07
1,80184,2.99
2,19864,7.5
3,76699,2.68
4,92991,3.1


50
run
[5.44]


In [19]:
restult.shape

(30000, 2)