# In [44]:
import os
#import gradio as gr
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import joblib
import logging
from sklearn.preprocessing import StandardScaler
# Setup logging
logging.basicConfig(filename='app.log', level=logging.INFO)

# Load the training data
training_data_path = 'C:\\Users\\Kevin\\Documents\\DAT158\\training_data.csv'
training_data = pd.read_csv(training_data_path)

# Where the fitted artifacts are written. These are exactly the paths that
# predict_length_of_stay() loads from: the model by absolute path, the scaler
# by a file name relative to the current working directory. (They must NOT be
# built by appending to training_data_path, which is the CSV file itself.)
model_path = 'C:\\Users\\Kevin\\Documents\\DAT158\\trained_model.joblib'
scaler_path = 'scaler.pkl'

# Training needs the target column ('length_of_stay') and the categorical
# 'gender' column, which is one-hot encoded below.
if 'length_of_stay' in training_data.columns and 'gender' in training_data.columns:
    print("Found both 'length_of_stay' and 'gender'. Proceeding...")
    training_data = pd.get_dummies(training_data, columns=['gender'], drop_first=True)
    X = training_data.drop(columns=['length_of_stay'])
    y = training_data['length_of_stay']

    # Drop the 'date' column from the training data
    #if 'date' in X.columns:
        #X = X.drop(columns=['date'])

    # Split data
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Work on explicit copies so the column assignments below never write
    # into (or warn about) the frame the splits were taken from.
    X_train = X_train.copy()
    X_val = X_val.copy()

    # Normalize age: fit on the training split only and reuse the same
    # statistics for the validation split. Whole-column assignment lets the
    # column become float instead of forcing floats into its original dtype.
    scaler = StandardScaler()
    X_train['age'] = scaler.fit_transform(X_train[['age']]).ravel()
    X_val['age'] = scaler.transform(X_val[['age']]).ravel()

    # Train the model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Validate your model
    val_score = model.score(X_val, y_val)
    logging.info(f'Validation score: {val_score}')
    print(val_score)

    print("Current Working Directory:", os.getcwd())
    print("Saving to:", os.path.abspath(model_path))

    # Save the model & scaler
    joblib.dump(model, model_path)
    joblib.dump(scaler, scaler_path)
else:
    # Nothing can be trained or saved without both columns.
    model = None
    scaler = None
    logging.warning("Training skipped: 'length_of_stay' and/or 'gender' column missing")
    print("Did not find both 'length_of_stay' and 'gender'. Cannot proceed.")
    print("Current Working Directory:", os.getcwd())

def predict_length_of_stay(age, gender, diagnoses):
    """Predict a patient's hospital length of stay and return it as a message.

    The persisted model and age scaler are loaded from disk on every call, a
    single-row feature vector is built from the inputs, and the model's
    prediction is formatted into a human-readable string.

    Args:
        age: Patient age; scaled with the persisted scaler before prediction.
        gender: "Male" (case-insensitive, surrounding whitespace ignored) or
            the already-encoded value 1 is encoded as 1; any other value is
            encoded as 0.
        diagnoses: Comma-separated diagnoses. Only the number of non-empty
            entries is used as a feature; an empty string or None counts as 0.

    Returns:
        "Predicted length of stay: <value> days" on success, or
        "Error: <message>" on failure. Caught exceptions are logged and
        reported through the return value instead of propagating.
    """
    try:
        # Load model & scaler.
        # NOTE: joblib files are pickle-based; only load trusted artifacts.
        model = joblib.load('C:\\Users\\Kevin\\Documents\\DAT158\\trained_model.joblib')
        scaler = joblib.load('scaler.pkl')

        # Encode gender; accept the text label as well as a pre-encoded 1.
        if isinstance(gender, str):
            gender_numeric = 1 if gender.strip().lower() == "male" else 0
        else:
            gender_numeric = 1 if gender == 1 else 0

        # Count only real entries: "" is zero diagnoses, and blank items such
        # as the middle of "a,,b" or a trailing comma are ignored.
        num_diagnoses = sum(
            1 for entry in (diagnoses or "").split(",") if entry.strip()
        )

        # Keep the caller's age untouched so the logs show the real input.
        scaled_age = scaler.transform([[age]])[0, 0]

        # NOTE(review): assumes the model was trained on exactly
        # [age, gender, number of diagnoses] in this order - confirm against
        # the training data's columns.
        input_feature = np.array([[scaled_age, gender_numeric, num_diagnoses]])

        # Predict & log
        predicted_length_of_stay = model.predict(input_feature)[0]
        logging.info(
            'Input: %s, Prediction: %s',
            [age, gender, diagnoses],
            predicted_length_of_stay,
        )

        return f"Predicted length of stay: {round(predicted_length_of_stay, 2)} days"
    except Exception as e:
        # Boundary handler for the UI callback: record the traceback and hand
        # the message back to the caller instead of crashing.
        logging.exception('Error: %s, Input: %s', e, [age, gender, diagnoses])
        return f"Error: {str(e)}"
   
# Gradio interface 
#iface = gr.Interface(
#    fn=predict_length_of_stay,
#    inputs=[
#        gr.Number(label="Age"),
#        gr.Radio(["Male", "Female"], label="Gender"),
#        gr.Textbox(label="Diagnoses (comma-separated)"),
#    ],
#    outputs=gr.Textbox(label="Predicted length of stay (days)"),
#    title="Hospital Length of Stay Predictor",
#    description="Enter patient data to predict their length of stay in the hospital."
#)

#iface.launch()

# Smoke test: run one prediction. Gender is passed as the "Male" label because
# that is the string predict_length_of_stay() checks for when encoding gender.
print(predict_length_of_stay(18, "Male", "asthma"))



# KeyboardInterrupt: