In [12]:
!pip install fastapi nest-asyncio pyngrok uvicorn h2o



In [16]:
!ngrok authtoken '2ighL0YEwJxisFZFo8JWIFL1wtf_3CdEFhapKNHeoHFAE2m4d'

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [17]:
%%writefile app.py
import asyncio
import sched
import time
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import JSONResponse
import pandas as pd
import h2o
from h2o.automl import H2OAutoML
from io import StringIO
import uuid

# ASGI application object; the route decorators in this module register on it.
app = FastAPI()

# Runs at import time: connects to an H2O instance, starting a local one if
# none is found.
h2o.init()

# In-memory storage for tasks, keyed by task id. Each value is a dict with a
# 'status' entry plus 'model_details' or 'error' once training has finished.
tasks = dict()

# Queue of pending training jobs, timed with the wall clock.
scheduler = sched.scheduler(timefunc=time.time, delayfunc=time.sleep)

class TaskStatus:
    """String constants for the lifecycle states stored under a task's 'status' key."""

    WAITING, IN_PROGRESS, DONE, ERROR = (
        "waiting",
        "in_progress",
        "done",
        "error",
    )

def limit_rows(df, max_rows=500):
    """Drop incomplete rows and cap the result at ``max_rows`` rows.

    Args:
        df: pandas DataFrame to reduce.
        max_rows: Maximum number of rows to keep (default 500).

    Returns:
        A new DataFrame holding only the rows of ``df`` that contain no
        missing values. When more than ``max_rows`` such rows exist, a random
        sample of exactly ``max_rows`` of them is returned instead.
    """
    complete_rows = df.dropna()
    if len(complete_rows) > max_rows:
        # Fixed seed so the same input always yields the same subset.
        complete_rows = complete_rows.sample(n=max_rows, random_state=42)
    return complete_rows

def _json_safe_metric(value):
    """Return ``value``, or None when it is a non-finite float (NaN or +/-inf)."""
    import math  # local import keeps this block self-contained

    if isinstance(value, float) and not math.isfinite(value):
        return None
    return value


def train_model_task(task_id, file_content):
    """Train an H2O AutoML model from uploaded CSV bytes and record the outcome.

    The last CSV column is the target; every other column is a predictor. The
    target is treated as a classification label when it is non-numeric or has
    fewer than 10 distinct values, otherwise as a regression target. A first
    AutoML run (GLM, GBM, XGBoost; up to 10 models) picks a leader. If that
    leader's logloss exceeds 0.2 (classification) or its r2 is below 0.8
    (regression), a second run limited to DeepLearning (up to 2 models) is
    trained, and its leader replaces the first one only when it scores better
    on the same metric. The chosen model is saved under ``./models``.

    Metrics are read from ``model.model_performance()`` called without
    arguments.
    NOTE(review): per the H2O docs that reports training-data metrics, so the
    thresholds and the model comparison may be optimistic — confirm whether
    cross-validation metrics were intended.

    Args:
        task_id: Key of the task record in the module-level ``tasks`` dict.
        file_content: Raw bytes of a UTF-8 encoded CSV file.

    Returns:
        None. On success ``tasks[task_id]`` gains ``'model_details'`` and its
        status becomes ``TaskStatus.DONE``. On any exception it gains
        ``'error'`` (the exception text) and its status becomes
        ``TaskStatus.ERROR``; the exception is not re-raised.
    """
    try:
        tasks[task_id]['status'] = TaskStatus.IN_PROGRESS

        # Parse the upload, drop incomplete rows, and cap the row count.
        df = limit_rows(pd.read_csv(StringIO(file_content.decode('utf-8'))))
        h2o_df = h2o.H2OFrame(df)

        # Last column is the target; all remaining columns are predictors.
        x = h2o_df.columns
        y = x[-1]
        x.remove(y)

        target_is_numeric = h2o_df[y].isnumeric()[0]
        target_unique_values = h2o_df[y].unique().nrow
        is_classification = (not target_is_numeric) or target_unique_values < 10
        if is_classification:
            h2o_df[y] = h2o_df[y].asfactor()

        # Stage 1: classical algorithms.
        aml = H2OAutoML(max_models=10, seed=1, include_algos=["GLM", "GBM", "XGBoost"])
        aml.train(x=x, y=y, training_frame=h2o_df)
        model = aml.leader
        model_metrics = model.model_performance()._metric_json

        # Stage 2 is only attempted when the stage-1 leader looks weak.
        if is_classification:
            include_dl = float(model_metrics['logloss']) > 0.2
        else:
            include_dl = float(model_metrics['r2']) < 0.8

        if include_dl:
            aml2 = H2OAutoML(max_models=2, seed=1, include_algos=["DeepLearning"])
            aml2.train(x=x, y=y, training_frame=h2o_df)
            model2 = aml2.leader
            metrics2 = model2.model_performance()._metric_json
            if is_classification:
                # Lower logloss is better.
                dl_is_better = float(metrics2['logloss']) < float(model_metrics['logloss'])
            else:
                # Higher r2 is better.
                dl_is_better = float(metrics2['r2']) > float(model_metrics['r2'])
            if dl_is_better:
                model, model_metrics = model2, metrics2

        model_path = h2o.save_model(model=model, path="./models", force=True)

        model_details = {
            'model_id': model.model_id,
            'model_type': model.algo,
            'model_path': model_path,
            'model_category': model_metrics['model_category'],
        }
        # (key in the task record, key in the H2O metrics JSON)
        if is_classification:
            reported = (('AUC', 'AUC'), ('logloss', 'logloss'), ('MSE', 'MSE'))
        else:
            reported = (('MSE', 'MSE'), ('RMSE', 'RMSE'), ('R2', 'r2'))
        for detail_key, metric_key in reported:
            # A missing or non-finite metric is reported as None rather than
            # failing the finished task: JSONResponse cannot serialise NaN/inf.
            model_details[detail_key] = _json_safe_metric(model_metrics.get(metric_key))

        # Store the details before flipping the status: this function runs in
        # an executor thread (see run_scheduler), so a concurrent /monitor
        # poll must never observe DONE without 'model_details'.
        tasks[task_id]['model_details'] = model_details
        tasks[task_id]['status'] = TaskStatus.DONE
    except Exception as e:
        # setdefault keeps the handler from raising if the record is missing.
        record = tasks.setdefault(task_id, {})
        record['error'] = str(e)
        record['status'] = TaskStatus.ERROR

# Strong references to in-flight scheduler runs. The event loop only keeps
# weak references to tasks, so an otherwise unreferenced task can be
# garbage-collected before it finishes.
_background_tasks = set()


@app.post('/train')
async def train_model(file: UploadFile = File(...)):
    """Accept a CSV upload, queue a training job for it, and return its task id.

    Args:
        file: Uploaded CSV file; its bytes are handed to ``train_model_task``.

    Returns:
        JSONResponse with ``{'task_id': <uuid4 string>}``. Progress and results
        are stored in ``tasks`` under that id.
    """
    task_id = str(uuid.uuid4())
    # Read the upload before registering the task, so a failed read does not
    # leave a record stuck in WAITING.
    file_content = await file.read()
    tasks[task_id] = {'status': TaskStatus.WAITING}

    # Schedule the task using the scheduler, then start a run of the queue.
    scheduler.enter(0, 1, train_model_task, (task_id, file_content))
    runner = asyncio.create_task(run_scheduler())
    _background_tasks.add(runner)
    runner.add_done_callback(_background_tasks.discard)

    return JSONResponse(content={'task_id': task_id})

@app.get('/monitor/{task_id}')
async def monitor_task(task_id: str):
    """Return the stored record for ``task_id``, or a 404 JSON error if there is none."""
    record = tasks.get(task_id)
    if record:
        return JSONResponse(content=record)
    return JSONResponse(status_code=404, content={'error': 'Task not found'})

@app.post('/predict')
async def predict_model(modelpath: str = Form(...), file: UploadFile = File(...)):
    """Score an uploaded CSV with a previously saved H2O model.

    Args:
        modelpath: Path of a saved model, as returned in ``model_path`` by the
            training task. It must resolve to a location inside ``./models``.
        file: Uploaded UTF-8 CSV containing the rows to score.

    Returns:
        JSONResponse with one record per input row, or a 400 JSON error when
        ``modelpath`` points outside the models directory.
    """
    import os  # local import keeps this block self-contained

    # `modelpath` is client-controlled, so only paths inside the directory
    # that train_model_task saves to ("./models") are accepted.
    # NOTE(review): assumes the H2O server shares this process's filesystem
    # and working directory (the local server started by h2o.init()) — confirm.
    models_root = os.path.realpath("./models")
    try:
        requested_path = os.path.realpath(modelpath)
    except ValueError:
        # e.g. an embedded NUL byte in the supplied path
        requested_path = ""
    if not requested_path.startswith(models_root + os.sep):
        return JSONResponse(
            status_code=400,
            content={'error': 'Model path must be inside the models directory'},
        )

    csv_data = StringIO((await file.read()).decode('utf-8'))
    input_df = pd.read_csv(csv_data)
    h2o_input_df = h2o.H2OFrame(input_df)
    model = h2o.load_model(requested_path)
    predictions = model.predict(h2o_input_df)
    predictions_df = predictions.as_data_frame()
    return JSONResponse(content=predictions_df.to_dict(orient="records"))

async def run_scheduler():
    """Run the shared ``scheduler`` queue in the event loop's default executor.

    ``scheduler.run`` is a blocking call, so it is handed to the executor
    rather than being called directly on the event loop.
    """
    # get_running_loop() is the recommended call inside a coroutine and
    # matches startup_event.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, scheduler.run)

@app.on_event("startup")
async def startup_event():
    """On application startup, launch one run of the scheduler as a background task."""
    asyncio.get_running_loop().create_task(run_scheduler())


Overwriting app.py


In [None]:
import uvicorn
from pyngrok import ngrok

ngrok.kill()
public_url = ngrok.connect(8000)
print(f"Public URL: {public_url}")

!uvicorn app:app --host 0.0.0.0 --port 8000 --reload

Public URL: NgrokTunnel: "https://ccf3-35-239-116-136.ngrok-free.app" -> "http://localhost:8000"
[32mINFO[0m:     Will watch for changes in these directories: ['/content']
[32mINFO[0m:     Uvicorn running on [1mhttp://0.0.0.0:8000[0m (Press CTRL+C to quit)
[32mINFO[0m:     Started reloader process [[36m[1m2714[0m] using [36m[1mStatReload[0m
Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.24" 2024-07-16; OpenJDK Runtime Environment (build 11.0.24+8-post-Ubuntu-1ubuntu322.04); OpenJDK 64-Bit Server VM (build 11.0.24+8-post-Ubuntu-1ubuntu322.04, mixed mode, sharing)
  Starting server from /usr/local/lib/python3.10/dist-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmpx98ptmjl
  JVM stdout: /tmp/tmpx98ptmjl/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmpx98ptmjl/h2o_unknownUser_started_from_python.err
  Server is running at http://127.