# FastAPI и ML-модель. Недвижимость в Калифорнии

## Установка и загрузка библиотек

In [1]:
%%capture
!pip install lightgbm

In [2]:
import pandas as pd
import numpy as np
import lightgbm
import pickle

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn import metrics

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



## Работа с данными

In [3]:
# Fetch the California housing dataset and expose its feature matrix as a
# DataFrame with named columns.
data = fetch_california_housing()
df = pd.DataFrame(
    data=data.data,
    columns=data.feature_names,
)
# Preview the first rows.
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [4]:
# Median income is the regression target; every remaining column is a feature.
y = df["MedInc"]
X = df.drop(columns="MedInc")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(X_train.shape, X_test.shape)

(16512, 7) (4128, 7)


## Обучение модели

In [5]:
# Fit a LightGBM regressor with its default hyperparameters.
model = lightgbm.LGBMRegressor().fit(X_train, y_train)

# Score the model on the hold-out split.
y_pred = model.predict(X_test)
mse = metrics.mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f"Root Mean Squared Error: {rmse:.2f}")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001582 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1583
[LightGBM] [Info] Number of data points in the train set: 16512, number of used features: 7
[LightGBM] [Info] Start training from score 3.880754
Root Mean Squared Error: 0.81


## Сохранение модели

In [6]:
# Serialize the fitted model to disk so the web service can load it.
model_filename = 'model.pkl'
with open(model_filename, mode='wb') as model_file:
    model_file.write(pickle.dumps(model))

## Реализация веб-сервиса

In [None]:
%%writefile main.py

import pickle
import pandas as pd
from fastapi import FastAPI
from contextlib import asynccontextmanager
from pydantic import BaseModel


class FeatureSet(BaseModel):
    """Request body for ``/predict``: the input features of one observation.

    The fields match, in name and order, the columns the notebook trains on
    (every California-housing column except the ``MedInc`` target).
    """

    # NOTE(review): keep names and order in sync with the training columns;
    # the DataFrame handed to the model is built from this model's dump.
    HouseAge: float
    AveRooms: float
    AveBedrms: float
    Population: float
    AveOccup: float
    Latitude: float
    Longitude: float


# Process-wide cache of unpickled models, keyed by file path, so the model
# file is read and deserialized once instead of on every request.
_MODEL_CACHE = {}


def _load_model(path: str = "model.pkl"):
    """Return the model unpickled from ``path``, reading the file only on first use."""
    if path not in _MODEL_CACHE:
        # NOTE: pickle.load can execute arbitrary code embedded in the file;
        # only point this at a model artifact you produced yourself.
        with open(path, 'rb') as model_file:
            _MODEL_CACHE[path] = pickle.load(model_file)
    return _MODEL_CACHE[path]


def medinc_regressor(x: dict) -> dict:
    """Run the pickled model on a single observation.

    Args:
        x: Mapping of feature name to scalar value; it becomes a one-row
            DataFrame whose columns are the mapping's keys.

    Returns:
        ``{"prediction": value}`` where ``value`` is the model output for
        that row, converted to a built-in ``float`` so it serializes to JSON
        regardless of the numpy dtype the model returns.

    Raises:
        FileNotFoundError: if ``model.pkl`` is missing when the model is
            first needed.
    """
    loaded_model = _load_model()
    x_df = pd.DataFrame(x, index=[0])
    res = loaded_model.predict(x_df)[0]
    return {"prediction": float(res)}

# Registry of prediction callables, populated while the application is running.
ml_models = dict()


@asynccontextmanager
async def ml_lifespan_manager(app: FastAPI):
    """Lifespan hook: register the predictor, hand control to the app, then empty the registry.

    The statements before ``yield`` run when the context is entered and the
    statement after it runs when the context is exited.
    """
    ml_models.update(medinc_regressor=medinc_regressor)
    yield
    ml_models.clear()

# Application instance; the lifespan manager fills and clears the model registry.
app = FastAPI(lifespan=ml_lifespan_manager)


@app.post("/predict")
async def predict(feature_set: FeatureSet):
    """Handle POST /predict: pass the request's features to the registered predictor."""
    regressor = ml_models["medinc_regressor"]
    payload = feature_set.model_dump()
    return regressor(payload)

In [None]:
# Start the API with uvicorn in the background, expose local port 8000 through
# localtunnel (also in the background) under the "fastapi" subdomain, and print
# the response from loca.lt/mytunnelpassword (presumably the password the
# tunnel landing page asks for).
# NOTE(review): assumes uvicorn serves on port 8000, since no --port is passed to it; confirm.
!uvicorn main:app & npx localtunnel --port 8000 --subdomain fastapi & wget -q -O - https://loca.lt/mytunnelpassword