# Imports

Use XGBoost regressor for training

In [1]:
import pandas as pd

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, f1_score

import warnings
warnings.filterwarnings("ignore")

# Feature View and Training Dataset Retrieval

## Connecting to Hopsworks Feature Store

In [None]:
!pip install hopsworks

In [None]:
import hopsworks

# Log in to Hopsworks; the returned project handle is reused later for the
# model registry.
project = hopsworks.login()

# Handle to the project's feature store, used below to look up the feature view.
fs = project.get_feature_store()

## Get Data

In [4]:
# Look up the air-quality feature view by its name and version.
fv_name, fv_version = 'miami_air_quality_fv', 1
feature_view = fs.get_feature_view(name=fv_name, version=fv_version)

In [None]:
# Fetch training dataset 1 from the feature view and keep only the first
# element of the returned result (presumably the feature frame — confirm
# against the Hopsworks API).
training_result = feature_view.get_training_data(1)
train_data = training_result[0]

train_data.head()

# Training

## Modeling

In [None]:
# Order rows chronologically so that a shift within each city moves along the
# time axis.
train_data = train_data.sort_values(by="date", ascending=True).reset_index(drop=True)

# Target: the AQI of the *following* row for the same city. With ascending
# dates, shift(-1) pulls the next row's value up onto the current row
# (shift(1) would instead give the previous day's value). The last row of each
# city has no following day and is left as NaN.
train_data["aqi_next_day"] = train_data.groupby('city')['aqi'].shift(-1)

train_data.head(5)

In [None]:
# Rows without a target (the per-city shift leaves one such row per city) carry
# no label; drop them instead of letting fillna(0) turn them into fake
# "AQI = 0" training examples.
labelled = train_data.dropna(subset=["aqi_next_day"])

# Feature matrix: everything except the date; remaining gaps in the features
# are imputed with 0.
X = labelled.drop(columns=["date"]).fillna(0)

# pop() removes the target column from X and returns it as the label vector.
y = X.pop("aqi_next_day")

In [24]:
# Fixed seed so the split — and every metric computed from it — is identical
# on each Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Gradient-boosted regressor: 100 shallow trees (depth 3) with L2
# regularisation weight 1 and no minimum split-loss threshold.
regressor = xgb.XGBRegressor(
    n_estimators=100,
    reg_lambda=1,
    gamma=0,
    max_depth=3
)

# Fit on the training split only. Fitting on the full X, y would let the model
# see the rows in x_test / y_test and make the validation scores meaningless.
regressor.fit(x_train, y_train)

## Validation

Mean Squared Error

In [None]:
# Score the regressor on the held-out split: predict, then compare against the
# true targets with mean squared error.
y_pred = regressor.predict(x_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

mse

F1 Score

In [None]:
# Exact-match score on integer-truncated AQI values, computed on the held-out
# split only (scoring on the full X would include rows used for training).
# For single-label data, micro-averaged F1 is the fraction of rows whose
# truncated prediction equals the truncated target.
y_test_int = y_test.astype('int')
y_pred_int = regressor.predict(x_test).astype('int')

f1_score(y_test_int, y_pred_int, average='micro')

In [None]:
# Peek at the target values at positions 4-9 (the same slice is used for the
# real-vs-predicted table in the next cell).
y.iloc[4:10].values

In [None]:
# Side-by-side view of actual vs. predicted AQI for the rows at positions 4-9;
# predictions are truncated to integers for readability.
sample_rows = slice(4, 10)
truncated_preds = [int(value) for value in regressor.predict(X.iloc[sample_rows])]

pred_df = pd.DataFrame({
    'aqi_real': y.iloc[sample_rows].values,
    'aqi_pred': truncated_preds,
})
pred_df

# Model Registry

In [None]:
# Handle to the project's model registry; used below to register the trained model.
mr = project.get_model_registry()

## Model Schema

In [None]:
from hsml.model_schema import ModelSchema
from hsml.schema import Schema

# Describe the model's interface: the input schema is derived from the feature
# frame X, the output schema from the target series y.
input_schema, output_schema = Schema(X), Schema(y)
model_schema = ModelSchema(
    input_schema=input_schema,
    output_schema=output_schema,
)

model_schema.to_dict()

In [None]:
import joblib

# Serialise the fitted regressor to a local file so it can be uploaded to the
# model registry.
model_path = 'model.pkl'
joblib.dump(regressor, model_path)

In [None]:
# Create the registry entry for the regressor. The recorded metric is the mean
# squared error computed in the validation section, not a hard-coded
# placeholder, so the registry reflects what was actually measured.
model = mr.sklearn.create_model(
    name="xgboost_model",
    metrics={"mse": float(mse)},
    description="XGBoost Regressor.",
    # One randomly drawn feature row, as a NumPy array, serves as the input example.
    input_example=X.sample().to_numpy(),
    model_schema=model_schema
)

# Upload the serialised model file written by joblib.dump to the registry.
model.save('model.pkl')