# Training Pipeline

## Installing Hopsworks (feature store)

In [7]:
!pip install -U hopsworks --quiet

In [13]:
# Importing ML libraries 
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
# Importing pandas, seaborn and hopsworks
import pandas as pd
import seaborn as sns
import hopsworks

### Creating a feature view in Hopsworks

In [15]:
# Log in to Hopsworks and get a handle on the project's feature store.
project = hopsworks.login()
fs = project.get_feature_store()

# Reuse version 1 of the "car_prices" feature view when the lookup succeeds;
# otherwise build it from version 1 of the feature group with the same name.
try:
    feature_view = fs.get_feature_view(name="car_prices", version=1)
except Exception:
    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit still stop the cell instead of triggering feature-view creation.
    # NOTE(review): narrowing this to the client's "not found" error class would
    # be stricter — confirm the exact exception type raised by the installed version.
    car_fg = fs.get_feature_group(name="car_prices", version=1)
    query = car_fg.select_all()
    feature_view = fs.create_feature_view(
        name="car_prices",
        version=1,
        description="Read from car price dataset",
        labels=["selling_price"],  # target column the model will predict
        query=query,
    )

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/3342
Connected. Call `.close()` to terminate connection gracefully.
Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/3342/fs/3289/fv/car_prices/version/1


In [16]:
# Ask the feature view for a train/test split with a test fraction of 0.15,
# then unpack the four returned pieces into features and labels per split.
train_test_frames = feature_view.train_test_split(test_size=0.15)
X_train, X_test, y_train, y_test = train_test_frames



In [17]:
# Remove the high-cardinality `full_name` column from both feature frames.
# Reassigning (instead of `inplace=True`) together with `errors="ignore"` keeps
# this cell idempotent: running it a second time no longer raises a KeyError
# because the column is already gone.
X_train = X_train.drop(columns=["full_name"], errors="ignore")
X_test = X_test.drop(columns=["full_name"], errors="ignore")

In [None]:
# Fit a random forest on the training split.
# A fixed seed makes the fitted forest, and therefore the reported metrics,
# reproducible across Restart & Run All.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Score of the fitted model on the held-out split. It is kept in a variable
# because a bare expression in the middle of a cell is evaluated and then discarded.
test_r2 = model.score(X_test, y_test)

# Mean absolute error of the predictions on the held-out split.
mae = mean_absolute_error(y_test, model.predict(X_test))

In [40]:
# Report the mean absolute error computed on the test split.
mae_message = "The mean absolute error in lakhs is:"
print(mae_message, mae)

The mean absolute error in lakhs is: 1.478139810529481


### Saving the model in the model registry of Hopsworks

In [36]:
from hsml.schema import Schema
from hsml.model_schema import ModelSchema
import os
import joblib
import hopsworks
import shutil

# Log in again and get a handle on the project's model registry.
project = hopsworks.login()
mr = project.get_model_registry()

# The 'car_prices_model' directory will be saved to the model registry
model_dir = "car_prices_model"
# `exist_ok=True` replaces the check-then-create pattern, so re-running the
# cell is safe when the directory is already there.
os.makedirs(model_dir, exist_ok=True)
# Serialize the fitted model into the directory that gets uploaded below.
joblib.dump(model, os.path.join(model_dir, "car_prices_model.pkl"))

# One randomly sampled training row, stored with the model as an input example.
input_example = X_train.sample()
# Input/output schemas are derived from the training features and labels.
input_schema = Schema(X_train)
output_schema = Schema(y_train)
model_schema = ModelSchema(input_schema, output_schema)

# Create the registry entry (metadata only at this point).
car_prices_model = mr.python.create_model(
    version=1,
    name="car_prices",
    metrics={"Mean Absolute Error": mae},
    model_schema=model_schema,
    input_example=input_example,
    description="Car Price Predictor",
)

# Save the contents of `model_dir` under the registry entry created above.
car_prices_model.save(model_dir)

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/3342
Connected. Call `.close()` to terminate connection gracefully.


  0%|          | 0/6 [00:00<?, ?it/s]

Model created, explore it at https://c.app.hopsworks.ai:443/p/3342/models/car_prices/1


Model(name: 'car_prices', version: 1)