# Feast: Feature Store

![](https://docs.feast.dev/~gitbook/image?url=https%3A%2F%2F2421270310-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Fspaces%252FJzgtwAlzWz17rLGdYz1v%252Fuploads%252Fgit-blob-9f7df7c01969608f5a8b1d48b21f20ddeaed5590%252Ffeast_marchitecture.png%3Falt%3Dmedia&width=768&dpr=2&quality=100&sign=fdb1f669&sv=2)


In [0]:
import uuid
import pandas as pd


from feast import FeatureStore
from feast import (
    Entity,
    FeatureService,
    FeatureView,
    Field,
    FileSource
)
from feast.types import Float32, Float64, Int64, String


from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, OrdinalEncoder
from sklearn.pipeline import Pipeline

In [0]:
# Location of the raw hotel-bookings CSV, relative to this notebook.
RAW_DATA_FILEPATH = "../data/raw/hotel_bookings.csv"

# Load the raw bookings and give every row a random UUID string as its
# `booking_id`, then persist the result as Parquet inside the feature repo.
raw_data = pd.read_csv(RAW_DATA_FILEPATH)
raw_data["booking_id"] = [str(uuid.uuid4()) for _ in range(len(raw_data))]
raw_data.to_parquet(
    "../feast_service/dsrp_fs_mle2/feature_repo/data/hotel_bookings.parquet"
)

In [0]:
class FeatureEngineeringProcessor:
    """Placeholder for the feature-engineering steps; holds no state yet."""

    def __init__(self):
        """Create the processor; nothing is configured or stored."""

In [0]:
# Handle to the Feast feature store whose repository lives at this path.
fs = FeatureStore(
    repo_path="../feast_service/dsrp_fs_mle2/feature_repo/",
)

## Registro Feature Store

![](https://docs.feast.dev/~gitbook/image?url=https%3A%2F%2F2421270310-files.gitbook.io%2F%7E%2Ffiles%2Fv0%2Fb%2Fgitbook-x-prod.appspot.com%2Fo%2Fspaces%252FJzgtwAlzWz17rLGdYz1v%252Fuploads%252Fgit-blob-af58d3cf3809fcc5e69119de273668f715f27538%252Fimage%2520%287%29.png%3Falt%3Dmedia&width=768&dpr=4&quality=100&sign=a07b27a7&sv=2)

In [0]:
# Entity: a booking, joined to feature data through the `booking_id` key.
booking = Entity(
    name="booking",
    join_keys=["booking_id"],
)

In [0]:
# File source backing the booking features.
#
# The source points at the Parquet copy of the dataset rather than the raw
# CSV for two reasons:
#   * Feast's FileSource expects `file_format` to be a `FileFormat` instance
#     (a plain string such as "csv" is rejected) and it reads Parquet data,
#     so `file_format` is left unset here.
#   * Only the Parquet file carries the generated `booking_id` column that
#     the `booking` entity joins on; the raw CSV does not.
#
# NOTE(review): the path is written relative to this notebook, matching the
# location the Parquet file is saved to. Some Feast versions resolve relative
# source paths against the feature repo instead — confirm for the installed
# version.
# NOTE(review): the file must contain the `event_timestamp` and `created`
# columns named below — confirm they exist in the Parquet data.
booking_raw_source = FileSource(
    name="booking_raw_source",
    path="../feast_service/dsrp_fs_mle2/feature_repo/data/hotel_bookings.parquet",
    timestamp_field="event_timestamp",
    created_timestamp_column="created",
)

In [0]:
# Feature view over `booking_raw_source`, keyed by the `booking` entity.
booking_stats_fv = FeatureView(
    # Feature view names must be unique within a single Feast project.
    name="booking_monthly_stats",
    entities=[booking],
    source=booking_raw_source,
    # The schema lists the features this view provides. It is used both when
    # materializing features into a store and when referencing them during
    # retrieval (building a training dataset or serving features).
    schema=[Field(name="avg_time", dtype=Float32)],
    # Tags are user-defined key/value pairs attached to the feature view.
    tags={"team": "booking_monthly"},
)