# Feature store example
---

This notebook gives a simple example of how to ingest data into the feature store and retrieve it again in batch mode.

### Importing packages

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from elemeno_ai_sdk.ml.features.feature_store import FeatureStore
from elemeno_ai_sdk.ml.features.feature_table import FeatureTable
from elemeno_ai_sdk.ml.features.ingest.sink.ingestion_sink_builder import IngestionSinkType, FileIngestionSinkType
from elemeno_ai_sdk.ml.features.ingest.source.ingestion_source_builder import IngestionSourceType
from elemeno_ai_sdk.ml.features.types import FeatureType
from elemeno_ai_sdk.ml.features.ingest.source.base_source import ReadResponse
import pandas as pd
import feast
from typing import List
import os



Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  pkg_resources.declare_namespace(__name__)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(parent)


In [3]:
# SECURITY: AWS credentials must never be hardcoded in a notebook -- they end up
# in version control and in every shared copy. Supply them through the process
# environment (e.g. `export AWS_ACCESS_KEY_ID=...` before starting Jupyter, or a
# secrets manager). Any key pair previously committed in this cell must be
# treated as leaked and rotated.
REQUIRED_AWS_ENV_VARS = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")


def missing_env_vars(names, environ=None):
    """Return the entries of ``names`` that are unset or empty in ``environ``.

    Args:
        names: Iterable of environment variable names to check.
        environ: Mapping to look the names up in; defaults to ``os.environ``.

    Returns:
        A list with the missing names, in the order they were given.
    """
    env = os.environ if environ is None else environ
    return [name for name in names if not env.get(name)]


missing_aws_env_vars = missing_env_vars(REQUIRED_AWS_ENV_VARS)
if missing_aws_env_vars:
    # Warn instead of silently continuing, so a later failure is easy to trace.
    print(
        "WARNING: AWS credentials not found in the environment: "
        + ", ".join(missing_aws_env_vars)
        + ". Set them before running the cells below."
    )

### Auxiliary functions

In [4]:
def prepare_response(file_path: str) -> ReadResponse:
    """Load the CSV at ``file_path`` with pandas and wrap the frame in a ``ReadResponse``."""
    return ReadResponse(pd.read_csv(file_path))

def get_entities(id_columns: List[str]) -> List[feast.Entity]:
    """Create one ``feast.Entity`` per identifier column, named after that column."""
    entities = []
    for column_name in id_columns:
        entities.append(feast.Entity(name=column_name))
    return entities

def get_features(feature_list: List[str]) -> List[feast.Feature]:
    """Build one ``feast.Feature`` per column name, choosing the value type by name.

    ``created_timestamp`` and ``event_timestamp`` are declared as ``BYTES``,
    ``target`` as ``INT32`` and every other name as ``FLOAT``.
    """
    # NOTE(review): BYTES for the timestamp columns looks unusual -- confirm
    # this is the type the ingestion sink expects.
    dtype_by_name = {
        "created_timestamp": feast.ValueType.BYTES,
        "event_timestamp": feast.ValueType.BYTES,
        "target": feast.ValueType.INT32,
    }
    return [
        feast.Feature(
            name=column_name,
            dtype=dtype_by_name.get(column_name, feast.ValueType.FLOAT),
        )
        for column_name in feature_list
    ]

### Dataframe to ingest

In [5]:
# Read the example CSV (path relative to the notebook's working directory)
# and wrap it in a ReadResponse.
response = prepare_response(file_path="./example_data/datasource.csv")

In [6]:
# Preview the first rows of the frame that is passed to the ingestion step.
response.dataframe.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,id,target,created_timestamp,event_timestamp
0,5.1,3.5,1.4,0.2,0,0,2022-07-14 18:08:05.487499,2022-07-14 18:08:05.488248
1,4.9,3.0,1.4,0.2,1,0,2022-07-14 18:08:05.487499,2022-07-14 18:08:05.488248
2,4.7,3.2,1.3,0.2,2,0,2022-07-14 18:08:05.487499,2022-07-14 18:08:05.488248
3,4.6,3.1,1.5,0.2,3,0,2022-07-14 18:08:05.487499,2022-07-14 18:08:05.488248
4,5.0,3.6,1.4,0.2,4,0,2022-07-14 18:08:05.487499,2022-07-14 18:08:05.488248


### Creating feature store

In [13]:
# Feature store configured with Redshift as both the ingestion sink and the
# ingestion source.
feature_store = FeatureStore(
    sink_type=IngestionSinkType.REDSHIFT, 
    source_type=IngestionSourceType.REDSHIFT
)

In [14]:
# Identifier columns become entities; every remaining column of the loaded
# frame is declared as a feature.
IDS = ["id"]
FEATURES = [col for col in response.dataframe.columns if col not in IDS]

entities = get_entities(id_columns=IDS)
features = get_features(feature_list=FEATURES)

In [15]:
# Bind the entities and features built above to a feature table named
# "one_blinc_fs" on the feature store.
feature_table = FeatureTable(
    name="one_blinc_fs",
    feature_store=feature_store,
    entities=entities,
    features=features
)

### Ingest features

In [10]:
# Ingest the loaded response into the feature table.
# NOTE(review): the saved output of this cell shows a psycopg2 OperationalError
# while connecting to Redshift -- verify the connection settings before re-running.
feature_store.ingest_response(feature_table=feature_table, to_ingest=response)

No expected columns provided. Will ingest all columns.
--- Logging error ---
Traceback (most recent call last):
  File "/Users/bruno/.pyenv/versions/elemeno-ai-sdk/lib/python3.10/site-packages/sqlalchemy/engine/base.py", line 3366, in _wrap_pool_connect
    return fn()
  File "/Users/bruno/.pyenv/versions/elemeno-ai-sdk/lib/python3.10/site-packages/sqlalchemy/pool/base.py", line 327, in connect
    return _ConnectionFairy._checkout(self)
  File "/Users/bruno/.pyenv/versions/elemeno-ai-sdk/lib/python3.10/site-packages/sqlalchemy/pool/base.py", line 894, in _checkout
    fairy = _ConnectionRecord.checkout(pool)
  File "/Users/bruno/.pyenv/versions/elemeno-ai-sdk/lib/python3.10/site-packages/sqlalchemy/pool/base.py", line 493, in checkout
    rec = pool._do_get()
  File "/Users/bruno/.pyenv/versions/elemeno-ai-sdk/lib/python3.10/site-packages/sqlalchemy/pool/impl.py", line 145, in _do_get
    with util.safe_reraise():
  File "/Users/bruno/.pyenv/versions/elemeno-ai-sdk/lib/python3.10/site

OperationalError: (psycopg2.OperationalError) connection to server on socket "@redshift-cluster-2.cwuptesab6o8.us-east-1.redshift.amazonaws.com/.s.PGSQL.5439" failed: No such file or directory
	Is the server running locally and accepting connections on that socket?

(Background on this error at: https://sqlalche.me/e/14/e3q8)

### Retrieve features

In [None]:
# Fetch the training features stored for the feature table.
# Presumably returns a pandas DataFrame -- TODO confirm against the SDK.
retrieved_data = feature_store.get_training_features(feature_table=feature_table)

In [None]:
# Display the retrieved training features (the original referenced the
# undefined name `retrieve_data`, which raises NameError).
retrieved_data