In [None]:
#!pip install python-dotenv

In [1]:
import os
from pathlib import Path
from typing import Union
import datetime
from dotenv import load_dotenv
import hopsworks
import json
import logging
import pandas as pd
from great_expectations.core import ExpectationSuite, ExpectationConfiguration

In [2]:
def load_env_vars(root_dir: Union[str, Path]) -> dict:
    """
    Populate the process environment from the project's dotenv files.

    Two files are read from ``root_dir``, in this order:

    1. ``.env.default`` - loaded with python-dotenv's default settings.
    2. ``.env`` - loaded with ``override=True``.

    Args:
        root_dir: Directory that contains the dotenv files (string or Path).

    Returns:
        A plain ``dict`` copy of ``os.environ`` taken after both files were
        loaded, i.e. every environment variable of the process and not only
        the ones coming from the dotenv files.
    """

    base_dir = Path(root_dir) if isinstance(root_dir, str) else root_dir

    defaults_file = base_dir / ".env.default"
    overrides_file = base_dir / ".env"

    # Order matters: defaults first, then the file that is allowed to override.
    load_dotenv(dotenv_path=defaults_file)
    load_dotenv(dotenv_path=overrides_file, override=True)

    return dict(os.environ)

In [3]:
def get_root_dir(default_value: str = ".") -> Path:
    """
    Resolve the project's root directory.

    The location is read from the ``ML_PIPELINE_ROOT_DIR`` environment
    variable; ``default_value`` is used when that variable is not set.

    Args:
        default_value: Fallback directory used when ``ML_PIPELINE_ROOT_DIR``
            is absent from the environment.

    Returns:
        The root directory as a ``Path`` (not resolved or checked for existence).
    """

    configured_root = os.environ.get("ML_PIPELINE_ROOT_DIR", default_value)
    return Path(configured_root)


If you have a `.env` file that holds the settings, load them by running the following commands:

**ML_PIPELINE_ROOT_DIR = get_root_dir()**


**SETTINGS_GENERAL = load_env_vars(root_dir=ML_PIPELINE_ROOT_DIR)**

In this notebook, the settings are defined in the next cell instead.

In [6]:
# Feature-store connection settings.
#
# SECURITY: the API key is read from the environment (e.g. exported in the shell
# or loaded from a .env file with load_env_vars) instead of being hardcoded.
# A key written into a notebook is exposed to everyone who can read the file or
# its history, so any key that was ever pasted here should be revoked and reissued.
#
# NOTE: logging in to this project is currently NOT WORKING - Hopsworks reports
# that the project cannot be found.
SETTINGS_STORE = {
    # None when FS_API_KEY is not set in the environment.
    "FS_API_KEY": os.environ.get("FS_API_KEY"),
    # Falls back to the project name this notebook was written against.
    "FS_PROJECT_NAME": os.environ.get("FS_PROJECT_NAME", "mlops_api"),
}

In [7]:
# Sanity check: log in with the notebook settings and fetch the feature store handle.
project = hopsworks.login(
    api_key_value=SETTINGS_STORE["FS_API_KEY"],
    project=SETTINGS_STORE["FS_PROJECT_NAME"],
)
feature_store = project.get_feature_store()

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.


ProjectException: Could not find project mlops_api

-------------- STOPPED HERE //Andre 14/06 00:59 ------------------

In [11]:
def to_feature_store(
    data: pd.DataFrame,
    group_name: str,
    feature_group_version: int,
    description: str,
    group_description: Union[list, dict],
    validation_expectation_suite: ExpectationSuite,
    SETTINGS: dict
):
    """
    Upload a DataFrame to a Hopsworks feature group.

    The function logs in to the Hopsworks project, gets or creates the feature
    group (passing the expectation suite to it), inserts the data, sets the
    per-feature descriptions and finally enables and computes statistics.

    Args:
        data (pd.DataFrame): Dataframe with the data to be stored. The feature
            group is created with ``index`` as primary key and ``datetime`` as
            event time, so the frame is expected to provide those columns.
        group_name (str): Name of the feature group.
        feature_group_version (int): Version of the feature group.
        description (str): Description for the feature group.
        group_description (list | dict): Description of each feature of the
            feature group. Either a list of dictionaries holding a ``"name"``
            and a ``"description"`` key, or a mapping of feature name to
            description text.
        validation_expectation_suite (ExpectationSuite): Group of expectations
            passed to the feature group as its expectation suite.
        SETTINGS (dict): Connection settings; must contain ``"FS_API_KEY"`` and
            ``"FS_PROJECT_NAME"``.

    Returns:
        The feature group object obtained from ``get_or_create_feature_group``,
        after the insert and the statistics update.

    Raises:
        KeyError: If ``SETTINGS`` lacks a required key, or an entry of a
            list-style ``group_description`` lacks ``"name"`` / ``"description"``.
    """
    # Normalise the feature descriptions to (name, text) pairs up front, so that
    # malformed input fails before anything is uploaded.
    if isinstance(group_description, dict):
        feature_descriptions = list(group_description.items())
    else:
        feature_descriptions = [
            (feature["name"], feature["description"])
            for feature in group_description
        ]

    # Connect to feature store.
    project = hopsworks.login(
        api_key_value=SETTINGS["FS_API_KEY"], project=SETTINGS["FS_PROJECT_NAME"]
    )
    feature_store = project.get_feature_store()

    # Create feature group (or fetch it when it already exists).
    object_feature_group = feature_store.get_or_create_feature_group(
        name=group_name,
        version=feature_group_version,
        description=description,
        primary_key=["index"],
        event_time="datetime",
        online_enabled=False,
        expectation_suite=validation_expectation_suite,
    )

    # Upload data; wait for the ingestion job before touching descriptions
    # and statistics.
    object_feature_group.insert(
        features=data,
        overwrite=False,
        write_options={
            "wait_for_job": True,
        },
    )

    # Add feature descriptions. The loop variables deliberately do not reuse the
    # name of the `description` parameter.
    for feature_name, feature_text in feature_descriptions:
        object_feature_group.update_feature_description(feature_name, feature_text)

    # Update statistics.
    object_feature_group.statistics_config = {
        "enabled": True,
        "histograms": True,
        "correlations": True,
    }
    object_feature_group.update_statistics_config()
    object_feature_group.compute_statistics()

    return object_feature_group