In [1]:
import random
import os
import pandas as pd
import hopsworks
from dotenv import load_dotenv
from src.paths import PARENT_DIR

# BACKFILL selects where the rows inserted by this notebook come from:
#   True  -> initial build of the feature group from the original iris data
#   False -> one randomly generated flower, as used for the batch prediction
#            flow. Every run produces new random values, so keep that in mind
#            when re-running just to look at what the last prediction was.
# BACKFILL = True
BACKFILL = False

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def generate_flower(name, sepal_len_max, sepal_len_min, sepal_width_max, sepal_width_min, 
                    petal_len_max, petal_len_min, petal_width_max, petal_width_min):
    """
    Build a one-row DataFrame describing a single synthetic iris flower.

    Each of the four measurements is drawn with ``random.uniform`` between
    the matching ``*_max`` / ``*_min`` pair; ``name`` is stored in the
    ``variety`` column.

    Returns:
        pd.DataFrame with columns ``sepal_length``, ``sepal_width``,
        ``petal_length``, ``petal_width`` and ``variety`` (one row).
    """
    # (column, first bound, second bound). random.uniform accepts its bounds
    # in either order, so they are passed exactly as received (max first).
    bounds = (
        ("sepal_length", sepal_len_max, sepal_len_min),
        ("sepal_width", sepal_width_max, sepal_width_min),
        ("petal_length", petal_len_max, petal_len_min),
        ("petal_width", petal_width_max, petal_width_min),
    )

    # Values are drawn in column order, one random draw per measurement.
    measurements = {
        column: [random.uniform(first, second)]
        for column, first, second in bounds
    }

    return pd.DataFrame(measurements).assign(variety=name)


def get_random_iris_flower():
    """
    Return a one-row DataFrame holding a randomly chosen iris flower.

    One candidate flower is generated per variety (Virginica, Versicolor,
    Setosa), then a uniform draw on [0, 3] decides which candidate is
    returned. This function only builds and returns the DataFrame; it does
    not write anything itself.
    """
    # All three candidates are always generated, in this order.
    virginica = generate_flower("Virginica", 8, 5.5, 3.8, 2.2, 7, 4.5, 2.5, 1.4)
    versicolor = generate_flower("Versicolor", 7.5, 4.5, 3.5, 2.1, 3.1, 5.5, 1.8, 1.0)
    setosa = generate_flower("Setosa", 6, 4.5, 4.5, 2.3, 1.2, 2, 0.7, 0.3)

    # Split the draw into three unit-wide bands, one per variety.
    draw = random.uniform(0, 3)
    if draw < 1:
        return setosa
    if draw < 2:
        return versicolor
    return virginica

In [3]:
# Backfill mode reads the iris CSV from the URL below; otherwise a single
# random flower is generated for this run.
if BACKFILL:
    iris_df = pd.read_csv("https://repo.hops.works/master/hopsworks-tutorials/data/iris.csv")
else:
    iris_df = get_random_iris_flower()

# Last expression of the cell, so the notebook renders a preview of the rows.
iris_df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,variety
0,7.353157,3.387207,6.367472,1.673796,Virginica


In [4]:
# Read key-value pairs from the .env file in the parent directory
load_dotenv(PARENT_DIR / ".env")

HOPSWORKS_PROJECT_NAME = "Vivekmaj1"

# Indexing os.environ raises KeyError if the variable is not set
HOPSWORKS_API_KEY = os.environ["HOPSWORKS_API_KEY"]

In [5]:
# Log in to the Hopsworks project with the API key read from the environment,
# then obtain the project's feature store handle.
project = hopsworks.login(api_key_value=HOPSWORKS_API_KEY, project=HOPSWORKS_PROJECT_NAME)
fs = project.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/552084
Connected. Call `.close()` to terminate connection gracefully.


In [6]:
# Get (or create) version 1 of the "iris" feature group; the four measurement
# columns together are declared as the primary key.
iris_fg = fs.get_or_create_feature_group(
    name="iris",
    version=1,
    description="Iris flower dataset",
    primary_key=["sepal_length", "sepal_width", "petal_length", "petal_width"],
)

# Kept as the cell's final expression so the notebook shows insert()'s result.
iris_fg.insert(iris_df)

Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:05 | Remaining Time: 00:00


Launching job: iris_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/552084/jobs/named/iris_1_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x234a01945b0>, None)