In [1]:
import sys
from pathlib import Path

def is_google_colab() -> bool:
    """Report whether this notebook is executing inside Google Colab.

    The check looks for the substring ``google.colab`` in the string form of
    the active shell object returned by ``get_ipython()``.

    Returns:
        True when the shell's string form mentions ``google.colab``,
        False otherwise.
    """
    shell_description = str(get_ipython())
    return "google.colab" in shell_description

def clone_repository() -> None:
    """Clone the mlfs-book repository and make it the working directory.

    The body consists of IPython magics (``!`` shell escape and ``%cd``), so
    this function only works when run inside an IPython/Jupyter kernel.
    """
    # Shell escape: clones into ./mlfs-book relative to the current directory.
    !git clone https://github.com/featurestorebook/mlfs-book.git
    # %cd changes the kernel's own working directory (a `!cd` would only
    # affect a throwaway subshell), so later cells run from inside the clone.
    %cd mlfs-book

def install_dependencies() -> None:
    """Install the project's Python dependencies using ``uv``.

    Expects ``pyproject.toml`` to be in the current working directory. The
    body uses IPython ``!`` shell escapes, so it only runs inside a notebook
    kernel.
    """
    # Install (or upgrade) the uv installer itself via pip.
    !pip install --upgrade uv
    # Install the requirements declared in pyproject.toml, including every
    # optional extra, into the system interpreter (--system) instead of a venv.
    !uv pip install --all-extras --system --requirement pyproject.toml

if is_google_colab():
    # On Colab: fetch the repository, install its dependencies, and use the
    # working directory left behind by clone_repository() as the project root.
    clone_repository()
    install_dependencies()
    root_dir = str(Path().absolute())
    print("Google Colab environment")
else:
    # Locally: start from the current directory and climb out of a trailing
    # `titanic` and then `notebooks` folder, so the project root is found when
    # the notebook was launched from <root>/notebooks/titanic or <root>/notebooks.
    root_path = Path().absolute()
    for trailing_dir in ('titanic', 'notebooks'):
        if root_path.name == trailing_dir:
            root_path = root_path.parent
    root_dir = str(root_path)
    print("Local environment")

# Make the project root importable so later cells can import the `mlfs` package.
# The message below is printed whether or not the entry was already present.
already_importable = root_dir in sys.path
if not already_importable:
    sys.path.append(root_dir)
print(f"Added the following directory to the PYTHONPATH: {root_dir}")

Local environment
Added the following directory to the PYTHONPATH: /home/jdowling/Projects/mlfs-book


In [2]:
import random
import time
import os
import hopsworks
import pandas as pd
from mlfs import config

# Seed Python's `random` module from the current wall-clock time, so the
# passenger generated further down is not tied to a fixed, repeatable seed.
random.seed(time.time())

#### Function to create a random passenger on the Titanic


In [3]:
# Build the project settings object from the .env file in the project root.
# NOTE(review): `settings` is not referenced again in the visible cells;
# presumably constructing it loads/validates the credentials that
# hopsworks.login() relies on — confirm in mlfs.config.
settings = config.HopsworksSettings(_env_file=f"{root_dir}/.env")

# Log in to Hopsworks and obtain a handle to the project's feature store.
project = hopsworks.login()
fs = project.get_feature_store()

2025-01-08 13:57:15,340 INFO: Initializing external client
2025-01-08 13:57:15,341 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-08 13:57:16,473 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/17565


In [4]:
# Sampling parameters for each outcome group, keyed by the `survived` flag.
# NOTE(review): the integers look like per-group value counts from the Titanic
# training data (342 survivors / 549 non-survivors) — confirm against the data.
#   total       – group size; the denominator for every count-based threshold
#   sex         – count of 'male'; the remainder is 'female'
#   pclass      – counts for class 1 and class 2; the remainder is class 3
#   age         – (low, high) bounds of the uniform age draw
#   fare        – five quartile edges; a fare is drawn uniformly inside one quartile
#   parch/sibsp – counts for the values 0, 1, (2); the remainder is drawn from *_tail
#   embarked    – counts for 'S' and 'C'; the remainder is 'Q'
_PASSENGER_PROFILES = {
    True: {
        "total": 342,
        "sex": (109,),
        "pclass": (136, 87),
        "age": (0.42, 80.0),
        "fare": (0.0, 12.47, 26.0, 57.0, 512.0),
        "parch": (233, 65, 40),
        "parch_tail": (3.0, 5.0),
        "sibsp": (210, 112),
        "sibsp_tail": (2.0, 4.0),
        # The 'C' cut-off used to be (93+210)/342, with 210 copied from the
        # sibsp thresholds; cumulative counts make it (219+93)/342.
        "embarked": (219, 93),
    },
    False: {
        "total": 549,
        "sex": (468,),
        "pclass": (80, 97),
        "age": (1.0, 74.0),
        "fare": (0.0, 7.85, 10.5, 26.0, 263.0),
        "parch": (445, 53, 40),
        "parch_tail": (3.0, 6.0),
        "sibsp": (398, 97),
        "sibsp_tail": (2.0, 6.0),
        "embarked": (427, 75),
    },
}


def _bucket_index(unif, counts, total):
    """Return the index of the first bucket whose cumulative share exceeds ``unif``.

    ``counts`` are bucket sizes out of ``total``; when ``unif`` is beyond all of
    them, ``len(counts)`` is returned to denote the implicit "remainder" bucket.
    """
    cumulative = 0
    for index, count in enumerate(counts):
        cumulative += count
        if unif < cumulative / total:
            return index
    return len(counts)


def generate_random_passenger(id):
    """
    Returns a single Titanic passenger as a single row in a DataFrame

    The passenger is a survivor or a non-survivor with equal probability, and
    the feature values are then sampled from that group's profile in
    ``_PASSENGER_PROFILES``.

    Args:
        id: Value stored in the ``passengerid`` column of the returned row.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``passengerid``,
        ``sex``, ``age``, ``pclass``, ``fare``, ``parch``, ``sibsp``,
        ``embarked`` and ``survived`` (0 or 1).

    Side effects:
        Prints "Survivor added" or "Non-Survivor added" to stdout.
    """
    # Previously `survived` was hard-coded to False and never updated, so the
    # printed message could claim a survivor while the row was labelled 0.
    survived = random.uniform(0, 2) >= 1
    print("Survivor added" if survived else "Non-Survivor added")

    profile = _PASSENGER_PROFILES[survived]
    total = profile["total"]

    # NOTE(review): one draw drives every threshold-based feature, so those
    # features move together (e.g. a low `unif` always gives male + 1st class).
    # Kept as-is to leave the generated distribution unchanged; consider
    # independent draws per feature.
    unif = random.uniform(0, 1)

    sex = ("male", "female")[_bucket_index(unif, profile["sex"], total)]
    pclass = 1 + _bucket_index(unif, profile["pclass"], total)
    age = random.uniform(*profile["age"])

    # Choose the fare quartile from `unif`, then draw uniformly within it.
    fare_edges = profile["fare"]
    quartile = _bucket_index(unif, (25, 25, 25), 100)
    fare = random.uniform(fare_edges[quartile], fare_edges[quartile + 1])

    # For parch/sibsp the bucket index is the value itself; the remainder
    # bucket is replaced by a rounded draw from the group's tail range.
    parch = _bucket_index(unif, profile["parch"], total)
    if parch == len(profile["parch"]):
        parch = round(random.uniform(*profile["parch_tail"]))

    sibsp = _bucket_index(unif, profile["sibsp"], total)
    if sibsp == len(profile["sibsp"]):
        sibsp = round(random.uniform(*profile["sibsp_tail"]))

    embarked = ("S", "C", "Q")[_bucket_index(unif, profile["embarked"], total)]

    return pd.DataFrame({
        "passengerid": [id],
        "sex": [sex],
        "age": [age],
        "pclass": [pclass],
        "fare": [fare],
        "parch": [parch],
        "sibsp": [sibsp],
        "embarked": [embarked],
        "survived": [int(survived)],
    })

In [5]:
# Look up the existing "titanic" feature group (version 1) in the feature store.
titanic_fg = fs.get_feature_group(name="titanic", version=1)

# Read the feature group's rows only to find the highest passenger id in use.
df = titanic_fg.read()
# NOTE(review): this kernel-level `id` shadows the builtin id() for every
# later cell, and max() on an empty feature group would not yield a usable id.
id = df['passengerid'].max() + 1 
# Generate one synthetic passenger with the next id and append it to the group.
titanic_df = generate_random_passenger(id)
# wait=True presumably blocks until the materialization job has finished
# (the logged output below suggests so) — confirm against the Hopsworks API.
titanic_fg.insert(titanic_df, wait=True)

Reading data from Hopsworks, using Hopsworks Feature Query Service.   

/arrow/cpp/src/arrow/status.cc:137: DoAction result was not fully consumed: Cancelled: Flight cancelled call, with message: CANCELLED. Detail: Cancelled


Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.71s) 
Non-Survivor added



Uploading Dataframe: 100.00% |█████████████████████████████████████████████████████████████████████████████████████████████| Rows 1/1 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: titanic_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/17565/jobs/named/titanic_1_offline_fg_materialization/executions
2025-01-08 13:57:34,472 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-01-08 13:57:37,628 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-01-08 13:59:06,115 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-01-08 13:59:06,288 INFO: Waiting for log aggregation to finish.
2025-01-08 13:59:28,027 INFO: Execution finished successfully.


(Job('titanic_1_offline_fg_materialization', 'SPARK'), None)