In [1]:
!pip install featureform

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting featureform
  Downloading featureform-1.10.0-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting typeguard<3.0.0 (from featureform)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Collecting grpcio>=1.47.0 (from featureform)
  Downloading grpcio-1.56.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m33.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
Collecting pandas==1.3.5 (from featureform)
  Downloading pandas-1.3.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m39.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pandasql>=0.7.3 (fro

In [2]:
import os

import featureform as ff



In [3]:
# Address of the Featureform deployment that every later cell talks to.
FEATUREFORM_HOST = "featureform-redis-aws-immersion.featureform.com"

client = ff.Client(host=FEATUREFORM_HOST)

In [37]:
# Register the Redis provider; the `User.avg_transactions` feature uses it as
# its inference store.
redis = ff.register_redis(
    name="redis",
    host="featureform-quickstart-redis",  # The internal dns name for redis
    port=6379,
    description="A Redis deployment we created for the Featureform quickstart",
)

# Register the Postgres provider that holds the raw Transactions table and
# runs the SQL transformation.
postgres = ff.register_postgres(
    name="postgres",
    host="featureform-quickstart-postgres",  # The internal dns name for postgres
    port="5432",
    user="postgres",
    # Prefer a password supplied through the environment so real credentials
    # never have to be committed with the notebook; the fallback is the
    # quickstart deployment's demo password, which keeps the default run working.
    password=os.environ.get("FEATUREFORM_POSTGRES_PASSWORD", "password"),
    database="postgres",
    description="A Postgres deployment we created for the Featureform quickstart",
)

In [38]:
# Name the current run; the SQL transformation in this notebook refers to the
# table registered below as `transactions.experiment`.
ff.set_run("experiment")

# Register the existing Postgres table as a source.
transactions = postgres.register_table(
    name="transactions",
    table="Transactions",  # This is the table's name in Postgres
    description="Fraud Dataset From Kaggle",
)

In [39]:
# SQL transformation executed on the Postgres provider: one row per customer
# (`user_id`) with the mean transaction amount (`avg_transaction_amt`).
@postgres.sql_transformation()
def average_user_transaction():
    """the average transaction amount for a user """
    # `{{transactions.experiment}}` is a template placeholder for the
    # `transactions` source registered under the "experiment" run.
    query = (
        "SELECT CustomerID as user_id, avg(TransactionAmount) "
        "as avg_transaction_amt from {{transactions.experiment}} GROUP BY user_id"
    )
    return query

In [40]:
# Declares the `User` entity together with the feature and label attached to it.
# Both resources are registered under the "immersion" variant, which is the
# variant the training set in this notebook refers to.
@ff.entity
class User:
    # Register a column from our transformation as a feature
    avg_transactions = ff.Feature(
        average_user_transaction[["user_id", "avg_transaction_amt"]], # We can optionally include the `timestamp_column` "timestamp" here
        type=ff.Float32,
        inference_store=redis,
        variant="immersion"
    )
    # Register label from our base Transactions table
    fraudulent = ff.Label(
        transactions[["customerid", "isfraud"]], type=ff.Bool, variant="immersion",
    )

In [41]:
# (name, variant) references to the label and feature declared on `User`.
label_ref = ("fraudulent", "immersion")
feature_refs = [("avg_transactions", "immersion")]

# Register the "fraud_training" training set (variant "immersion") that joins
# the label with its feature.
ff.register_training_set(
    "fraud_training",
    "immersion",
    label=label_ref,
    features=feature_refs,
)

In [42]:
# Send the resources registered in the cells above (providers, sources, entity,
# feature, label and training set) to the Featureform deployment behind `client`.
client.apply()

Applying Run: run3
Creating provider postgres
Creating provider redis
Creating source average_user_transaction run3
Creating source transactions run3
Creating entity user
Creating feature avg_transactions i3
Creating label fraudulent i3
Creating training-set fraud_training i3


In [49]:
from sklearn.model_selection import train_test_split
# Pull the registered training set into a pandas DataFrame.
df = client.training_set("fraud_training", "immersion").dataframe()

# Move the label to column 0: SageMaker's built-in XGBoost reads the target
# from the first column of a CSV input.
label_name = "label__fraudulent__immersion"
label_clm = df.pop(label_name)
df.insert(0, label_name, label_clm)

# 80/20 split with a fixed seed so the train/validation partition (and thus
# every downstream metric) is reproducible across Restart & Run All.
train, test = train_test_split(df, test_size=0.2, random_state=42)

In [52]:
import sagemaker
from sagemaker.inputs import TrainingInput

session = sagemaker.Session()

# SageMaker's built-in XGBoost expects headerless CSV with the label in the
# first column. `index=False` is required: without it pandas writes the row
# index as the first column and the algorithm would train on the index as its
# target instead of the fraud label.
# NOTE(review): the label is registered as ff.Bool; confirm it is serialized as
# a numeric 0/1 value, since the CSV input must be numeric.
train.to_csv("train.csv", header=False, index=False)
test.to_csv("test.csv", header=False, index=False)

# Upload both files to the session's default S3 bucket.
train_path = session.upload_data("train.csv")
test_path = session.upload_data("test.csv")

# initialize hyperparameters
# NOTE(review): the target is a boolean fraud flag, so "binary:logistic" is
# probably the intended objective; left unchanged here — confirm before switching.
hyperparameters = {
    "max_depth": "5",
    "eta": "0.2",
    "gamma": "4",
    "min_child_weight": "6",
    "subsample": "0.7",
    "objective": "reg:squarederror",
    "num_round": "50",
}

# this line automatically looks for the XGBoost image URI and builds an XGBoost container.
# specify the repo_version depending on your preference.
# The image is resolved for the session's own region rather than a hard-coded
# one, so the notebook also works outside us-east-1.
xgboost_container = sagemaker.image_uris.retrieve(
    "xgboost", session.boto_region_name, "1.7-1"
)

# construct a SageMaker estimator that calls the xgboost-container
estimator = sagemaker.estimator.Estimator(
    image_uri=xgboost_container,
    hyperparameters=hyperparameters,
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type="ml.m5.large",
    volume_size=5,
)

# define the data type and paths to the training and validation datasets
content_type = "text/csv"
train_input = TrainingInput(train_path, content_type=content_type)
validation_input = TrainingInput(test_path, content_type=content_type)

# execute the XGBoost training job
estimator.fit({"train": train_input, "validation": validation_input})

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2023-07-25-11-58-52-600


2023-07-25 11:58:52 Starting - Starting the training job...
2023-07-25 11:59:08 Starting - Preparing the instances for training......
2023-07-25 12:00:09 Downloading - Downloading input data...
2023-07-25 12:00:49 Training - Downloading the training image.....[34m[2023-07-25 12:01:31.882 ip-10-0-163-62.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2023-07-25 12:01:31.907 ip-10-0-163-62.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2023-07-25:12:01:32:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2023-07-25:12:01:32:INFO] Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[34mReturning the value itself[0m
[34m[2023-07-25:12:01:32:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2023-07-25:12:01:32:INFO] Running XGBoost Sagemaker in algorithm mode[0m
[34m[2023-07-25:12:01:32:INFO] Determined 0 GPU(s) available on the instance.[0m
[34m[2023-



In [None]:
# Fetch the served value of the average-transaction feature for one customer.
customer_id = "C2421688"
client.features([User.avg_transactions], entities={"user": customer_id})