**Install Featureform**


In [None]:
!pip3 install featureform
import featureform as ff

**Get Run Variant**

In [None]:
# Ask Featureform for the current run identifier. It is passed as the second
# argument to serving.training_set() in the training cell further down.
run_variant = ff.get_run()

**Download and register transaction file**

In [None]:
import requests

# Fetch the demo transactions CSV and save it next to the notebook; the next
# cell registers it by the same relative path ("transactions.csv").
url = "https://featureform-demo-files.s3.amazonaws.com/transactions.csv"
# A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as the CSV.
r.raise_for_status()
# The context manager flushes and closes the file deterministically.
with open("transactions.csv", "wb") as f:
    f.write(r.content)


In [None]:
from featureform import local

# Register the downloaded CSV with Featureform through the `local` object.
# `transactions` is used below both as a transformation input and as the
# source of the label column.
transactions = local.register_file(
    name="transactions",
    description="A dataset of fraudulent transactions",
    path="transactions.csv"
)
# Pull the registered file into `df` (via .pandas()) and preview it.
df = transactions.pandas()
df.head()

**Define and test a few transformations**

In [None]:
# Transformation over the registered transactions file: one mean
# TransactionAmount per CustomerID.
@local.df_transformation(inputs=[transactions])
def average_user_transaction(transactions):
    per_customer = transactions.groupby("CustomerID")["TransactionAmount"]
    return per_customer.mean()

# Call the transformation directly on the DataFrame to inspect its output
# before anything is registered or applied.
avg = average_user_transaction(df)
avg.head()

In [None]:
# Histogram of the per-customer average transaction amounts.
avg.plot(kind="hist")

In [None]:
# "clipped" variant of the transformation above: its input is the output of
# the earlier average_user_transaction, not the raw transactions file.
# Reusing the function name rebinds `average_user_transaction` to this
# variant for the rest of the notebook.
@local.df_transformation(variant="clipped",
                         inputs=[average_user_transaction])
def average_user_transaction(transactions):
    # Cap values at two standard deviations above the mean and floor them
    # at 0. Named `upper_bound` so the builtin `max` is not shadowed.
    upper_bound = transactions.mean() + 2 * transactions.std()
    return transactions.clip(lower=0, upper=upper_bound)

# Apply the "clipped" variant to the earlier averages and plot the result as
# a histogram, for comparison with the unclipped plot.
average_user_transaction(avg).plot(kind="hist")

**Registering resources (entity, feature, label)**

In [None]:
# Entity that both the feature and the label below are attached to; each is
# keyed on the "CustomerID" column of its source.
user = ff.register_entity("user")
# Register a column from our transformation as a feature
# NOTE: at this point `average_user_transaction` is bound to the "clipped"
# variant, because that definition reused the same function name.
average_user_transaction.register_resources(
    entity=user,
    entity_column="CustomerID",
    inference_store=local,
    features=[
        {"name": "avg_transactions", "column": "TransactionAmount", "type": "float32"},
    ],
)

# Register a label from our base Transactions table
transactions.register_resources(
    entity=user,
    entity_column="CustomerID",
    labels=[
        {"name": "fraudulent", "column": "IsFraud", "type": "bool"},
    ],
)

**Registering training set**

In [None]:
# Training set named "fraud_training": pairs the "fraudulent" label with the
# "avg_transactions" feature. Both names must match the ones registered in
# the register_resources() calls.
ff.register_training_set(
    "fraud_training",
    label="fraudulent",
    features=["avg_transactions"],
)


**Apply our definitions**

In [None]:
# Create a resource client in local mode and apply the definitions made in
# the cells above.
client = ff.ResourceClient(local=True)
client.apply()

**Train a Model**

In [None]:
# Serving client in local mode, used to read back the registered training set.
serving = ff.Client(local=True)
dataset = serving.training_set("fraud_training", run_variant)

# Build the input pipeline one stage at a time: repeat, shuffle, then batch.
repeated = dataset.repeat(1)
shuffled = repeated.shuffle(1000)
training_dataset = shuffled.batch(12)

In [None]:
from sklearn.linear_model import SGDClassifier

# partial_fit is given the complete set of label values on every call, since
# a single batch may contain only one of them.
label_classes = [True, False]
classifier = SGDClassifier(loss='log_loss')

# Incremental training: one partial_fit call per batch.
for batch in training_dataset:
    classifier.partial_fit(
        batch.features(),
        batch.label(),
        classes=label_classes,
    )

# `batch` still refers to the last batch seen by the loop; reuse it as
# sample input for a test prediction.
classifier.predict(batch.features())
