In [1]:
import pandas as pd
from feast import FeatureStore
from feast.infra.offline_stores.file_source import SavedDatasetFileStorage

In [2]:
# Open the Feast feature store; the repository path is the current working
# directory, so the notebook must be run from inside the feature repo.
store = FeatureStore(
    repo_path=".",
)

In [3]:
# Load the target table from parquet; it is handed to Feast as the entity DataFrame.
entity_df = pd.read_parquet(r"data/target_df.parquet")

# Feature references in "<feature_view>:<feature>" form, grouped by feature view.
feature_refs = [
    # df1_feature_view
    "df1_feature_view:Gender",
    "df1_feature_view:Age",
    # df2_feature_view
    "df2_feature_view:CreditScore",
    "df2_feature_view:Tenure",
    "df2_feature_view:Balance",
    "df2_feature_view:EstimatedSalary",
    # df3_feature_view
    "df3_feature_view:NumOfProducts",
    "df3_feature_view:HasCrCard",
    "df3_feature_view:IsActiveMember",
    # df4_feature_view
    "df4_feature_view:Geography_France",
    "df4_feature_view:Geography_Germany",
    "df4_feature_view:Geography_Spain",
]

# Ask the store for the historical values of those features for the entities
# listed in entity_df; the result is later passed to create_saved_dataset.
training_data = store.get_historical_features(
    entity_df=entity_df,
    features=feature_refs,
)

In [4]:
# Persist the retrieved training data as a Feast saved dataset named
# "churn_dataset", backed by a local parquet file.
# The path uses a forward slash, like the data/target_df.parquet read earlier:
# a backslash is a path separator only on Windows, and on other platforms it
# would become part of the file name, so the file would land outside data/.
# NOTE(review): re-running this cell may fail if the dataset or target file
# already exists, depending on the installed Feast version — confirm.
dataset = store.create_saved_dataset(
    from_=training_data,
    name="churn_dataset",
    storage=SavedDatasetFileStorage("data/churn_dataset.parquet")
)



In [5]:
# Preview the first five rows of the saved dataset.
(
    dataset
    .to_df()
    .head(n=5)
)

Unnamed: 0,Age,event_timestamp,Tenure,Geography_Spain,Balance,HasCrCard,EstimatedSalary,NumOfProducts,IsActiveMember,CreditScore,Geography_Germany,USER_ID,Exited,Geography_France,Gender
0,37,2023-01-01 00:00:00+00:00,6,False,131753.41,1,86894.67,1,0,648,True,9417,0,False,1
1,36,2023-01-01 00:00:00+00:00,6,False,147137.74,1,33687.9,1,1,699,True,6186,0,False,1
2,49,2023-01-01 00:00:00+00:00,3,False,0.0,1,142917.54,2,1,611,False,792,0,True,1
3,33,2023-01-01 00:00:00+00:00,5,False,0.0,1,122949.71,2,0,635,False,9133,0,True,0
4,43,2023-01-01 00:00:00+00:00,10,False,105301.5,1,78941.59,1,1,818,True,9145,0,False,1


In [6]:
# Show the dtype of every column in the saved dataset.
(
    dataset
    .to_df()
    .dtypes
)

Age                                int64
event_timestamp      datetime64[ns, UTC]
Tenure                             int64
Geography_Spain                     bool
Balance                          float64
HasCrCard                          int64
EstimatedSalary                  float64
NumOfProducts                      int64
IsActiveMember                     int64
CreditScore                        int64
Geography_Germany                   bool
USER_ID                            int64
Exited                             int64
Geography_France                    bool
Gender                             int64
dtype: object

### Data visualization

In [7]:
# Count the missing values in each column of the saved dataset.
null_counts = (
    dataset
    .to_df()
    .isna()  # isna is the pandas alias of isnull
    .sum()
)
null_counts

Age                  0
event_timestamp      0
Tenure               0
Geography_Spain      0
Balance              0
HasCrCard            0
EstimatedSalary      0
NumOfProducts        0
IsActiveMember       0
CreditScore          0
Geography_Germany    0
USER_ID              0
Exited               0
Geography_France     0
Gender               0
dtype: int64