In [1]:
import pandas as pd
from feast import FeatureStore
from feast.infra.offline_stores.file_source import SavedDatasetFileStorage

In [2]:
# Instantiate the Feast FeatureStore, using the current directory as the
# feature repository path.
FEATURE_REPO_PATH = "."
store = FeatureStore(repo_path=FEATURE_REPO_PATH)

In [3]:
# Load the target table from parquet; it is handed to the feature store as
# the entity DataFrame for the historical lookup below.
entity_df = pd.read_parquet("data/target_df_2.parquet")

# Feature references in "<feature_view>:<feature>" form, grouped by view.
feature_refs = [
    # df1_2_feature_view
    "df1_2_feature_view:Gender",
    "df1_2_feature_view:Age",
    # df2_2_feature_view
    "df2_2_feature_view:CreditScore",
    "df2_2_feature_view:Tenure",
    "df2_2_feature_view:Balance",
    "df2_2_feature_view:EstimatedSalary",
    # df3_2_feature_view
    "df3_2_feature_view:NumOfProducts",
    "df3_2_feature_view:HasCrCard",
    "df3_2_feature_view:IsActiveMember",
    # df4_2_feature_view
    "df4_2_feature_view:Geography_France",
    "df4_2_feature_view:Geography_Germany",
    "df4_2_feature_view:Geography_Spain",
]

# Request the historical values of those features for the rows of entity_df.
training_data = store.get_historical_features(
    entity_df=entity_df,
    features=feature_refs,
)

In [0]:
# Persist the retrieved training data as a saved dataset named
# "churn_dataset", backed by a local parquet file.
#
# The storage path uses a forward slash so it resolves inside the data/
# directory on every platform (and matches the "data/..." path used when
# reading the entity DataFrame). A backslash separator only works on Windows;
# elsewhere it would produce a file literally named
# "data\churn_dataset.parquet" in the working directory.
#
# NOTE(review): re-running this cell after the dataset/file already exists may
# raise in Feast — confirm against the installed version and, if supported,
# consider passing allow_overwrite=True.
dataset = store.create_saved_dataset(
    from_=training_data,
    name="churn_dataset",
    storage=SavedDatasetFileStorage("data/churn_dataset.parquet"),
)

In [7]:
# Materialize the saved dataset as a DataFrame and preview its first rows.
saved_preview_df = dataset.to_df()
saved_preview_df.head()

Unnamed: 0,EstimatedSalary,HasCrCard,IsActiveMember,CreditScore,Exited,Age,NumOfProducts,Tenure,Balance,Geography_France,USER_ID,Geography_Spain,Geography_Germany,Gender,event_timestamp
0,193309.58,0,0,613,0,40,1,7,124339.9,True,9195,False,False,1,2023-01-01 00:00:00+00:00
1,45026.23,1,0,581,1,44,2,7,189318.16,False,7283,True,False,0,2023-01-01 00:00:00+00:00
2,111402.97,1,1,544,1,64,1,3,124043.8,True,1520,False,False,0,2023-01-01 00:00:00+00:00
3,78896.59,0,1,810,0,32,2,9,120879.73,True,8200,False,False,1,2023-01-01 00:00:00+00:00
4,5669.31,1,0,850,1,45,4,5,174088.3,False,4167,True,False,0,2023-01-01 00:00:00+00:00


In [5]:
# Materialize the saved dataset as a DataFrame and list each column's dtype.
saved_schema_df = dataset.to_df()
saved_schema_df.dtypes

EstimatedSalary                  float64
HasCrCard                          int64
IsActiveMember                     int64
CreditScore                        int64
Exited                             int64
Age                                int64
NumOfProducts                      int64
Tenure                             int64
Balance                          float64
Geography_France                    bool
USER_ID                            int64
Geography_Spain                     bool
Geography_Germany                   bool
Gender                             int64
event_timestamp      datetime64[ns, UTC]
dtype: object

In [6]:
# Check for null values: count the missing entries in each column of the
# saved dataset.
missing_mask = dataset.to_df().isna()
null_counts = missing_mask.sum()
null_counts

EstimatedSalary      0
HasCrCard            0
IsActiveMember       0
CreditScore          0
Exited               0
Age                  0
NumOfProducts        0
Tenure               0
Balance              0
Geography_France     0
USER_ID              0
Geography_Spain      0
Geography_Germany    0
Gender               0
event_timestamp      0
dtype: int64