In [4]:
import pandas as pd
from feast import FeatureStore
from feast.infra.offline_stores.file_source import SavedDatasetFileStorage

In [5]:
# Open the Feast feature store whose repository lives in the current directory
store = FeatureStore(
    repo_path=".",
)

In [6]:
# Look up the feature service that names the features to retrieve
feature_service = store.get_feature_service("user_activity")

# Load the target table from disk; it is used as the entity DataFrame
entity_df = pd.read_parquet(r"data/target_df.parquet")

# Request the historical features of that service for the loaded entities
training_data = store.get_historical_features(
    features=feature_service,
    entity_df=entity_df,
)

In [7]:
# Persist the retrieved training data as a saved dataset backed by a local
# parquet file and register it under the name "churn_dataset".
# allow_overwrite=True keeps this cell re-runnable (Restart & Run All): with the
# default (False) Feast refuses to write to a storage location that already
# exists, so a second run would fail once data/churn_dataset.parquet is on disk.
dataset = store.create_saved_dataset(
    from_=training_data,
    name="churn_dataset",
    storage=SavedDatasetFileStorage(r"data/churn_dataset.parquet"),
    allow_overwrite=True,
)



In [8]:
# Preview the first five rows of the saved dataset
dataset.to_df().head(n=5)

Unnamed: 0,Tenure,Balance,USER_ID,Age,NumOfProducts,Exited,event_timestamp,Geography_Germany,Geography_Spain,EstimatedSalary,CreditScore,IsActiveMember,Gender,Geography_France,HasCrCard
0,7,0.0,7670,38,2,0,2023-01-01 00:00:00+00:00,False,True,181605.85,631,0,1,False,1
1,7,95984.21,9729,43,1,1,2023-01-01 00:00:00+00:00,True,False,115262.54,616,1,0,False,0
2,1,160541.0,5378,72,2,0,2023-01-01 00:00:00+00:00,False,False,142223.94,642,1,1,True,1
3,5,108891.7,7693,74,1,0,2023-01-01 00:00:00+00:00,True,False,10078.02,634,0,1,False,1
4,5,83348.89,6496,39,3,1,2023-01-01 00:00:00+00:00,True,False,7953.62,617,0,0,False,1


In [9]:
# Show the dtype of every column in the saved dataset
(
    dataset
    .to_df()
    .dtypes
)

Tenure                             int64
Balance                          float64
USER_ID                            int64
Age                                int64
NumOfProducts                      int64
Exited                             int64
event_timestamp      datetime64[ns, UTC]
Geography_Germany                   bool
Geography_Spain                     bool
EstimatedSalary                  float64
CreditScore                        int64
IsActiveMember                     int64
Gender                             int64
Geography_France                    bool
HasCrCard                          int64
dtype: object

### Data visualization

In [10]:
# Check for missing values: count the nulls in each column
null_counts = (
    dataset
    .to_df()
    .isna()
    .sum()
)
null_counts

Tenure               0
Balance              0
USER_ID              0
Age                  0
NumOfProducts        0
Exited               0
event_timestamp      0
Geography_Germany    0
Geography_Spain      0
EstimatedSalary      0
CreditScore          0
IsActiveMember       0
Gender               0
Geography_France     0
HasCrCard            0
dtype: int64