In [11]:
import pandas as pd
from feast import FeatureStore
from feast.infra.offline_stores.file_source import SavedDatasetFileStorage

In [12]:
# Open the Feast feature store, using the current directory (".") as the repository path
store = FeatureStore(repo_path=".")

In [13]:
# Load the target table from parquet; it is handed to Feast as the entity DataFrame
entity_df = pd.read_parquet(r"data/target_df.parquet")

# Look up the "user_activity" feature service and request its historical
# features for the entities loaded above
feature_service = store.get_feature_service("user_activity")
training_data = store.get_historical_features(entity_df=entity_df, features=feature_service)

In [14]:
# Store the retrieved training data as a local parquet file and register it
# in the feature store under the name "churn_dataset".
# allow_overwrite=True keeps this cell re-runnable: with the default (False)
# Feast refuses to write when the target file already exists, so a second run
# of the notebook (e.g. Restart & Run All) would fail here.
dataset = store.create_saved_dataset(
    from_=training_data,
    name="churn_dataset",
    storage=SavedDatasetFileStorage(r"data/churn_dataset.parquet"),
    allow_overwrite=True,
)



In [15]:
# Preview the first five rows of the saved dataset
dataset.to_df().head(n=5)

Unnamed: 0,Tenure,Geography_France,event_timestamp,Geography_Germany,Age,CreditScore,USER_ID,EstimatedSalary,Gender,IsActiveMember,Geography_Spain,NumOfProducts,Exited,Balance,HasCrCard
0,4,False,2023-01-01 00:00:00+00:00,True,29,792,3618,18922.18,0,0,False,1,1,107601.79,1
1,3,True,2023-01-01 00:00:00+00:00,False,39,686,3152,136643.84,0,0,False,1,0,111695.62,0
2,9,False,2023-01-01 00:00:00+00:00,True,54,572,4195,195771.95,0,1,False,1,0,97382.53,1
3,6,True,2023-01-01 00:00:00+00:00,False,37,643,8080,142454.77,1,0,False,2,0,0.0,0
4,1,False,2023-01-01 00:00:00+00:00,False,48,587,2599,8908.0,1,1,True,2,0,0.0,1


In [16]:
# Show the dtype of every column in the saved dataset's DataFrame
dataset.to_df().dtypes

Tenure                             int64
Geography_France                    bool
event_timestamp      datetime64[ns, UTC]
Geography_Germany                   bool
Age                                int64
CreditScore                        int64
USER_ID                            int64
EstimatedSalary                  float64
Gender                             int64
IsActiveMember                     int64
Geography_Spain                     bool
NumOfProducts                      int64
Exited                             int64
Balance                          float64
HasCrCard                          int64
dtype: object

### Data visualization

In [17]:
# Count the null values in each column of the saved dataset
null_counts = dataset.to_df().isna().sum()
null_counts

Tenure               0
Geography_France     0
event_timestamp      0
Geography_Germany    0
Age                  0
CreditScore          0
USER_ID              0
EstimatedSalary      0
Gender               0
IsActiveMember       0
Geography_Spain      0
NumOfProducts        0
Exited               0
Balance              0
HasCrCard            0
dtype: int64