In [2]:
import pandas as pd
from feast import FeatureStore
from feast.infra.offline_stores.file_source import SavedDatasetFileStorage

In [3]:
# Open the Feast feature store whose repository lives in the current
# working directory; `store` is the handle used by the cells below.
repo_root = "."
store = FeatureStore(repo_path=repo_root)

In [6]:
# Look up the registered definition of the `df1_feature_view` feature view.
# NOTE(review): `fs` is a second FeatureStore handle on what looks like the
# same repo as `store` ("." vs "./") — consider reusing `store` instead.
fs = FeatureStore(repo_path="./")
df1_view = fs.get_feature_view(name="df1_feature_view")
df1_view

<FeatureView(name = df1_feature_view, entities = ['USER_ID'], ttl = 2000 days, 0:00:00, stream_source = None, batch_source = {
  "type": "BATCH_FILE",
  "timestampField": "event_timestamp",
  "fileOptions": {
    "uri": "data/data_df1.parquet"
  },
  "name": "data/data_df1.parquet"
}, entity_columns = [USER_ID-Int64], features = [Gender-Int64, Age-Int64], description = , tags = {}, owner = , projection = FeatureViewProjection(name='df1_feature_view', name_alias=None, desired_features=[], features=[Gender-Int64, Age-Int64], join_key_map={}), created_timestamp = 2024-05-29 15:31:00.582040, last_updated_timestamp = 2024-05-29 15:31:00.582040, online = True, materialization_intervals = [])>

In [3]:
# Resolve the feature service that lists the features to retrieve.
feature_service = store.get_feature_service("user_activity")

# Load the target table; it serves as the entity DataFrame for the
# historical-features retrieval below.
target_path = r"data/target_df.parquet"
entity_df = pd.read_parquet(target_path)

# Build the retrieval job that joins the service's features onto the
# entity rows.
training_data = store.get_historical_features(
    features=feature_service,
    entity_df=entity_df,
)

In [4]:
# Persist the retrieved training data as a local parquet file and register it
# in the feature store under the name "churn_dataset".
#
# `allow_overwrite=True` makes this cell idempotent: without it, re-running
# the notebook (Restart & Run All) fails once data/churn_dataset.parquet
# already exists, because the file offline store refuses to overwrite an
# existing location by default.
dataset = store.create_saved_dataset(
    from_=training_data,
    name="churn_dataset",
    storage=SavedDatasetFileStorage(r"data/churn_dataset.parquet"),
    allow_overwrite=True,
)



In [5]:
# Preview the first five rows of the saved dataset.
dataset_head = dataset.to_df().head(n=5)
dataset_head

Unnamed: 0,NumOfProducts,event_timestamp,HasCrCard,Exited,Age,Balance,Geography_Spain,CreditScore,Geography_France,EstimatedSalary,Gender,Geography_Germany,IsActiveMember,Tenure,USER_ID
0,1,2023-01-01 00:00:00+00:00,0,0,55,81370.07,True,648,False,181534.04,1,False,1,1,3145
1,2,2023-01-01 00:00:00+00:00,0,0,23,0.0,False,669,True,66088.83,1,False,0,1,723
2,1,2023-01-01 00:00:00+00:00,1,1,49,173434.9,False,625,True,165580.93,1,False,0,6,1021
3,2,2023-01-01 00:00:00+00:00,1,0,32,0.0,False,715,True,60907.49,0,False,0,10,8860
4,2,2023-01-01 00:00:00+00:00,1,0,31,0.0,False,838,True,8222.96,1,False,0,2,9836


In [6]:
# Inspect the dtype of every column in the saved dataset.
column_dtypes = dataset.to_df().dtypes
column_dtypes

NumOfProducts                      int64
event_timestamp      datetime64[ns, UTC]
HasCrCard                          int64
Exited                             int64
Age                                int64
Balance                          float64
Geography_Spain                     bool
CreditScore                        int64
Geography_France                    bool
EstimatedSalary                  float64
Gender                             int64
Geography_Germany                   bool
IsActiveMember                     int64
Tenure                             int64
USER_ID                            int64
dtype: object

In [7]:
# Check for null values: count the missing entries in each column.
saved_df = dataset.to_df()
null_counts = saved_df.isna().sum()
null_counts

NumOfProducts        0
event_timestamp      0
HasCrCard            0
Exited               0
Age                  0
Balance              0
Geography_Spain      0
CreditScore          0
Geography_France     0
EstimatedSalary      0
Gender               0
Geography_Germany    0
IsActiveMember       0
Tenure               0
USER_ID              0
dtype: int64

In [8]:
# Preview the last five rows of the saved dataset.
dataset_tail = dataset.to_df().tail(n=5)
dataset_tail

Unnamed: 0,NumOfProducts,event_timestamp,HasCrCard,Exited,Age,Balance,Geography_Spain,CreditScore,Geography_France,EstimatedSalary,Gender,Geography_Germany,IsActiveMember,Tenure,USER_ID
9995,2,2023-12-31 00:00:00+00:00,1,0,33,0.0,False,630,True,64804.59,1,False,0,9,5679
9996,1,2023-12-31 00:00:00+00:00,1,0,64,62979.93,True,598,False,152273.57,0,False,1,1,7710
9997,2,2023-12-31 00:00:00+00:00,1,0,40,0.0,False,651,True,147715.83,1,False,1,4,1094
9998,1,2023-12-31 00:00:00+00:00,0,1,50,81947.76,False,630,True,63606.22,1,False,1,1,1501
9999,1,2023-12-31 00:00:00+00:00,1,0,33,164385.53,False,845,True,150664.97,1,False,0,8,2193
