# Applying Feature Store using `FEAST`

In [40]:
import os
import pandas as pd
import warnings
# Blanket-suppress warnings so cell outputs stay readable.
# NOTE(review): this also hides library deprecation warnings - consider narrowing the filter.
warnings.filterwarnings('ignore')

# Display every column when a DataFrame is rendered (no truncation of wide frames).
pd.set_option('display.max_columns', None)

# Work from the project root so the relative data paths used in later cells resolve.
# The location can be overridden with the CRM_PROJECT_DIR environment variable;
# when it is unset, the original hard-coded path is used.
PROJECT_DIR = os.environ.get(
    'CRM_PROJECT_DIR',
    r'D:\OneDrive - Northeastern University\Jupyter Notebook\Data Science Projects\CRM-Analysis-for-Marketing-data',
)
os.chdir(PROJECT_DIR)

### 1. Preparing the data

In [31]:
# Load the feature-engineered marketing dataset (path is relative to the working directory)
# and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='marketing data/marketing_data_with_features.csv')
data.head()

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Response,Complain,Country,Age,Is_Parent,Total_Kids,Marital_Status_Simplified,Total_Spent,MntWines_Share,MntFruits_Share,MntMeatProducts_Share,MntFishProducts_Share,MntSweetProducts_Share,MntGoldProds_Share,Total_Purchases,Avg_Spend_per_Purchase,Deal_Rate,Web_Purchase_Share,Web_Visit_to_Purchase_Ratio,Is_Online_Buyer,Customer_Since_Days,Is_Active,Lifetime_Spend_per_Day,Total_Accepted_Campaigns,Campaign_Response_Rate
0,8996,1957,PhD,Married,51381,2,1,11/19/2012,4,230,42,192,49,37,53,12,7,2,8,9,0,0,0,0,0,0,0,GER,68,1,3,Partnered,603,0.381426,0.069652,0.318408,0.08126,0.06136,0.087894,29,20.793103,0.413793,0.241379,1.125,0,4060,1,0.148522,0,0.0
1,1994,1983,Graduation,Married,51381,1,0,11/15/2013,11,5,5,6,0,2,1,1,1,0,2,7,0,0,0,0,0,0,0,US,42,1,1,Partnered,19,0.263158,0.263158,0.315789,0.0,0.105263,0.052632,4,4.75,0.25,0.25,3.5,0,3699,1,0.005137,0,0.0
2,3769,1972,PhD,Together,51381,1,0,3/2/2014,17,25,1,13,0,0,3,1,1,0,3,7,0,0,0,0,0,0,0,AUS,53,1,1,Partnered,42,0.595238,0.02381,0.309524,0.0,0.0,0.071429,5,8.4,0.2,0.2,3.5,0,3592,1,0.011693,0,0.0
3,5255,1986,Graduation,Single,51381,1,0,2/20/2013,19,5,1,3,3,263,362,0,27,0,0,1,0,0,0,0,0,0,0,AUS,39,1,1,Single,637,0.007849,0.00157,0.00471,0.00471,0.412873,0.568289,27,23.592593,0.0,1.0,0.035714,1,3967,1,0.160575,0,0.0
4,8268,1961,PhD,Married,51381,0,1,7/11/2013,23,352,0,27,10,0,15,3,6,1,7,6,0,0,0,0,0,0,0,CA,64,1,1,Partnered,404,0.871287,0.0,0.066832,0.024752,0.0,0.037129,17,23.764706,0.176471,0.352941,0.857143,0,3826,1,0.105593,0,0.0


In [32]:
# 'Response' is separated out as the target; every other column is kept as a predictor.
is_predictor = data.columns != 'Response'
target_df = data['Response']
predictors_df = data.loc[:, is_predictor]

#### Create timestamps to be added as the `event_timestamp` column in the dataset.

In [33]:
# Synthetic event timestamps: one per row of `data`, spaced one day apart and ending
# at the moment this cell runs (so the values change from run to run).
daily_index = pd.date_range(end=pd.Timestamp.now(), periods=len(data), freq='D')
timestamps = pd.DataFrame({'event_timestamp': daily_index})

In [34]:
# Append the shared event_timestamp column to both the predictor and the target data
# (column-wise concatenation).
predictors_df, target_df = (
    pd.concat([frame, timestamps], axis='columns')
    for frame in (predictors_df, target_df)
)
predictors_df.head()

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Country,Age,Is_Parent,Total_Kids,Marital_Status_Simplified,Total_Spent,MntWines_Share,MntFruits_Share,MntMeatProducts_Share,MntFishProducts_Share,MntSweetProducts_Share,MntGoldProds_Share,Total_Purchases,Avg_Spend_per_Purchase,Deal_Rate,Web_Purchase_Share,Web_Visit_to_Purchase_Ratio,Is_Online_Buyer,Customer_Since_Days,Is_Active,Lifetime_Spend_per_Day,Total_Accepted_Campaigns,Campaign_Response_Rate,event_timestamp
0,8996,1957,PhD,Married,51381,2,1,11/19/2012,4,230,42,192,49,37,53,12,7,2,8,9,0,0,0,0,0,0,GER,68,1,3,Partnered,603,0.381426,0.069652,0.318408,0.08126,0.06136,0.087894,29,20.793103,0.413793,0.241379,1.125,0,4060,1,0.148522,0,0.0,2019-03-23 12:16:22.844474
1,1994,1983,Graduation,Married,51381,1,0,11/15/2013,11,5,5,6,0,2,1,1,1,0,2,7,0,0,0,0,0,0,US,42,1,1,Partnered,19,0.263158,0.263158,0.315789,0.0,0.105263,0.052632,4,4.75,0.25,0.25,3.5,0,3699,1,0.005137,0,0.0,2019-03-24 12:16:22.844474
2,3769,1972,PhD,Together,51381,1,0,3/2/2014,17,25,1,13,0,0,3,1,1,0,3,7,0,0,0,0,0,0,AUS,53,1,1,Partnered,42,0.595238,0.02381,0.309524,0.0,0.0,0.071429,5,8.4,0.2,0.2,3.5,0,3592,1,0.011693,0,0.0,2019-03-25 12:16:22.844474
3,5255,1986,Graduation,Single,51381,1,0,2/20/2013,19,5,1,3,3,263,362,0,27,0,0,1,0,0,0,0,0,0,AUS,39,1,1,Single,637,0.007849,0.00157,0.00471,0.00471,0.412873,0.568289,27,23.592593,0.0,1.0,0.035714,1,3967,1,0.160575,0,0.0,2019-03-26 12:16:22.844474
4,8268,1961,PhD,Married,51381,0,1,7/11/2013,23,352,0,27,10,0,15,3,6,1,7,6,0,0,0,0,0,0,CA,64,1,1,Partnered,404,0.871287,0.0,0.066832,0.024752,0.0,0.037129,17,23.764706,0.176471,0.352941,0.857143,0,3826,1,0.105593,0,0.0,2019-03-27 12:16:22.844474


#### Add the customer `ID` column to the target dataframe so that each target record can be uniquely identified by customer.

In [35]:
# Copy the customer identifier from the predictors onto the target frame
# so both datasets carry the same 'ID' column.
cust_id = predictors_df['ID']
target_df = pd.concat((target_df, cust_id), axis='columns')
target_df

Unnamed: 0,Response,event_timestamp,ID
0,0,2019-03-23 12:16:22.844474,8996
1,0,2019-03-24 12:16:22.844474,1994
2,0,2019-03-25 12:16:22.844474,3769
3,0,2019-03-26 12:16:22.844474,5255
4,0,2019-03-27 12:16:22.844474,8268
...,...,...,...
2235,0,2025-05-05 12:16:22.844474,10142
2236,0,2025-05-06 12:16:22.844474,5263
2237,0,2025-05-07 12:16:22.844474,22
2238,0,2025-05-08 12:16:22.844474,528


#### Saving the data in Parquet format

In [36]:
# Show the current working directory; the relative output paths in the next cell resolve against it.
os.getcwd()

'D:\\OneDrive - Northeastern University\\Jupyter Notebook\\Data Science Projects\\CRM-Analysis-for-Marketing-data'

In [37]:
# Persist both frames as Parquet files under './marketing data/'.
parquet_outputs = (
    (predictors_df, './marketing data/predictors_df.parquet'),
    (target_df, './marketing data/target_df.parquet'),
)
for frame, out_path in parquet_outputs:
    frame.to_parquet(path=out_path)

### 2. Do Feast Initialization 

In [None]:
# Run the Feast CLI's `version` command via the shell.
!feast version

#### Feast init

In [None]:
# Run `feast init` with the name `feature_repo`; the later cells change into a
# directory of that name before running `feast apply`.
!feast init feature_repo

### 3. Update feature store yaml file if needed

### 4. Define feature definitions in a Python file inside the feature repo directory (created using `feast init`)

### 5. Do Feast apply

In [50]:
# Show the current working directory before changing into the feature repository.
os.getcwd()

'D:\\OneDrive - Northeastern University\\Jupyter Notebook\\Data Science Projects\\CRM-Analysis-for-Marketing-data'

In [51]:
cd feature_repo

D:\OneDrive - Northeastern University\Jupyter Notebook\Data Science Projects\CRM-Analysis-for-Marketing-data\feature_repo


In [58]:
# Run `feast apply` from the current directory. The recorded output below shows it
# creating the project, the `customer` entity, two feature views and their sqlite tables.
!feast apply

No project found in the repository. Using project name feature_repo defined in feature_store.yaml
Applying changes for project feature_repo
Created project feature_repo
Created entity customer
Created feature view predictors_df_feature_view
Created feature view target_df_feature_view

Created sqlite table feature_repo_predictors_df_feature_view
Created sqlite table feature_repo_target_df_feature_view



  patient = Entity(name="customer", join_keys=["ID"])


In [61]:
from feast import FeatureStore
import pandas as pd

# Open the feature repository located in the current working directory.
fs = FeatureStore(repo_path=".")

# Entity dataframe for the historical retrieval: the target data saved earlier
# (Response, event_timestamp and ID columns).
# NOTE(review): an earlier cell writes target_df.parquet under './marketing data/',
# while this reads 'data/target_df.parquet' relative to the current directory -
# presumably the file was copied into the feature repo; confirm.
entity_df = pd.read_parquet("data/target_df.parquet")

# Feature references use the form "<feature_view>:<feature>" with no whitespace
# around the colon, so the feature name is matched exactly.
training_df = fs.get_historical_features(
    entity_df=entity_df,
    features=[
        "predictors_df_feature_view:Age",
        "predictors_df_feature_view:Total_Spent",
        "target_df_feature_view:Response",
    ],
).to_df()

training_df.head()



Unnamed: 0,Response,event_timestamp,ID,Age,Total_Spent,Response__
0,0,2019-03-23 12:16:22.844474+00:00,8996,68,603,0
1,0,2019-03-24 12:16:22.844474+00:00,1994,42,19,0
2,0,2019-03-25 12:16:22.844474+00:00,3769,53,42,0
3,0,2019-03-26 12:16:22.844474+00:00,5255,39,637,0
4,0,2019-03-27 12:16:22.844474+00:00,8268,64,404,0
