# Setup

In [1]:
%load_ext autoreload
%autoreload 2 

In [2]:
import pandas as pd
from sqlalchemy import create_engine
from datasets import load_dataset
from pydantic import BaseModel
from dotenv import load_dotenv
from loguru import logger
from datetime import timedelta
import sys
import dask.dataframe as dd
from sqlalchemy.types import Text, ARRAY, JSON
import gc

sys.path.insert(0, "..")
sys.path.insert(1, "../../")

from src.utils.data_prep import parse_dt, handle_dtypes
from src.utils.data_prep import insert_chunk_to_oltp
import os

In [3]:
# Load variables from a .env file into the process environment; override=True lets
# values from the file replace variables that are already set. The return value is
# bound to `_` because it is not used.
_ = load_dotenv(override=True)

## Arguments

In [4]:
from typing import Optional


class Args(BaseModel):
    """Run configuration for this notebook: column names, input/output paths and table names."""

    random_seed: int = 41
    # When True, init() leaves data_persist_dir unset and creates no directory.
    testing: bool = False

    # Column names of the interaction data.
    user_col: str = "user_id"
    item_col: str = "parent_asin"
    rating_col: str = "rating"
    timestamp_col: str = "timestamp"

    # Interaction samples read by this notebook.
    train_fp: str = "../../data_for_ai/interim/train_sample_interactions_16407u.parquet"
    val_fp: str = "../../data_for_ai/interim/val_sample_interactions_16407u.parquet"

    transaction_table_name: str = "amz_rating"
    metadata_table_name: str = "amz_metadata"

    # Hugging Face dataset name and the configuration holding the item metadata.
    hf_dataset_name: str = "McAuley-Lab/Amazon-Reviews-2023"
    amz_metadata_hf_dataset_path: str = "raw_meta_Electronics"

    # Number of most recent days that are held out and not pushed into the OLTP,
    # so that they can later be used to simulate newly arriving data.
    num_days_holdout: int = 30
    holdout_fp: str = "../../data_for_ai/experimental/holdout.parquet"

    # Absolute path of the raw-data directory. Optional because it stays None until
    # init() runs with testing=False.
    data_persist_dir: Optional[str] = None

    def init(self):
        """Resolve and create the raw-data directory unless `testing` is set.

        Returns:
            The same instance, so the call can be chained after the constructor.
        """
        if not self.testing:
            self.data_persist_dir = os.path.abspath("../../data_for_ai/raw")
            os.makedirs(self.data_persist_dir, exist_ok=True)
        return self


args = Args().init()
print(args.model_dump_json(indent=2))


{
  "random_seed": 41,
  "testing": false,
  "user_col": "user_id",
  "item_col": "parent_asin",
  "rating_col": "rating",
  "timestamp_col": "timestamp",
  "train_fp": "../../data_for_ai/interim/train_sample_interactions_16407u.parquet",
  "val_fp": "../../data_for_ai/interim/val_sample_interactions_16407u.parquet",
  "transaction_table_name": "amz_rating",
  "metadata_table_name": "amz_metadata",
  "hf_dataset_name": "McAuley-Lab/Amazon-Reviews-2023",
  "amz_metadata_hf_dataset_path": "raw_meta_Electronics",
  "num_days_holdout": 30,
  "holdout_fp": "../../data_for_ai/experimental/holdout.parquet",
  "data_persist_dir": "/home/dinhln/Desktop/real_time_recsys/data_for_ai/raw"
}


## Load dataset

In [5]:
# Fetch the "full" split of the item-metadata configuration named in args.
# NOTE(review): trust_remote_code=True presumably allows the dataset's own loading
# script to execute locally — confirm the source is trusted before re-running.
amz_metadata_raw = load_dataset(
    path=args.hf_dataset_name,
    name=args.amz_metadata_hf_dataset_path,
    split="full",
    trust_remote_code=True,
)

In [6]:
# Materialise the dataset as a DataFrame and keep only the first row per item id.
amz_metadata_df = amz_metadata_raw.to_pandas().drop_duplicates(subset=[args.item_col])

logger.info(f"amz_metadata_df.shape: {amz_metadata_df.shape}")

[32m2025-06-22 15:04:56.047[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mamz_metadata_df.shape: (1610012, 16)[0m


In [7]:
# Widen the column display to 100 characters for this preview only.
with pd.option_context("display.max_colwidth", 100):
    display(amz_metadata_df.head())

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,All Electronics,FS-1051 FATSHARK TELEPORTER V3 HEADSET,3.5,6,[],[Teleporter V3 The “Teleporter V3” kit sets a new level of value in the FPV world with Fat Shark...,,"{'hi_res': [None], 'large': ['https://m.media-amazon.com/images/I/41qrX56lsYL._AC_.jpg'], 'thumb...","{'title': [], 'url': [], 'user_id': []}",Fat Shark,"[Electronics, Television & Video, Video Glasses]","{""Date First Available"": ""August 2, 2014"", ""Manufacturer"": ""Fatshark""}",B00MCW7G9M,,,
1,All Electronics,Ce-H22B12-S1 4Kx2K Hdmi 4Port,5.0,1,"[UPC: 662774021904, Weight: 0.600 lbs]",[HDMI In - HDMI Out],,"{'hi_res': ['https://m.media-amazon.com/images/I/51qxU4Zd5TL._AC_SL1050_.jpg', 'https://m.media-...","{'title': [], 'url': [], 'user_id': []}",SIIG,"[Electronics, Television & Video, Accessories, Cables, HDMI Cables]","{""Product Dimensions"": ""0.83 x 4.17 x 2.05 inches"", ""Item Weight"": ""5.3 ounces"", ""Item model num...",B00YT6XQSE,,,
2,Computers,Digi-Tatoo Decal Skin Compatible With MacBook Pro 13 inch (Model A2338/ A2289/ A2251) - Protecti...,4.5,246,[WARNING: Please IDENTIFY MODEL NUMBER on the bottom of your Macbook. Only fits for model A2338/...,[],19.99,"{'hi_res': ['https://m.media-amazon.com/images/I/61RPxmi+mPL._AC_SL1500_.jpg', 'https://m.media-...","{'title': ['AL 2Sides Video', 'MacBook Protective Skin', 'ARTSO 15.6"" Laptop Skin Sticker Univer...",Digi-Tatoo,"[Electronics, Computers & Accessories, Laptop Accessories, Skins & Decals, Decals]","{""Brand"": ""Digi-Tatoo"", ""Color"": ""Fresh Marble"", ""Room Type"": ""Bedroom"", ""Material"": ""Vinyl"", ""T...",B07SM135LS,,,
3,AMAZON FASHION,NotoCity Compatible with Vivoactive 4 band 22mm Quick Release Silicone Bands/Garmin Darth Vader/...,4.5,233,[☛NotoCity 22mm band is designed for Vivoactive 4 / Samsung Gear S3 Classic / S3 Frontier / Gear...,[],9.99,"{'hi_res': ['https://m.media-amazon.com/images/I/51ajKKbi76L._AC_UL1000_.jpg', 'https://m.media-...","{'title': [], 'url': [], 'user_id': []}",NotoCity,"[Electronics, Wearable Technology, Clips, Arm & Wristbands]","{""Date First Available"": ""May 29, 2020"", ""Manufacturer"": ""NotoCity""}",B089CNGZCW,,,
4,Cell Phones & Accessories,Motorola Droid X Essentials Combo Pack,3.8,64,"[New Droid X Essentials Combo Pack, Exclusive Package Incredible Value Worth $145!!!, Includes a...","[all Genuine High Quality Motorola Made Accessories, including Multimedia Station with HDMI tech...",14.99,"{'hi_res': [None, None, None, None, None], 'large': ['https://m.media-amazon.com/images/I/51-DXS...","{'title': [], 'url': [], 'user_id': []}",Verizon,"[Electronics, Computers & Accessories, Computer Accessories & Peripherals, Memory Cards, Micro S...","{""Product Dimensions"": ""11.6 x 6.9 x 3.1 inches"", ""Item Weight"": ""1.5 pounds"", ""Other display fe...",B004E2Z88O,,,


In [8]:
# Metadata columns to preview, with the item id first.
cols = [
    "parent_asin",
    "main_category",
    "title",
    "description",
    "price",
    "categories",
    "details",
]
metadata_df = amz_metadata_df.loc[:, cols]
metadata_df.head()

Unnamed: 0,parent_asin,main_category,title,description,price,categories,details
0,B00MCW7G9M,All Electronics,FS-1051 FATSHARK TELEPORTER V3 HEADSET,[Teleporter V3 The “Teleporter V3” kit sets a ...,,"[Electronics, Television & Video, Video Glasses]","{""Date First Available"": ""August 2, 2014"", ""Ma..."
1,B00YT6XQSE,All Electronics,Ce-H22B12-S1 4Kx2K Hdmi 4Port,[HDMI In - HDMI Out],,"[Electronics, Television & Video, Accessories,...","{""Product Dimensions"": ""0.83 x 4.17 x 2.05 inc..."
2,B07SM135LS,Computers,Digi-Tatoo Decal Skin Compatible With MacBook ...,[],19.99,"[Electronics, Computers & Accessories, Laptop ...","{""Brand"": ""Digi-Tatoo"", ""Color"": ""Fresh Marble..."
3,B089CNGZCW,AMAZON FASHION,NotoCity Compatible with Vivoactive 4 band 22m...,[],9.99,"[Electronics, Wearable Technology, Clips, Arm ...","{""Date First Available"": ""May 29, 2020"", ""Manu..."
4,B004E2Z88O,Cell Phones & Accessories,Motorola Droid X Essentials Combo Pack,[all Genuine High Quality Motorola Made Access...,14.99,"[Electronics, Computers & Accessories, Compute...","{""Product Dimensions"": ""11.6 x 6.9 x 3.1 inche..."


In [9]:
# Count distinct item ids in the metadata (compare with the row count logged after de-duplication).
amz_metadata_df[args.item_col].nunique()

1610012

### Get rating sample

In [10]:
# Fail fast with an actionable message if either interaction sample is missing.
# NOTE(review): the validation sample is presumably produced by the same 000-prep-data
# notebook as the training sample — confirm.
for required_fp in (args.train_fp, args.val_fp):
    if not os.path.exists(required_fp):
        raise FileNotFoundError(
            f"{required_fp} does not exist, you need to run the notebook 000-prep-data in the parent folder first"
        )

train_df = pd.read_parquet(args.train_fp)
val_df = pd.read_parquet(args.val_fp)

In [11]:
# Preview the training interactions read from args.train_fp.
train_df

Unnamed: 0,user_id,parent_asin,rating,timestamp
3194,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B06XKCPK5W,2.0,2012-06-11 16:41:10
3199,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B000CKVOOY,3.0,2012-08-02 02:04:13
3200,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B006GWO5WK,5.0,2012-09-15 16:34:46
3204,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B008LURQ76,5.0,2013-01-03 23:08:45
3208,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B00AQRUW4Q,4.0,2013-05-06 01:24:39
...,...,...,...,...
40882304,AFB4DWWKZBQFS22FAWDEP37EL2FA,B00KAF5RQ2,5.0,2016-02-22 17:44:10
40882305,AFB4DWWKZBQFS22FAWDEP37EL2FA,B001F6TXME,5.0,2016-02-22 17:44:40
40882306,AFB4DWWKZBQFS22FAWDEP37EL2FA,B007VGGIB6,5.0,2016-02-22 17:45:10
40882307,AFB4DWWKZBQFS22FAWDEP37EL2FA,B00WUID73W,5.0,2016-02-22 17:45:37


## Merge metadata

In [12]:
cols = ["main_category", "title", "description", "categories", "price"]

# Item features keyed by item id; selected once and reused for both merges.
item_features_df = amz_metadata_df[[args.item_col] + cols]

# Merge the item features into the interaction data.
# validate="many_to_one" makes pandas raise a MergeError if an item id occurs more than
# once in the metadata, instead of silently duplicating interaction rows.
train_features_df = pd.merge(
    train_df,
    item_features_df,
    how="left",
    on=args.item_col,
    validate="many_to_one",
)
val_features_df = pd.merge(
    val_df,
    item_features_df,
    how="left",
    on=args.item_col,
    validate="many_to_one",
)
train_features_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,main_category,title,description,categories,price
0,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B06XKCPK5W,2.0,2012-06-11 16:41:10,Computers,Targus CleanVu Cleaning Pads Touch Screen Devi...,[The Targus CleanVu Cleaning Pads for iPad off...,"[Electronics, Television & Video, Accessories,...",6.99
1,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B000CKVOOY,3.0,2012-08-02 02:04:13,Computers,Arkon Folding Tablet Stand Compatible with App...,[IPM-TAB1 is a highly versatile folding tablet...,"[Electronics, Computers & Accessories, Tablet ...",14.95
2,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B006GWO5WK,5.0,2012-09-15 16:34:46,Amazon Devices,Amazon Kindle 9W PowerFast Adapter for Acceler...,[],"[Electronics, Computers & Accessories, Tablet ...",
3,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B008LURQ76,5.0,2013-01-03 23:08:45,Computers,amFilm (TM) Premium Screen Protector Film Clea...,[Introducing amFilm® Premium Screen Protector ...,"[Electronics, Computers & Accessories, Tablet ...",
4,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B00AQRUW4Q,4.0,2013-05-06 01:24:39,Computers,Seagate Wireless Plus 1TB Portable Hard Drive ...,"[With Seagate Wireless Plus, you can enjoy you...","[Electronics, Computers & Accessories, Data St...",
...,...,...,...,...,...,...,...,...,...
127387,AFB4DWWKZBQFS22FAWDEP37EL2FA,B00KAF5RQ2,5.0,2016-02-22 17:44:10,Camera & Photo,STK EN-EL14 EN-EL14a Battery Pack for Nikon D3...,[The STK Nikon EN-EL14 battery is 100% compati...,"[Electronics, Camera & Photo, Accessories, Bat...",14.99
127388,AFB4DWWKZBQFS22FAWDEP37EL2FA,B001F6TXME,5.0,2016-02-22 17:44:40,Camera & Photo,Nikon 25395 MC-DC2 Remote Release Cord (1 Meter),[Nikon MC-DC2 Remote Release Cord for Nikon Di...,"[Electronics, Camera & Photo, Accessories, Dig...",26.95
127389,AFB4DWWKZBQFS22FAWDEP37EL2FA,B007VGGIB6,5.0,2016-02-22 17:45:10,Camera & Photo,Nikon WU-1a Wireless Mobile Adapter 27081 for ...,[The WU-1a is compatible with the following Ni...,"[Electronics, Camera & Photo, Accessories, Bat...",98.89
127390,AFB4DWWKZBQFS22FAWDEP37EL2FA,B00WUID73W,5.0,2016-02-22 17:45:37,Camera & Photo,eForCity Leather Hand Grip Strap Compatible wi...,[Product Description Cushioned camera grips fi...,"[Electronics, Camera & Photo, Bags & Cases, Bi...",


In [13]:
# Stack the train and validation interactions (with their item features) row-wise.
stacked_features_df = pd.concat([train_features_df, val_features_df], axis=0)

# Turn each description / categories cell into a plain Python list, then hand the
# frame to the project's handle_dtypes helper.
full_df = handle_dtypes(
    stacked_features_df.assign(
        description=stacked_features_df["description"].apply(list),
        categories=stacked_features_df["categories"].apply(list),
    )
)
full_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,main_category,title,description,categories,price
0,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B06XKCPK5W,2.0,2012-06-11 16:41:10.000,Computers,Targus CleanVu Cleaning Pads Touch Screen Devi...,[The Targus CleanVu Cleaning Pads for iPad off...,"[Electronics, Television & Video, Accessories,...",6.99
1,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B000CKVOOY,3.0,2012-08-02 02:04:13.000,Computers,Arkon Folding Tablet Stand Compatible with App...,[IPM-TAB1 is a highly versatile folding tablet...,"[Electronics, Computers & Accessories, Tablet ...",14.95
2,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B006GWO5WK,5.0,2012-09-15 16:34:46.000,Amazon Devices,Amazon Kindle 9W PowerFast Adapter for Acceler...,[],"[Electronics, Computers & Accessories, Tablet ...",
3,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B008LURQ76,5.0,2013-01-03 23:08:45.000,Computers,amFilm (TM) Premium Screen Protector Film Clea...,[Introducing amFilm® Premium Screen Protector ...,"[Electronics, Computers & Accessories, Tablet ...",
4,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B00AQRUW4Q,4.0,2013-05-06 01:24:39.000,Computers,Seagate Wireless Plus 1TB Portable Hard Drive ...,"[With Seagate Wireless Plus, you can enjoy you...","[Electronics, Computers & Accessories, Data St...",
...,...,...,...,...,...,...,...,...,...
3474,AHIIISHZP6YAVVHMDEBLJ5CWZ7ZA,B0BZ62FQ13,3.0,2021-07-16 17:08:55.044,Cell Phones & Accessories,"OtterBox iPhone SE 3rd & 2nd Gen, iPhone 8 & i...",[Commuter Series is the compact case that keep...,"[Electronics, Electronics Gift Guide - Top Tec...",
3475,AFTE3G43QHXWD3DJGDCI2DHEWQJQ,B08DMXDPW5,5.0,2021-01-14 01:48:09.423,Amazon Devices,Echo (4th Gen) | Glacier White with Sengled Bl...,[],[],114.98
3476,AFENZZDPVUYFVBS47YDOWJCDYBSQ,B09XBT6DS9,4.0,2021-12-05 00:35:40.874,Home Audio & Theater,Ultra Clarity Cables High Speed HDMI Extension...,[],"[Electronics, Television & Video, Accessories,...",10.99
3477,AFMBZYPDAXT5VO3ME67HW5Q5TAOQ,B097KBF8JK,5.0,2022-02-18 11:32:46.732,Computers,Corsair Vengeance LPX 128GB (4x32GB) DDR4 3200...,[Corsair Vengeance LPX],"[Electronics, Computers & Accessories, Compute...",279.99


In [14]:
import plotly.express as px

# Number of interactions per calendar day. The timestamp column is taken from args so
# this cell follows the same column configuration as the rest of the notebook.
daily_counts_df = (
    full_df.groupby(pd.Grouper(key=args.timestamp_col, freq="D"))
    .size()
    .reset_index(name="count")
)
px.bar(
    data_frame=daily_counts_df,
    x=args.timestamp_col,
    y="count",
    title="Number of data points over time",
)

## Get holdout in order to demo data pipeline

In [15]:
# Rows stamped at or after this cutoff form the holdout; the cutoff lies
# args.num_days_holdout days before the latest timestamp in full_df.
event_ts = full_df[args.timestamp_col]
holdout_min_date = event_ts.max() - timedelta(days=args.num_days_holdout)

logger.info(f"The original shape of full_df: {full_df.shape}")
full_df_train = full_df.loc[event_ts < holdout_min_date]
logger.info(f"The shape of full_df after splitted: {full_df_train.shape}")

holdout_rating_df = full_df.loc[event_ts >= holdout_min_date]
logger.info(f"The shape of holdout_rating_df: {holdout_rating_df.shape}")

# The two subsets must partition full_df exactly.
n_rows_after_split = len(full_df_train) + len(holdout_rating_df)
assert n_rows_after_split == len(full_df), "Total length of 2 subsets should be equal to the original length"

[32m2025-06-22 15:05:00.000[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1mThe original shape of full_df: (130871, 9)[0m
[32m2025-06-22 15:05:00.017[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1mThe shape of full_df after splitted: (130658, 9)[0m
[32m2025-06-22 15:05:00.020[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mThe shape of holdout_rating_df: (213, 9)[0m


In [16]:
# Preview the rows whose timestamp is strictly before holdout_min_date.
full_df_train

Unnamed: 0,user_id,parent_asin,rating,timestamp,main_category,title,description,categories,price
0,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B06XKCPK5W,2.0,2012-06-11 16:41:10.000,Computers,Targus CleanVu Cleaning Pads Touch Screen Devi...,[The Targus CleanVu Cleaning Pads for iPad off...,"[Electronics, Television & Video, Accessories,...",6.99
1,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B000CKVOOY,3.0,2012-08-02 02:04:13.000,Computers,Arkon Folding Tablet Stand Compatible with App...,[IPM-TAB1 is a highly versatile folding tablet...,"[Electronics, Computers & Accessories, Tablet ...",14.95
2,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B006GWO5WK,5.0,2012-09-15 16:34:46.000,Amazon Devices,Amazon Kindle 9W PowerFast Adapter for Acceler...,[],"[Electronics, Computers & Accessories, Tablet ...",
3,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B008LURQ76,5.0,2013-01-03 23:08:45.000,Computers,amFilm (TM) Premium Screen Protector Film Clea...,[Introducing amFilm® Premium Screen Protector ...,"[Electronics, Computers & Accessories, Tablet ...",
4,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B00AQRUW4Q,4.0,2013-05-06 01:24:39.000,Computers,Seagate Wireless Plus 1TB Portable Hard Drive ...,"[With Seagate Wireless Plus, you can enjoy you...","[Electronics, Computers & Accessories, Data St...",
...,...,...,...,...,...,...,...,...,...
3473,AE2GH2QNHK6AVGUOTECE6FOECE2A,B093H9PY4L,5.0,2021-12-03 18:03:38.676,All Electronics,"IVANKY 4K HDMI Cable 3.3 ft, High Speed 18Gbps...",[],"[Electronics, Television & Video, Accessories,...",5.99
3474,AHIIISHZP6YAVVHMDEBLJ5CWZ7ZA,B0BZ62FQ13,3.0,2021-07-16 17:08:55.044,Cell Phones & Accessories,"OtterBox iPhone SE 3rd & 2nd Gen, iPhone 8 & i...",[Commuter Series is the compact case that keep...,"[Electronics, Electronics Gift Guide - Top Tec...",
3475,AFTE3G43QHXWD3DJGDCI2DHEWQJQ,B08DMXDPW5,5.0,2021-01-14 01:48:09.423,Amazon Devices,Echo (4th Gen) | Glacier White with Sengled Bl...,[],[],114.98
3476,AFENZZDPVUYFVBS47YDOWJCDYBSQ,B09XBT6DS9,4.0,2021-12-05 00:35:40.874,Home Audio & Theater,Ultra Clarity Cables High Speed HDMI Extension...,[],"[Electronics, Television & Video, Accessories,...",10.99


In [17]:
# Preview the held-out rows (timestamp on or after holdout_min_date).
holdout_rating_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,main_category,title,description,categories,price
49,AEMV4AQMTRHICJECLPLTC6SV6D3A,B00V7SFR8Y,5.0,2022-02-05 02:09:34.769,All Electronics,VCE HDMI Coupler HDMI Female to Female Connect...,[],"[Electronics, Home Audio, Home Audio Accessori...",5.99
50,AEMV4AQMTRHICJECLPLTC6SV6D3A,B077V2BF3C,5.0,2022-02-05 02:13:37.521,Computers,"Moread HDMI to VGA, 2 Pack, Gold-Plated HDMI t...",[],"[Electronics, Computers & Accessories, Compute...",14.99
51,AEMV4AQMTRHICJECLPLTC6SV6D3A,B0C556BW5M,5.0,2022-02-05 03:17:34.026,All Electronics,External CD/DVD Drive for Laptop USB 3.0 CD/DV...,[],"[Electronics, Computers & Accessories, Compute...",18.69
52,AEMV4AQMTRHICJECLPLTC6SV6D3A,B0921LXGF9,5.0,2022-02-05 03:37:03.183,Computers,SanDisk 250GB Extreme Portable External SSD - ...,[The ruggedized SanDisk Extreme Portable SSD d...,"[Electronics, Computers & Accessories, Data St...",
67,AG2CZRZGOVDCD5BBMCGA7CSTGQIQ,B07FP8VLXS,3.0,2022-01-21 01:20:36.518,Computers,Logitech K400 Plus Wireless Touch With Easy Me...,[The Logitech Wireless Touch Keyboard K400 Plu...,"[Electronics, Computers & Accessories, Compute...",22.99
...,...,...,...,...,...,...,...,...,...
3419,AGFRYVIF7CVPOK777KN3PSOSWSMA,B0BVZZ36ZL,5.0,2022-01-24 19:12:44.117,All Electronics,"Logitech B100 Corded Mouse, Wired USB Mouse fo...",[The B100 Optical Combo mouse. An office basic...,"[Electronics, Computers & Accessories, Compute...",7.64
3433,AFCCJP56LNBAKFMEOVL36F3SRCKQ,B0B2DD1SC2,1.0,2022-02-15 19:28:11.361,Computers,INLAND Micro Center SuperSpeed 2 Pack 128GB US...,[],"[Electronics, Computers & Accessories, Data St...",20.99
3435,AFFGZQBIHKANJVKYMYZ4DC7D3HDQ,B09Y3H915M,3.0,2022-01-29 19:14:46.007,All Electronics,Polk Audio Signa S3 Ultra-Slim TV Sound Bar an...,[The low-profile Signa S3 sound bar delivers a...,"[Electronics, Home Audio, Speakers, Sound Bars]",299.0
3456,AHA3P7VTPBXCMB22DPNLCKTCIUWQ,B01DN7OJ30,5.0,2022-01-23 18:55:32.961,Musical Instruments,Hosa YRA-167 Right Angle 3.5 mm TRS to Dual RC...,[The YRA-167 cable is designed to split a ster...,"[Electronics, Home Audio, Home Audio Accessori...",


# Persist to OLTP

In [18]:
# Read the PostgreSQL settings from the environment in one pass; any variable that
# is not set yields None.
host, port, database, user, password, schema = (
    os.environ.get(env_name)
    for env_name in (
        "POSTGRES_HOST",
        "POSTGRES_PORT",
        "POSTGRES_DB",
        "POSTGRES_USER",
        "POSTGRES_PASSWORD",
        "POSTGRES_OLTP_SCHEMA",
    )
)

In [19]:
# Show the schema name read from POSTGRES_OLTP_SCHEMA.
print(schema)

experimental


In [20]:
from sqlalchemy.engine import URL

# Stop early when a required setting is absent, rather than building a URL that
# contains the literal text "None" or falls back to driver defaults.
required_settings = {
    "POSTGRES_HOST": host,
    "POSTGRES_PORT": port,
    "POSTGRES_DB": database,
    "POSTGRES_USER": user,
}
missing_settings = [env_name for env_name, value in required_settings.items() if not value]
if missing_settings:
    raise ValueError(
        f"Missing PostgreSQL settings: {', '.join(missing_settings)}; "
        "set them in the environment or the .env file"
    )

# Build the URL from its components instead of string formatting, so that special
# characters in the credentials (e.g. "@", ":", "/") are escaped rather than
# corrupting the URL.
connection_url = URL.create(
    drivername="postgresql+psycopg2",
    username=user,
    password=password,
    host=host,
    port=int(port),
    database=database,
)
# String form kept under the original name; it embeds the password, so do not print or log it.
connection_string = connection_url.render_as_string(hide_password=False)
engine = create_engine(connection_url)

In [21]:
from sqlalchemy import create_engine, text

# Smoke-test the engine with a trivial query. Any failure is reported as text
# instead of being raised, so this cell itself never stops the run.
try:
    with engine.connect() as conn:
        probe_value = conn.execute(text("SELECT 1")).scalar()
        print("Connection successful:", probe_value == 1)
except Exception as exc:
    print("Connection failed:", exc)


Connection successful: True


In [22]:
# Pass the non-holdout interactions to the project's OLTP ingestion helper, together
# with the engine, the schema and the transaction table name from args.
# NOTE(review): insert_chunk_to_oltp is defined in src.utils.data_prep and is not visible
# here — presumably it writes in chunks; confirm whether re-running this cell appends
# duplicate rows.
insert_chunk_to_oltp(full_df_train, engine, schema, args.transaction_table_name)

Ingesting chunks:   0%|          | 0/131 [00:00<?, ?it/s]

In [23]:
# to_parquet does not create missing parent directories, so make sure the target
# folder exists before writing the holdout rows.
holdout_dir = os.path.dirname(args.holdout_fp)
if holdout_dir:
    os.makedirs(holdout_dir, exist_ok=True)
holdout_rating_df.to_parquet(args.holdout_fp, index=False)