In [4]:
import json
import pandas as pd

# Step 1: Read all encoded item IDs from train/validation/test files
def read_encoded_item_ids(files):
    item_ids = set()
    for file in files:
        with open(file, 'r') as f:
            for line in f:
                parts = line.strip().split()
                item_ids.update(map(int, parts[1:]))  # skip index, keep itemIDs
    return item_ids

# Step 2: Load encoded ID to real ID mapping
def load_item_num2id(path):
    with open(path, 'r') as f:
        return json.load(f)

# Step 3: Filter metadata for relevant items
def filter_metadata(real_item_ids, metadata_path):
    meta_df = pd.read_csv(metadata_path)
    meta_df = meta_df[meta_df['parent_asin'].isin(real_item_ids)]
    return meta_df

# Step 4: Main pipeline
def create_item_metadata_csv(train_path, val_path, test_path, item_map_path, metadata_path, output_path):
    # Step 1: Read encoded item IDs from all 3 files
    encoded_ids = read_encoded_item_ids([train_path, val_path, test_path])

    # Step 2: Load the list of real item IDs (it's a list, not a dict)
    item_num2id = json.load(open(item_map_path))  # list, index = encoded ID

    # Step 3: Map encoded IDs to real IDs using index lookup
    real_ids = [item_num2id[i] for i in encoded_ids if i < len(item_num2id)]

    print(f"Total encoded IDs: {len(encoded_ids)}")
    print(f"Mapped to real IDs: {len(real_ids)}")

    # Step 4: Load metadata and filter based on real IDs
    meta_df = pd.read_csv(metadata_path)
    meta_df['parent_asin'] = meta_df['parent_asin'].astype(str).str.strip()
    real_ids = set(real_ids)
    filtered_meta = meta_df[meta_df['parent_asin'].isin(real_ids)]

    print(f"Found {len(filtered_meta)} items in metadata")

    # Step 5: Select columns you want
    selected_columns = ['parent_asin', 'title', 'main_category', 'price', 'average_rating', 'rating_number']
    final_df = filtered_meta[selected_columns]

    # Step 6: Save the final CSV
    final_df.to_csv(output_path, index=False)
    print(f"Saved metadata for {len(final_df)} items to {output_path}")

# --- Run ---
create_item_metadata_csv(
    train_path='/Users/shayan/Desktop/Echomind/code/src/deep-learning/data/amazon-electronics/train.txt',
    val_path='/Users/shayan/Desktop/Echomind/code/src/deep-learning/data/amazon-electronics/validation.txt',
    test_path='/Users/shayan/Desktop/Echomind/code/src/deep-learning/data/amazon-electronics/test.txt',
    item_map_path='/Users/shayan/Desktop/Echomind/code/src/deep-learning/data/amazon-electronics/item_num2id.json',
    metadata_path='/Users/shayan/Desktop/Echomind/code/src/deep-learning/data/amazon-electronics/meta_Electronics.csv',
    output_path='/Users/shayan/Desktop/Echomind/code/src/deep-learning/data/amazon-electronics/filtered_item_metadata.csv'
)

Total encoded IDs: 9471
Mapped to real IDs: 9471


  meta_df = pd.read_csv(metadata_path)


Found 9471 items in metadata
Saved metadata for 9471 items to /Users/shayan/Desktop/Echomind/code/src/deep-learning/data/amazon-electronics/filtered_item_metadata.csv


In [None]:
from openai import OpenAI

responses = []

client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key="<OPENROUTER_API_KEY>",
)

completion = client.chat.completions.create(
  extra_headers={
    "HTTP-Referer": "<YOUR_SITE_URL>", # Optional. Site URL for rankings on openrouter.ai.
    "X-Title": "<YOUR_SITE_NAME>", # Optional. Site title for rankings on openrouter.ai.
  },
  extra_body={},
  model="deepseek/deepseek-r1:free",
  messages=[
    {
      "role": "user",
      "content": "Here are few items {responses}, give me "
    }
  ]
)
print(completion.choices[0].message.content)