# Explore Valid User IDs and Get Recommendations

This notebook prints a sample of valid `user_id` values from the processed user map (`data/processed/user_map.parquet`) and then queries the running FastAPI service for recommendations for one of them.


In [1]:
from pathlib import Path
from pyspark.sql import SparkSession

# Locate the processed user map, then create (or reuse) a Spark session to read it.
user_map_path = Path("../data/processed/user_map.parquet")
spark = (
    SparkSession.builder
    .appName("user-explore")
    .getOrCreate()
)
user_map = spark.read.parquet(str(user_map_path))

# Pull the first 20 user_id values to the driver; the header is printed before
# the Spark job runs, so it appears ahead of any progress output.
print("Sample user_ids:")
user_ids = [
    row["user_id"]
    for row in user_map.select("user_id").limit(20).collect()
]
for uid in user_ids:
    print(uid)

# Row count of the whole user map, formatted with thousands separators.
print(f"\nTotal users: {user_map.count():,}")


Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
25/11/19 15:19:47 WARN Utils: Your hostname, andres-ThinkPad-L15-Gen-2, resolves to a loopback address: 127.0.1.1; using 192.168.1.11 instead (on interface wlp9s0)
25/11/19 15:19:47 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Using Spark's default log4j profile: org/apache/spark/log4j2-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/11/19 15:19:48 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Sample user_ids:


                                                                                

A2WLOSU1RW6TWC
A2WLQEY91NZMU0
A2WLRI5ONTW5T7
A2WLVS5XJU8WPZ
A2WM1G7B5YU0CT
A2WM3V2HY2A5LU
A2WM5CEGQTPDFK
A2WMEWFRJOD82Z
A2WMFRF7EP14U7
A2WMLZMLETBCLD
A2WMVHB96UXEEU
A2WN0GMMFS8M8F
A2WNJB2OIGP9RB
A2WNJNOH8YQ6D1
A2WNONSBFPS2S
A2WNXXVE3PJDX4
A2WO2E77X7YFAS
A2WO5D8W8ZXPC9
A2WO5O9H80Z994
A2WOCCS2BOZ2AG

Total users: 192,403


In [4]:
# Where the recommendation service is listening, and the value sent as the `n` query parameter.
API_BASE = "http://localhost:8000"
N = 5

# Default to the first sampled id from the cell above (None when the sample is
# empty); assign USER_ID by hand here to query a different user.
if user_ids:
    USER_ID = user_ids[0]
else:
    USER_ID = None

print("Using:", API_BASE, "USER_ID=", USER_ID)


Using: http://localhost:8000 USER_ID= A2WLOSU1RW6TWC


In [None]:
import requests
import json

# Fail fast if no user was selected in the configuration cell.
if not USER_ID:
    raise ValueError("USER_ID is not set")

# Ask the service for N recommendations for USER_ID; the timeout keeps the cell
# from hanging indefinitely when the service is unresponsive.
resp = requests.get(
    f"{API_BASE}/recommendations",
    params={"user_id": USER_ID, "n": N},
    timeout=30,
)
print("Status:", resp.status_code)

try:
    payload = resp.json()
except ValueError:
    # The body is not valid JSON (e.g. an HTML/plain-text error page); show the
    # raw text instead of crashing so the server's reply is still visible.
    # requests' JSON decode errors are ValueError subclasses.
    print(resp.text[:2000])
else:
    print(json.dumps(payload, indent=2)[:2000])  # print first ~2KB


Status: 200
{
  "user_id": "A2WLOSU1RW6TWC",
  "items": [
    {
      "item_id": "B000N64DB8",
      "score": 5.29327917098999
    },
    {
      "item_id": "B005RF2VP0",
      "score": 5.2782816886901855
    },
    {
      "item_id": "B001TXBBQY",
      "score": 5.14666748046875
    },
    {
      "item_id": "B000VWG60A",
      "score": 5.137851238250732
    },
    {
      "item_id": "B00AGBPI3M",
      "score": 5.104048728942871
    }
  ]
}


25/11/19 18:32:55 WARN HeartbeatReceiver: Removing executor driver with no recent heartbeats: 1754697 ms exceeds timeout 120000 ms
25/11/19 18:32:55 WARN SparkContext: Killing executors is not supported by current scheduler.
25/11/19 18:32:57 WARN Executor: Issue communicating with driver in heartbeater
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.SparkThreadUtils$.awaitResult(SparkThreadUtils.scala:53)
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:342)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:101)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:85)
	at org.apache.spark.storage.BlockManagerMaster.registerBlockManager(BlockManagerMaster.scala:81)
	at org.apache.spark.storage.BlockManager.reregister(BlockManager.scala:669)
	at org.apache.spark.executor.Executor.reportHeartBeat(Executor.scala:1296)
	at 