# ALS Training on Google Colab (Spark MLlib)

Use this notebook when training locally is at risk because your laptop may go to sleep; Colab runs the job on a hosted runtime instead.

**Steps:**
1. Upload `interactions_hm.csv` (or `interactions_rr.csv`) to Colab's `/content` directory; for RR, also change `INPUT_CSV` in the configuration cell
2. Install PySpark
3. Run all cells
4. Download `user_factors.csv` and `item_factors.csv` from `/content/als_output/`

In [None]:
# Install PySpark into the notebook runtime (-q keeps pip's output quiet).
!pip install -q pyspark

In [None]:
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS
from pyspark.sql import functions as F
import shutil
from pathlib import Path

# --- ALS hyperparameters --------------------------------------------------
RANK: int = 32          # number of latent factors (f0..f31 in the exported CSVs)
REG_PARAM: float = 0.1  # regularization strength passed to ALS
MAX_ITER: int = 5       # number of ALS iterations

# --- Input / output locations ---------------------------------------------
INPUT_CSV: str = "/content/interactions_hm.csv"  # Change to interactions_rr.csv for RR
OUTPUT_DIR: Path = Path("/content/als_output")   # factor CSVs are written here

In [None]:
# Configure a local-mode session ("local[*]" = one worker thread per core)
# with a 4 GB driver heap, then create it or reuse one that already exists.
spark_builder = SparkSession.builder.appName("ALS-Colab").master("local[*]")
spark = spark_builder.config("spark.driver.memory", "4g").getOrCreate()

# Only surface warnings and errors from Spark in the cell output.
spark.sparkContext.setLogLevel("WARN")
print("Spark session ready.")

In [None]:
# Load the interaction CSV (header row expected; column types are inferred).
df = spark.read.csv(INPUT_CSV, header=True, inferSchema=True)

# Rename the columns to the names the ALS estimator is configured with.
# The Column API is used instead of selectExpr so the names are never run
# through the SQL parser: `user` is a Spark SQL keyword (reserved in ANSI
# mode), which makes "user as userId" fragile as a SQL expression.
df = df.select(
    F.col("user").alias("userId"),
    F.col("item").alias("itemId"),
    F.col("value").alias("rating"),
)

# count() forces a full read, so a bad path or malformed file fails here
# rather than in the middle of training.
n = df.count()
print(f"Loaded {n:,} interactions")

In [None]:
# Estimator settings: hyperparameters come from the configuration cell,
# column names match the renamed interaction DataFrame.
als_params = {
    "rank": RANK,
    "maxIter": MAX_ITER,
    "regParam": REG_PARAM,
    "userCol": "userId",
    "itemCol": "itemId",
    "ratingCol": "rating",
    "seed": 42,  # fixed seed for reproducible factor initialisation
    "coldStartStrategy": "drop",
}
als = ALS(**als_params)

# Fit the factorization model on the full interaction set.
model = als.fit(df)
print("ALS training complete.")

In [None]:
# Create the output directory (and any missing parents); no error if it already exists.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

def explode_factors(df, rank):
    """Flatten the ``features`` array column into one scalar column per factor.

    Args:
        df: DataFrame with an ``id`` column and an array-valued ``features``
            column (the layout of ``model.userFactors`` / ``model.itemFactors``).
        rank: Number of leading array elements to extract.

    Returns:
        A DataFrame with columns ``id, f0, f1, ..., f{rank-1}``.
    """
    # Build every projection up front and apply them in a single select().
    # Calling withColumn() once per factor adds one projection to the query
    # plan per call, which the PySpark docs warn against doing in a loop.
    factor_cols = [F.col("features")[i].alias(f"f{i}") for i in range(rank)]
    return df.select("id", *factor_cols)

def write_factors(df, name):
    """Write a factor DataFrame to ``OUTPUT_DIR/<name>.csv`` as a single file.

    Spark writes CSV output as a directory of part files, so the data is
    coalesced to one partition, written to a temporary directory, and the
    single part file is then moved to its final name.

    Args:
        df: Factor DataFrame with ``id`` and ``features`` columns.
        name: Output file stem (without the ``.csv`` extension).

    Raises:
        FileNotFoundError: If Spark produced no ``part-*.csv`` file, so the
            caller never reports success for a file that was not written.
    """
    exploded = explode_factors(df, RANK)
    tmp = OUTPUT_DIR / f"{name}_tmp"
    try:
        exploded.coalesce(1).write.mode("overwrite").option("header", "true").csv(str(tmp))
        # sorted() makes the choice deterministic should more than one part exist.
        parts = sorted(tmp.glob("part-*.csv"))
        if not parts:
            raise FileNotFoundError(f"Spark wrote no part-*.csv file under {tmp}")
        shutil.move(str(parts[0]), str(OUTPUT_DIR / f"{name}.csv"))
    finally:
        # Always remove the scratch directory, including when the write or
        # the move above failed.
        shutil.rmtree(tmp, ignore_errors=True)

# Export the user factors first, then the item factors, one CSV each.
for factor_df, csv_stem in (
    (model.userFactors, "user_factors"),
    (model.itemFactors, "item_factors"),
):
    write_factors(factor_df, csv_stem)
print("Factors written. Download user_factors.csv and item_factors.csv from /content/als_output/")

In [None]:
# Optional: zip for easy download
# Bundles both factor CSVs into /content/als_factors.zip (one level above als_output).
!cd /content/als_output && zip -r ../als_factors.zip user_factors.csv item_factors.csv