<center><h1 style="background-color: #C6F3CD; border-radius: 10px; color: #FFFFFF; padding: 5px;">
Building Recommendation engines using ALS
</h1><center/>

**Link to the article** : https://medium.com/@soulawalid/building-recommendation-engines-using-als-bf5823308b6b?sk=c31f611636789af3279db4a346301942

In [1]:
! pip install implicit pandas scipy numpy



In [2]:
import numpy as np
import pandas as pd
import scipy.sparse as sparse
from implicit.als import AlternatingLeastSquares

In [3]:
# Sample user-item interaction data
data = {
    'user_id': [0, 0, 1, 1, 2, 2, 3, 3],
    'item_id': [0, 1, 1, 2, 2, 3, 3, 0],
    'interaction': [1, 2, 3, 4, 5, 6, 7, 8]  # Could represent clicks, views, etc.
}

df = pd.DataFrame(data)

# Convert to sparse matrix format
user_item_matrix = sparse.coo_matrix(
    (df['interaction'], (df['user_id'], df['item_id']))
)

# ✅ Convert to CSR format (required for recommend function)
user_item_matrix = user_item_matrix.tocsr()

In [4]:
# Initialize ALS model
als_model = AlternatingLeastSquares(factors=5, regularization=0.1, iterations=10)

# Train model (Implicit library requires a "confidence" matrix, so we use .T)
als_model.fit(user_item_matrix.T)

  check_blas_config()


  0%|          | 0/10 [00:00<?, ?it/s]

In [5]:
user_id = 1  # Example user
recommendations = als_model.recommend(user_id, user_item_matrix.getrow(user_id), N=3)

In [6]:
item_mapping = {
    0: "Azurite",
    1: "Cyanite",
    2: "Larimar", 
    3: "Agate"
}

# Extract item IDs and scores
item_ids, scores = recommendations

# Create a clean display format
print("\n📌 Top Recommended Items:")
print("─────────────────────────")
for rank, (item_id, score) in enumerate(zip(item_ids, scores), start=1):
    if score > -1e+10:  # Ignore extreme negative scores
        item_name = item_mapping.get(item_id, f"Item {item_id}")  # Get name or default to ID
        print(f"🔹 Rank {rank}: {item_name} (ID: {item_id}) → Score: {score:.4f}")
print("─────────────────────────\n")


📌 Top Recommended Items:
─────────────────────────
🔹 Rank 1: Azurite (ID: 0) → Score: 0.9530
🔹 Rank 2: Agate (ID: 3) → Score: 0.0244
─────────────────────────



Since Azurite (ID: 0) has the highest recommendation score (0.9485), the ALS model suggests that User 1 is most likely to prefer Azurite over the other options

# Explicit 

In [7]:
pip install pyspark

Note: you may need to restart the kernel to use updated packages.


In [8]:
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS
from pyspark.sql.functions import col

In [9]:
spark = SparkSession.builder.appName("ALSExample").getOrCreate()

In [10]:
ratings_data = [
    (0, 0, 5.0), (0, 1, 3.0), (1, 1, 4.0),
    (1, 2, 4.0), (2, 2, 5.0), (2, 3, 3.0),
    (3, 3, 4.0), (3, 0, 2.0)
]
columns = ["user_id", "item_id", "rating"]

ratings_df = spark.createDataFrame(ratings_data, columns)
ratings_df.show()

+-------+-------+------+
|user_id|item_id|rating|
+-------+-------+------+
|      0|      0|   5.0|
|      0|      1|   3.0|
|      1|      1|   4.0|
|      1|      2|   4.0|
|      2|      2|   5.0|
|      2|      3|   3.0|
|      3|      3|   4.0|
|      3|      0|   2.0|
+-------+-------+------+



In [11]:
als = ALS(
    maxIter=10,  # Number of iterations
    regParam=0.1,  # Regularization parameter
    userCol="user_id",
    itemCol="item_id",
    ratingCol="rating",
    coldStartStrategy="drop"  # Drop users/items with no history
)

model = als.fit(ratings_df)

In [12]:
user_recommendations = model.recommendForAllUsers(3)
user_recommendations.show(truncate=False)

+-------+------------------------------------------------+
|user_id|recommendations                                 |
+-------+------------------------------------------------+
|0      |[{0, 4.843926}, {1, 3.0059536}, {2, 2.9161665}] |
|1      |[{2, 4.0052705}, {1, 3.8317149}, {0, 3.2888417}]|
|2      |[{2, 4.8638043}, {1, 3.3661618}, {3, 3.0026472}]|
|3      |[{3, 3.845995}, {2, 3.3937259}, {0, 2.0039403}] |
+-------+------------------------------------------------+



In [13]:
item_mapping = {
    0: "Azurite",
    1: "Cyanite",
    2: "Larimar", 
    3: "Agate"
}

In [14]:
from pyspark.sql.functions import udf
from pyspark.sql.types import ArrayType, StructType, StructField, StringType, FloatType

# Define a UDF to replace item IDs with names
def map_items(recommendations):
    return [(item_mapping.get(item_id, "Unknown"), score) for item_id, score in recommendations]

map_items_udf = udf(map_items, ArrayType(StructType([
    StructField("item_name", StringType(), True),
    StructField("rating", FloatType(), True)
])))

In [15]:
# Apply mapping to user recommendations
user_recommendations_named = user_recommendations.withColumn(
    "recommendations", map_items_udf("recommendations")
)

# Show results
user_recommendations_named.show(truncate=False)

+-------+------------------------------------------------------------------+
|user_id|recommendations                                                   |
+-------+------------------------------------------------------------------+
|0      |[{Azurite, 4.843926}, {Cyanite, 3.0059536}, {Larimar, 2.9161665}] |
|1      |[{Larimar, 4.0052705}, {Cyanite, 3.8317149}, {Azurite, 3.2888417}]|
|2      |[{Larimar, 4.8638043}, {Cyanite, 3.3661618}, {Agate, 3.0026472}]  |
|3      |[{Agate, 3.845995}, {Larimar, 3.3937259}, {Azurite, 2.0039403}]   |
+-------+------------------------------------------------------------------+



# Weights and Multiple Interactions

In [16]:
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix
from implicit.als import AlternatingLeastSquares

# Define weights for different interactions
WEIGHT_LIKE = 0.4
WEIGHT_COMMENT = 0.6

# Sample data with separate interactions
data = {
    'user_id': [0, 0, 1, 1, 2, 2, 3, 3],
    'item_id': [0, 1, 1, 2, 2, 3, 3, 0],
    'likes':    [2, 1, 3, 0, 5, 2, 0, 4],  # Number of likes
    'comments': [0, 3, 1, 4, 0, 6, 7, 2]   # Number of comments
}

df = pd.DataFrame(data)

# Compute a weighted interaction score
df["interaction_score"] = df["likes"] * WEIGHT_LIKE + df["comments"] * WEIGHT_COMMENT

df

Unnamed: 0,user_id,item_id,likes,comments,interaction_score
0,0,0,2,0,0.8
1,0,1,1,3,2.2
2,1,1,3,1,1.8
3,1,2,0,4,2.4
4,2,2,5,0,2.0
5,2,3,2,6,4.4
6,3,3,0,7,4.2
7,3,0,4,2,2.8


In [17]:
# Convert DataFrame to sparse matrix format
user_ids = df["user_id"].astype(np.int32)
item_ids = df["item_id"].astype(np.int32)
scores = df["interaction_score"].astype(np.float32)

user_item_matrix = coo_matrix((scores, (user_ids, item_ids)))

# Convert to CSR format (required for ALS)
user_item_matrix = user_item_matrix.tocsr()

In [18]:
# Train ALS model
als_model = AlternatingLeastSquares(factors=5, regularization=0.1, iterations=10)

# ALS expects item-user matrix, so transpose it
als_model.fit(user_item_matrix.T)



  0%|          | 0/10 [00:00<?, ?it/s]

In [19]:
user_id = 1  # Example user

recommendations = als_model.recommend(user_id, user_item_matrix.getrow(user_id), N=3)

# Item name mapping
item_mapping = {
    0: "Azurite",
    1: "Cyanite",
    2: "Larimar", 
    3: "Agate"
}

# Extract item IDs and scores
item_ids, scores = recommendations

print("\n📌 Top Recommended Items:")
print("─────────────────────────")
for rank, (item_id, score) in enumerate(zip(item_ids, scores), start=1):
    if score > -1e+10:  # Ignore extreme negative scores
        item_name = item_mapping.get(item_id, f"Item {item_id}")  
        print(f"🔹 Rank {rank}: {item_name} (ID: {item_id}) → Score: {score:.4f}")
print("─────────────────────────\n")


📌 Top Recommended Items:
─────────────────────────
🔹 Rank 1: Azurite (ID: 0) → Score: 0.9587
🔹 Rank 2: Agate (ID: 3) → Score: 0.0162
─────────────────────────

