In [None]:
import json
import os
from typing import List, Dict, Any

import numpy as np
import pandas as pd
import requests

In [None]:
# ----- SERVICE ENDPOINTS -----
AUTH_URL    = "http://localhost:8080/auth/public/login"
ORDERS_API  = "http://localhost:8080/orders/secure/recommend"
PRODUCT_API = "http://localhost:8080/product/public/recommend"

# ----- CREDENTIALS -----
# Read from the environment (RECO_API_USERNAME / RECO_API_PASSWORD) so real
# secrets never have to be saved with the notebook. The fallbacks are the
# local-development values this notebook has always used.
USERNAME    = os.environ.get("RECO_API_USERNAME", "admin")
PASSWORD    = os.environ.get("RECO_API_PASSWORD", "123456")

# ----- OPTIONAL DATE FILTERS (ISO format) -----
# Leave as None to fetch without a date restriction.
START_DATE = None          # e.g. "2025-11-01"
END_DATE   = None          # e.g. "2025-11-07"

# ========================== 1. GET TOKEN ==========================
def get_token() -> str:
    """Log in to the auth service and return the bearer token.

    Posts USERNAME/PASSWORD as JSON to AUTH_URL and reads the token from the
    "token" field of the response, falling back to "accessToken".

    Returns:
        The token string taken from the login response.

    Raises:
        requests.HTTPError: if the service answers with an error status.
        requests.Timeout: if the service does not answer within 10 seconds.
        ValueError: if the response carries neither token field.
    """
    # Without a timeout a stalled auth service would hang the notebook forever.
    r = requests.post(
        AUTH_URL,
        json={"username": USERNAME, "password": PASSWORD},
        timeout=10,
    )
    r.raise_for_status()

    payload = r.json()  # parse the body once instead of once per lookup
    token = payload.get("token") or payload.get("accessToken")
    if not token:
        raise ValueError("Token missing")
    print("Token OK")
    return token

# Authenticate once; the fetch helpers below send this header with each request.
token = get_token()
headers = dict(Authorization=f"Bearer {token}")

In [None]:
def fetch_orders() -> List[Dict[str, Any]]:
    """Fetch DELIVERED orders and keep only the fields used downstream.

    Queries ORDERS_API with the module-level auth ``headers``; START_DATE and
    END_DATE are sent as ``startDate`` / ``endDate`` when they are set.  The
    response is expected to carry a ``data`` entry (either an object or a
    JSON-encoded string) whose ``content`` entry is the list of orders.

    Returns:
        One dict per order with ``orderId``, ``customerId``, ``totalAmount``
        and ``items`` (a list of dicts with ``variantId``, ``productId``,
        ``quantity``, ``unitPrice`` and ``variantName``).

    Raises:
        requests.HTTPError: if the service answers with an error status.
        requests.Timeout: if the service does not answer within 30 seconds.
        KeyError: if ``data`` / ``content`` or an order ``id`` is missing.
    """
    params = {"status": "DELIVERED"}
    if START_DATE:
        params["startDate"] = START_DATE
    if END_DATE:
        params["endDate"] = END_DATE

    # A timeout keeps a stalled service from hanging the notebook.
    r = requests.get(ORDERS_API, headers=headers, params=params, timeout=30)
    r.raise_for_status()
    raw = r.json()

    data_block = raw["data"]
    if isinstance(data_block, str):
        # "data" may arrive as a JSON string rather than a nested object.
        data_block = json.loads(data_block)

    orders = data_block["content"]

    orders_filtered = [
        {
            "orderId": order["id"],
            "customerId": order.get("customerId"),
            # "orderDate": order.get("orderDate"),
            "totalAmount": order.get("totalAmount"),
            "items": [
                {
                    "variantId": item.get("variantId"),
                    "productId": item.get("productId"),
                    "quantity": item.get("quantity"),
                    "unitPrice": item.get("unitPrice"),
                    "variantName": item.get("variantName"),
                }
                # `or []` also covers an explicit `"items": null`, which
                # `.get("items", [])` would pass through as None.
                for item in (order.get("items") or [])
            ],
        }
        for order in orders
    ]

    return orders_filtered

# Fetch the orders and preview one record to sanity-check the payload shape.
orders_raw = fetch_orders()
print("\n--- SAMPLE ORDERS (first 1) ---")
print(orders_raw[:1])

In [None]:
def fetch_reviews() -> List[Dict[str, Any]]:
    """Fetch product reviews and keep only the fields used downstream.

    Queries PRODUCT_API with the module-level auth ``headers``; START_DATE and
    END_DATE are sent as ``startDate`` / ``endDate`` when they are set.

    The response body may be a plain list of reviews, or that list wrapped in
    a ``{"data": ...}`` and/or ``{"content": [...]}`` envelope (the shape
    ``fetch_orders`` handles); any level may also be a JSON-encoded string.

    Returns:
        One dict per review with ``reviewId``, ``orderId``, ``productId``,
        ``variantId``, ``customerId`` and ``rating``.

    Raises:
        requests.HTTPError: if the service answers with an error status.
        requests.Timeout: if the service does not answer within 30 seconds.
        KeyError: if an object payload has no ``content`` list, or a review
            has no ``id``.
    """
    params = {"status": "DELIVERED"}
    if START_DATE:
        params["startDate"] = START_DATE
    if END_DATE:
        params["endDate"] = END_DATE

    # A timeout keeps a stalled service from hanging the notebook.
    r = requests.get(PRODUCT_API, headers=headers, params=params, timeout=30)
    r.raise_for_status()

    reviews = r.json()
    if isinstance(reviews, str):
        reviews = json.loads(reviews)

    # Unwrap the optional {"data": ...} envelope.
    if isinstance(reviews, dict) and "data" in reviews:
        reviews = reviews["data"]
        if isinstance(reviews, str):
            reviews = json.loads(reviews)

    # Unwrap the optional paged {"content": [...]} envelope.  Iterating a dict
    # directly would yield its keys, not reviews, so fail loudly instead.
    if isinstance(reviews, dict):
        reviews = reviews["content"]

    reviews_filtered = [
        {
            "reviewId": review["id"],
            "orderId": review.get("orderId"),
            "productId": review.get("productId"),
            "variantId": review.get("variantId"),
            "customerId": review.get("customerId"),
            "rating": review.get("rating"),
            # "createdAt": review.get("createdAt")
        }
        for review in reviews
    ]
    return reviews_filtered

# Fetch the reviews and preview one record to sanity-check the payload shape.
reviews_raw = fetch_reviews()
print("\n--- SAMPLE REVIEWS (first 1) ---", reviews_raw[:1], sep="\n")

In [None]:
def process_orders(orders: List[Dict[str, Any]]) -> pd.DataFrame:
    """Aggregate orders into one implicit-rating row per (customer, product).

    Args:
        orders: Order records.  When the records carry an ``items`` list it is
            expanded to one row per line item; records that are already flat
            (with ``customerId`` / ``productId`` / ``quantity`` columns) are
            used as they are.

    Returns:
        A DataFrame with columns ``customerId``, ``productId``, ``quantity``
        (summed over all orders), ``implicit_rating`` (``quantity * 2.5``
        capped at 5.0) and ``source`` (always ``"order"``).  The frame is
        empty, but still has these columns, when there are no line items.
    """
    result_columns = ["customerId", "productId", "quantity", "implicit_rating", "source"]

    df = pd.DataFrame(orders)
    if 'items' in df.columns:
        # One row per line item.  explode() turns an empty/missing item list
        # into a NaN placeholder row, which carries no purchase and is dropped.
        df = df.explode('items').reset_index(drop=True)
        df = df[df['items'].notna()].reset_index(drop=True)
        if not df.empty:
            items_df = pd.json_normalize(df['items'].tolist())
            df = pd.concat([df.drop(columns=['items']), items_df], axis=1)

    # No orders, or no line items at all: return a correctly shaped empty
    # frame instead of failing in groupby on missing key columns.
    if df.empty:
        return pd.DataFrame(columns=result_columns)

    orders_agg = (
        df.groupby(['customerId', 'productId'])
        .agg({'quantity': 'sum'})
        .reset_index()
    )
    # One unit bought maps to 2.5; two or more saturate at the top score of 5.
    orders_agg['implicit_rating'] = np.minimum(orders_agg['quantity'] * 2.5, 5.0)
    orders_agg['source'] = 'order'
    return orders_agg
# Collapse the fetched orders to one implicit-rating row per (customer, product).
orders_agg = process_orders(orders_raw)
print(orders_agg)


In [None]:
def process_reviews(reviews: List[Dict[str, Any]]) -> pd.DataFrame:
    """Average review ratings per (customer, product).

    Args:
        reviews: Review records with at least ``customerId``, ``productId``
            and ``rating``.

    Returns:
        A DataFrame with columns ``customerId``, ``productId``, ``rating``
        (mean over that customer's reviews of the product) and ``source``
        (always ``"review"``).  The frame is empty, but still has these
        columns, when there are no reviews.
    """
    result_columns = ["customerId", "productId", "rating", "source"]

    df = pd.DataFrame(reviews)
    # An empty input has no columns to group by; return a correctly shaped
    # empty frame instead of failing in groupby.
    if df.empty:
        return pd.DataFrame(columns=result_columns)

    reviews_agg = (
        df.groupby(['customerId', 'productId'])
        .agg({'rating': 'mean'})
        .reset_index()
    )
    reviews_agg['source'] = 'review'
    return reviews_agg
# Collapse the fetched reviews to one mean-rating row per (customer, product).
reviews_agg = process_reviews(reviews_raw)
print(reviews_agg)

In [None]:
# Outer join keeps pairs that appear in only one of the two sources; both
# frames have a `source` column, so the result carries source_x / source_y.
merged = orders_agg.merge(
    reviews_agg,
    how='outer',
    on=['customerId', 'productId'],
)
print("Raw Merged:\n", merged.head())  # Check for NaNs

In [None]:
# An explicit review score wins; pairs without one use the order-derived rating.
merged['final_rating'] = np.where(
    merged['rating'].isna(),
    merged['implicit_rating'],
    merged['rating'],
)
# Same precedence for the provenance label: review side first, then order side.
merged['source'] = merged['source_y'].where(merged['source_y'].notna(), merged['source_x'])

interactions_df = (
    merged.loc[:, ['customerId', 'productId', 'final_rating', 'source', 'quantity']]
    .copy()
    .dropna(subset=['final_rating'])
)
print("Interactions DF:\n", interactions_df)

In [None]:
# Customers as rows, products as columns; pairs with no interaction become 0.
user_item_matrix = pd.pivot_table(
    interactions_df,
    values='final_rating',
    index='customerId',
    columns='productId',
    fill_value=0,
)
print("User-Item Matrix:\n", user_item_matrix)

In [None]:
# Generate synthetic customer/product interactions to densify the matrix.
# The draws below come from NumPy's global RNG, so the output depends on the
# seed AND on the exact order of the random calls.
np.random.seed(44)  # Reproducible fakes

# Shows the product ids present in the real user-item matrix.
print( user_item_matrix.columns.tolist())
# NOTE(review): product ids are hard-coded here rather than taken from the
# matrix columns printed above - confirm they match the real catalogue.
products=[13,14,15,16]
# Synthetic customer ids 3..19.
# NOTE(review): presumably chosen so they do not collide with real customer
# ids - verify against interactions_df['customerId'].
fake_ids=np.arange(3, 20)
fake_data = []
# Mean real rating per product, used as the centre of each synthetic rating.
real_avg_ratings = interactions_df.groupby('productId')['final_rating'].mean().to_dict()
for cust_id in fake_ids:
    num_interactions = np.random.randint(2, 5)  # 2-4 products per cust
    selected_products = np.random.choice(products, size=num_interactions, replace=False)
    for prod_id in selected_products:
        # Products with no real rating fall back to a neutral 3.0.
        real_avg = real_avg_ratings.get(prod_id, 3.0)  
        # Normal noise (std 1.0) around the real mean, clipped to the 1-5 scale.
        rating = np.clip(np.random.normal(real_avg, 1.0), 1, 5)
        
        # NOTE(review): randint's upper bound is exclusive, so this always
        # yields 1 - confirm whether a 1-2 range (randint(1, 3)) was intended.
        quantity = np.random.randint(1, 2) 
        
        fake_data.append({
            'customerId': cust_id,
            'productId': prod_id,
            'final_rating': round(rating, 1), 
            'source': 'synthetic',
            'quantity': quantity
        })

fake_df = pd.DataFrame(fake_data)
print("Sample Fake:\n", fake_df.head())


In [None]:
# Real interactions are concatenated first, synthetic rows second.
augmented_df = pd.concat([interactions_df, fake_df], ignore_index=True)
print(augmented_df.head(10))
# On a (customer, product) clash keep the FIRST row, i.e. the real interaction;
# keep='last' would let a synthetic row overwrite genuine data.
augmented_df = augmented_df.drop_duplicates(subset=['customerId', 'productId'], keep='first')
augmented_df = augmented_df.sort_values(['customerId', 'productId']).reset_index(drop=True)

print(f"Clean Augmented: {len(augmented_df)} pairs")
print(augmented_df.groupby('source').size())

In [None]:
# Rebuild the customer x product matrix from real + synthetic interactions;
# pairs with no interaction become 0.
augmented_matrix = pd.pivot_table(
    augmented_df,
    values='final_rating',
    index='customerId',
    columns='productId',
    fill_value=0,
)
print("Augmented Matrix Shape:", augmented_matrix.shape)
print(augmented_matrix.head())

In [None]:
# Sparsity = share of matrix cells that hold no positive rating.
sparsity = 1.0 - (augmented_matrix > 0).to_numpy().sum() / augmented_matrix.size
print(f"New Sparsity: {sparsity*100:.1f}%")

# Persist the long-format interactions for later modeling runs.
augmented_df.to_csv("augmented_interactions.csv", index=False)
print("Saved! Use augmented_df or augmented_matrix for modeling.")

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Min-max scale each product column, then re-attach the customer/product labels.
scaler = MinMaxScaler().fit(augmented_matrix)
normalized_matrix = scaler.transform(augmented_matrix)
normalized_df = pd.DataFrame(
    data=normalized_matrix,
    index=augmented_matrix.index,
    columns=augmented_matrix.columns,
)
print("Normalized Sample (Cust1):\n", normalized_df.loc[1].head())  # Ratings 0-1

In [None]:
# Pairwise cosine similarity between customers' normalized rating vectors.
norm_array = normalized_df.to_numpy()
similarity_matrix = cosine_similarity(norm_array)
sim_df = pd.DataFrame(
    data=similarity_matrix,
    index=augmented_matrix.index,
    columns=augmented_matrix.index,
)
print("Similarity Sample (Cust1 to others):\n", sim_df.loc[1].sort_values(ascending=False).head())

In [None]:
def get_recommendations(customer_id,matrix=augmented_matrix,sim_df=sim_df,k=5,n=3):
    """Recommend unseen products via user-based collaborative filtering.

    Args:
        customer_id: Row label of the customer in ``matrix`` / ``sim_df``.
        matrix: Customer x product rating matrix; 0 marks "not interacted".
        sim_df: Customer x customer similarity matrix.
        k: Number of most-similar other customers to consult.
        n: Maximum number of recommendations to return.

    Returns:
        A list of ``(product_id, predicted_score)`` tuples, best first, with
        scores rounded to two decimals.  A product none of the neighbours
        rated gets a score of 0.  A message string is returned instead when
        the customer is not in ``matrix`` or has no unseen products.
    """
    if customer_id not in matrix.index:
        return "New customer: Recommend popular products (e.g., top avg rated)."

    # Remove the customer explicitly instead of assuming they sort first in
    # their own row: with tied similarities (or a zero self-similarity for an
    # all-zero vector) positional slicing would drop a real neighbour.
    neighbour_sims = sim_df.loc[customer_id].drop(labels=customer_id, errors="ignore")
    similar_custs = neighbour_sims.sort_values(ascending=False).head(k).index.tolist()
    print(f"Top {k} similar to {customer_id}: {similar_custs}")

    customer_ratings = matrix.loc[customer_id]
    unseen = customer_ratings[customer_ratings == 0].index.tolist()
    if not unseen:
        return "No new recs! All seen!"

    predictions = {}
    for prod in unseen:
        weighted_sum = 0
        sim_sum = 0
        for sim_cust in similar_custs:
            rating = matrix.loc[sim_cust, prod]
            # Only neighbours who actually rated the product take part.
            if rating > 0:
                sim_score = sim_df.loc[customer_id, sim_cust]
                weighted_sum += sim_score * rating
                sim_sum += sim_score
        # Similarity-weighted mean rating; 0 when no neighbour has evidence.
        predictions[prod] = weighted_sum / sim_sum if sim_sum > 0 else 0

    recs = sorted(predictions.items(), key=lambda x: x[1], reverse=True)[:n]
    return [(prod, round(score, 2)) for prod, score in recs]

# Smoke test: recommendations for customer 1.
recs = get_recommendations(1, n=2)
print(f"Recs for Cust1: {recs}")

# Cold-start fallback: the three products with the highest column mean
# (unrated cells count as 0 in that mean).
popular = augmented_matrix.mean(axis=0).sort_values(ascending=False).iloc[:3]
print("Global Popular:", popular)