In [21]:
import pandas as pd
import numpy as np

data = pd.read_csv('customer_data.csv')

data.fillna(0, inplace=True)
data['return'] = data['return_date'].apply(lambda x: 1 if x != 0 else 0)
data['purchase_year'] = pd.to_datetime(data['purchase_date']).dt.year
data['purchase_month'] = pd.to_datetime(data['purchase_date']).dt.month

data.drop(columns=['purchase_date', 'return_date'], inplace=True)


In [22]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

X = data.drop(columns=['customer_id', 'product_id', 'return'])
y = data['return']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')


Accuracy: 0.9


In [23]:

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder

le_customer = LabelEncoder()
le_product = LabelEncoder()

df_agg['customer_id'] = le_customer.fit_transform(df_agg['customer_id'])
df_agg['product_id'] = le_product.fit_transform(df_agg['product_id'])
df_agg = df.groupby(['customer_id', 'product_id'])['purchase_amount'].sum().reset_index()
user_item_matrix = df_agg.pivot(index='customer_id', columns='product_id', values='purchase_amount').fillna(0)

similarity_matrix = cosine_similarity(user_item_matrix)

def recommend_products(customer_id, num_recommendations=5):
    customer_idx = le_customer.transform([customer_id])[0]
    similar_customers = similarity_matrix[customer_idx]
    similar_customers_idx = similar_customers.argsort()[-num_recommendations:][::-1]

    recommended_products = []
    for idx in similar_customers_idx:
        recommended_products.extend(user_item_matrix.columns[user_item_matrix.iloc[idx].to_numpy().argsort()[-num_recommendations:][::-1]])

    recommended_products = list(dict.fromkeys(recommended_products))[:num_recommendations]

    return le_product.inverse_transform(recommended_products)

print(recommend_products('C001', num_recommendations=5))


['P011' 'P020' 'P009' 'P002' 'P003']


In [26]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import numpy as np

app = FastAPI()

class CustomerData(BaseModel):
    customer_id: str
    features: list

@app.post("/predict_return/")
def predict_return(data: CustomerData):
    features = np.array(data.features).reshape(1, -1)
    prediction = clf.predict(features)[0]
    return {"customer_id": data.customer_id, "return_prediction": prediction}

@app.post("/recommend_products/")
def recommend(data: CustomerData):
    try:
        recommendations = recommend_products(data.customer_id)
        return {"customer_id": data.customer_id, "recommendations": recommendations}
    except:
        raise HTTPException(status_code=404, detail="Customer not found")
