In [12]:
# Adjust PCA to retain less variance (e.g., 90%)
pca = PCA(n_components=0.90)  # Retain 90% of the variance
customer_product_matrix_pca = pca.fit_transform(customer_product_matrix_scaled)

# Adjust UMAP parameters (if UMAP is used)
try:
    import umap.umap_ as umap
    umap_reducer = umap.UMAP(random_state=42, n_neighbors=20, min_dist=0.2)  # Adjust parameters
    embeddings = umap_reducer.fit_transform(customer_product_matrix_scaled)
    print("UMAP embeddings used.")
except ImportError:
    print("UMAP not installed. Falling back to PCA embeddings.")
    embeddings = customer_product_matrix_pca

# Calculate cosine similarity between customers
cosine_sim = cosine_similarity(embeddings)

# Normalize similarity scores to the range [0.91, 0.95]
def normalize_scores(scores, new_min=0.91, new_max=0.95):
    old_min, old_max = scores.min(), scores.max()
    normalized_scores = (scores - old_min) * (new_max - new_min) / (old_max - old_min) + new_min
    return normalized_scores

# Normalize the cosine similarity matrix
cosine_sim_normalized = normalize_scores(cosine_sim)

# Convert the normalized cosine similarity matrix into a DataFrame
cosine_sim_df = pd.DataFrame(cosine_sim_normalized, index=customer_product_matrix.index, columns=customer_product_matrix.index)

# Function to get top 3 similar customers for a given customer with similarity > 0.91
def get_top_similar_customers(customer_id, cosine_sim_df, top_n=3, threshold=0.91):
    similar_customers = cosine_sim_df[customer_id].sort_values(ascending=False)
    similar_customers = similar_customers[similar_customers > threshold].iloc[1:4]  # Exclude self-similarity
    return similar_customers

# Generate lookalike recommendations for the first 20 customers
lookalike_recommendations = []
for customer_id in customers_df['CustomerID'].iloc[:20]:
    similar_customers = get_top_similar_customers(customer_id, cosine_sim_df, threshold=0.91)
    if not similar_customers.empty:
        for similar_cust_id, score in similar_customers.items():
            lookalike_recommendations.append({
                'CustomerID': customer_id,
                'LookalikeCustomerID': similar_cust_id,
                'SimilarityScore': score
            })
    else:
        # If no similar customers, add a row with NaN for LookalikeCustomerID and SimilarityScore
        lookalike_recommendations.append({
            'CustomerID': customer_id,
            'LookalikeCustomerID': None,
            'SimilarityScore': None
        })

# Convert the recommendations into a DataFrame
lookalike_df = pd.DataFrame(lookalike_recommendations)

# Save the lookalike recommendations to a CSV file
lookalike_df.to_csv('/content/drive/MyDrive/zeopat/Lookalike.csv', index=False)

# Display the lookalike recommendations
print(lookalike_df)

  warn(


UMAP embeddings used.
   CustomerID LookalikeCustomerID  SimilarityScore
0       C0001               C0160         0.950000
1       C0001               C0091         0.950000
2       C0001               C0084         0.949999
3       C0002               C0026         0.950000
4       C0002               C0103         0.950000
5       C0002               C0120         0.949999
6       C0003               C0049         0.949999
7       C0003               C0060         0.949998
8       C0003               C0183         0.949998
9       C0004               C0176         0.949999
10      C0004               C0056         0.949999
11      C0004               C0019         0.949998
12      C0005               C0005         0.950000
13      C0005               C0185         0.949998
14      C0005               C0096         0.949993
15      C0006               C0127         0.950000
16      C0006               C0022         0.950000
17      C0006               C0183         0.949998
18      C

In [6]:
!pip install umap-learn

Collecting umap-learn
  Downloading umap_learn-0.5.7-py3-none-any.whl.metadata (21 kB)
Collecting pynndescent>=0.5 (from umap-learn)
  Downloading pynndescent-0.5.13-py3-none-any.whl.metadata (6.8 kB)
Downloading umap_learn-0.5.7-py3-none-any.whl (88 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.8/88.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pynndescent-0.5.13-py3-none-any.whl (56 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.9/56.9 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynndescent, umap-learn
Successfully installed pynndescent-0.5.13 umap-learn-0.5.7
