In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Load and preprocess the data.
# NOTE(review): bid_df is loaded but not referenced anywhere in the visible
# code, and auction_df is only passed through to recommend_auctions.
bid_df = pd.read_csv(r"C:\Users\Harsh\Desktop\SheerDrive\bid.csv")
auction_df = pd.read_csv(r"C:\Users\Harsh\Desktop\SheerDrive\auction.csv")
x = pd.read_csv(r"C:\Users\Harsh\Desktop\SheerDrive\merge.csv")

# Create a user-item matrix.
# First collapse to one row per (buyer, make, model) keeping the highest bid;
# reset_index() gives x a fresh 0..n-1 index, which the content-based code
# relies on to line rows up with the rows of cosine_sim.
x = x.groupby(['buyer_id', 'make', 'model'])['bid_amount'].max().reset_index()
user_item_matrix = x.pivot_table(
    index='buyer_id',
    columns=['make', 'model'],
    values='bid_amount',
    aggfunc='max'
).fillna(0)  # 0 means "this buyer never bid on this make/model"

# Calculate user similarities (buyer x buyer, labelled by buyer_id on both axes)
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)

# Content-based filtering setup: TF-IDF over the "make model" text of each row,
# then row-to-row cosine similarity (one row/column per row of x).
x['combined_features'] = x['make'] + " " + x['model']
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(x['combined_features'].fillna(''))
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Map each buyer_id to the first row of x that belongs to that buyer.
# Series.drop_duplicates() compares *values* (the unique row labels) and so
# removed nothing; de-duplicate on the index (buyer_id) instead.
indices = pd.Series(x.index, index=x['buyer_id'])
indices = indices[~indices.index.duplicated(keep='first')]

# Content-based recommendation function
def get_content_based_recommendations(buyer_id, df, cosine_sim, num_recommend=10):
    """Recommend cars whose make/model resembles the buyer's earlier bids.

    Args:
        buyer_id: Identifier matched against ``df['buyer_id']``.
        df: Bid table with ``buyer_id``, ``make``, ``model`` and ``bid_amount``
            columns. Row *i* (by position) must correspond to row and column
            *i* of ``cosine_sim``; the index labels of ``df`` are not used.
        cosine_sim: Square row-to-row similarity array with one row and one
            column per row of ``df``.
        num_recommend: Maximum number of distinct (make, model) pairs returned.

    Returns:
        A DataFrame with ``make`` and ``model`` columns. For a buyer with no
        rows in ``df`` these are the pairs with the highest mean
        ``bid_amount``; otherwise they are the pairs most similar (on average)
        to the buyer's own rows, excluding pairs the buyer already bid on.
    """
    # Decide "known buyer" from the data that was passed in rather than from
    # a module-level lookup that may describe a different frame.
    buyer_mask = (df['buyer_id'] == buyer_id).to_numpy()
    if not buyer_mask.any():
        print(f"No bids found for buyer_id {buyer_id}. Recommending based on overall popularity.")
        overall_popularity = df.groupby(['make', 'model'])['bid_amount'].mean().sort_values(ascending=False)
        popular_cars = overall_popularity.head(num_recommend).reset_index()
        return popular_cars[['make', 'model']]

    # Work in row *positions* throughout: cosine_sim is a plain array, so
    # index labels must never be used to address it.
    buyer_positions = buyer_mask.nonzero()[0]
    makes = df['make'].to_numpy()
    models = df['model'].to_numpy()
    previous_bids_set = set(zip(makes[buyer_positions], models[buyer_positions]))

    # Average similarity of every row to the rows this buyer bid on,
    # then walk the rows from most to least similar.
    sim_scores = cosine_sim[buyer_positions].mean(axis=0)
    ranked_positions = sim_scores.argsort()[::-1]

    picked_positions = []
    seen = set()
    for pos in ranked_positions:
        # Checked first so that num_recommend <= 0 yields no recommendations.
        if len(picked_positions) >= num_recommend:
            break
        car = (makes[pos], models[pos])
        if car in seen or car in previous_bids_set:
            continue
        seen.add(car)
        picked_positions.append(pos)

    recommended_cars = df.iloc[picked_positions][['make', 'model']].drop_duplicates()

    return recommended_cars

# Collaborative filtering prediction function
def predict_interest(user_id, make, model, user_similarity_df, user_item_matrix):
    """Estimate a user's interest in one (make, model) from similar users' bids.

    The estimate is the similarity-weighted average of the values stored in
    ``user_item_matrix`` for that item, taken over users whose similarity to
    ``user_id`` is positive and whose stored value for the item is positive.

    Args:
        user_id: User to predict for; must label a row of
            ``user_similarity_df`` to get a non-zero result.
        make: First element of the item's column key.
        model: Second element of the item's column key.
        user_similarity_df: Square user-by-user similarity frame labelled by
            user id on both axes.
        user_item_matrix: User-by-item frame whose columns are keyed by
            ``(make, model)``; 0 means "no interaction".

    Returns:
        The weighted average, or 0 when the user or the item is unknown or no
        positively-similar user has a positive value for the item.
    """
    item = (make, model)
    # Unknown user or unknown item: nothing to base a prediction on.
    if user_id not in user_similarity_df.index or item not in user_item_matrix.columns:
        return 0

    similar_users = user_similarity_df[user_id].drop(user_id, errors='ignore')
    similar_users = similar_users[similar_users > 0]
    if similar_users.empty:
        return 0

    # Pull the whole item column for the neighbours in one go instead of one
    # scalar lookup per neighbour.
    item_values = user_item_matrix[item].loc[similar_users.index]
    has_interaction = item_values > 0

    similarity_sum = similar_users[has_interaction].sum()
    if similarity_sum == 0:
        return 0

    weighted_sum = (similar_users[has_interaction] * item_values[has_interaction]).sum()
    return weighted_sum / similarity_sum

# Hybrid recommendation function
def recommend_auctions(user_id, user_similarity_df, user_item_matrix, auction_df, cosine_sim, df, num_recommendations=5, cf_weight=0.7, cb_weight=0.3):
    """Blend collaborative and content-based scores into one ranked list.

    Args:
        user_id: Buyer to recommend for.
        user_similarity_df: User-by-user similarity frame (passed through to
            ``predict_interest``).
        user_item_matrix: User-by-(make, model) frame; 0 means no interaction.
        auction_df: Not used by this function; kept so existing callers keep
            working.
        cosine_sim: Row-to-row similarity array for ``df`` (passed through to
            ``get_content_based_recommendations``).
        df: Bid table handed to ``get_content_based_recommendations``.
        num_recommendations: Maximum number of items returned, and the number
            of candidates taken from each of the two recommenders.
        cf_weight: Weight of the normalised collaborative score.
        cb_weight: Score added for each content-based candidate.

    Returns:
        ``(items, count)`` where ``items`` is a list of at most
        ``num_recommendations`` ``(make, model)`` tuples and ``count`` is the
        number of candidates before truncation. For a user with no row in
        ``user_item_matrix`` the items are the columns with the highest mean
        value.
    """
    # Cold start: no history for this user, fall back to overall popularity.
    if user_id not in user_item_matrix.index:
        overall_popularity = user_item_matrix.mean(axis=0).sort_values(ascending=False)
        recommended_auctions = overall_popularity.head(num_recommendations).index
        return list(recommended_auctions), len(recommended_auctions)

    user_interactions = user_item_matrix.loc[user_id]
    interacted_items = set(user_interactions[user_interactions > 0].index)

    # Collaborative candidates: predicted interest for every item not yet bid on.
    predicted_interests_cf = {}
    for (make, model) in user_item_matrix.columns:
        if (make, model) not in interacted_items:
            predicted_interests_cf[(make, model)] = predict_interest(user_id, make, model, user_similarity_df, user_item_matrix)

    recommended_auctions_cf = sorted(predicted_interests_cf.items(), key=lambda kv: kv[1], reverse=True)[:num_recommendations]
    recommended_auctions_cb = get_content_based_recommendations(user_id, df, cosine_sim, num_recommend=num_recommendations)

    # Collaborative scores are on the scale of the matrix values (bid amounts)
    # while a content-based hit contributes a flat cb_weight. Rescale the
    # collaborative scores to [0, 1] so that cf_weight and cb_weight are
    # actually comparable; without this the content-based part can never
    # outrank any positive collaborative score.
    best_cf_score = recommended_auctions_cf[0][1] if recommended_auctions_cf else 0
    cf_scale = best_cf_score if best_cf_score > 0 else 1

    combined_scores = {}
    for car, score in recommended_auctions_cf:
        combined_scores[car] = cf_weight * score / cf_scale

    for car in zip(recommended_auctions_cb['make'], recommended_auctions_cb['model']):
        combined_scores[car] = combined_scores.get(car, 0) + cb_weight

    ranked = sorted(combined_scores.items(), key=lambda kv: kv[1], reverse=True)
    combined_recommendations = [car for car, _ in ranked if car not in interacted_items]

    return combined_recommendations[:num_recommendations], len(combined_recommendations)

# Example: recommend auctions for one existing buyer (id 134).
buyer_id = 134
recommended_auction_details, num_recommendations = recommend_auctions(
    user_id=buyer_id,
    user_similarity_df=user_similarity_df,
    user_item_matrix=user_item_matrix,
    auction_df=auction_df,
    cosine_sim=cosine_sim,
    df=x,
    num_recommendations=5,
)
print(f"Recommended auctions: {recommended_auction_details}")
#print(f"Number of recommendations: {num_recommendations}")

# Example: Recommend auctions for a new buyer (not in user_item_matrix)
# new_buyer_id = 1  
# recommended_auction_details, num_recommendations = recommend_auctions(new_buyer_id, user_similarity_df, user_item_matrix, auction_df, cosine_sim, x, num_recommendations=5)
# print(f"Recommended auctions for new buyer: {recommended_auction_details}")
# print(f"Number of recommendations: {num_recommendations}")


In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Load and preprocess the data.
# NOTE(review): bid_df is loaded but not referenced anywhere in the visible
# code, and auction_df is only passed through to recommend_auctions.
bid_df = pd.read_csv(r"C:\Users\Harsh\Desktop\SheerDrive\bid.csv")
auction_df = pd.read_csv(r"C:\Users\Harsh\Desktop\SheerDrive\auction.csv")
x = pd.read_csv(r"C:\Users\Harsh\Desktop\SheerDrive\merge.csv")

# Create a user-item matrix.
# First collapse to one row per (buyer, make, model) keeping the highest bid;
# reset_index() gives x a fresh 0..n-1 index, which the content-based code
# relies on to line rows up with the rows of cosine_sim.
x = x.groupby(['buyer_id', 'make', 'model'])['bid_amount'].max().reset_index()
user_item_matrix = x.pivot_table(
    index='buyer_id',
    columns=['make', 'model'],
    values='bid_amount',
    aggfunc='max'
).fillna(0)  # 0 means "this buyer never bid on this make/model"

# Calculate user similarities (buyer x buyer, labelled by buyer_id on both axes)
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)

# Content-based filtering setup: TF-IDF over the "make model" text of each row,
# then row-to-row cosine similarity (one row/column per row of x).
x['combined_features'] = x['make'] + " " + x['model']
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(x['combined_features'].fillna(''))
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Map each buyer_id to the first row of x that belongs to that buyer.
# Series.drop_duplicates() compares *values* (the unique row labels) and so
# removed nothing; de-duplicate on the index (buyer_id) instead.
indices = pd.Series(x.index, index=x['buyer_id'])
indices = indices[~indices.index.duplicated(keep='first')]

# Content-based recommendation function
def get_content_based_recommendations(buyer_id, df, cosine_sim, num_recommend=10):
    """Return the buyer's earlier bids and content-based recommendations.

    Args:
        buyer_id: Identifier matched against ``df['buyer_id']``.
        df: Bid table with ``buyer_id``, ``make``, ``model`` and ``bid_amount``
            columns. Row *i* (by position) must correspond to row and column
            *i* of ``cosine_sim``; the index labels of ``df`` are not used.
        cosine_sim: Square row-to-row similarity array with one row and one
            column per row of ``df``.
        num_recommend: Maximum number of distinct (make, model) pairs returned.

    Returns:
        ``(previous_bids, recommendations)``.

        * Buyer has rows in ``df``: ``previous_bids`` is a list of distinct
          ``[make, model]`` lists and ``recommendations`` is a list of
          ``(make, model)`` tuples ordered by average similarity to the
          buyer's rows, excluding pairs the buyer already bid on.
        * Buyer has no rows: ``previous_bids`` is ``[]`` and
          ``recommendations`` is a list of ``[make, model]`` lists with the
          highest mean ``bid_amount``.
    """
    # Decide "known buyer" from the data that was passed in rather than from
    # a module-level lookup that may describe a different frame.
    buyer_mask = (df['buyer_id'] == buyer_id).to_numpy()
    if not buyer_mask.any():
        print(f"No bids found for buyer_id {buyer_id}. Recommending based on overall popularity.")
        overall_popularity = df.groupby(['make', 'model'])['bid_amount'].mean().sort_values(ascending=False)
        popular_cars = overall_popularity.head(num_recommend).reset_index()
        return [], popular_cars[['make', 'model']].values.tolist()

    # Work in row *positions* throughout: cosine_sim is a plain array, so
    # index labels must never be used to address it.
    buyer_positions = buyer_mask.nonzero()[0]
    previous_bids = df.iloc[buyer_positions][['make', 'model']].drop_duplicates().values.tolist()
    previous_bids_set = set(map(tuple, previous_bids))  # set for O(1) membership tests

    makes = df['make'].to_numpy()
    models = df['model'].to_numpy()

    # Average similarity of every row to the rows this buyer bid on,
    # then walk the rows from most to least similar.
    sim_scores = cosine_sim[buyer_positions].mean(axis=0)
    ranked_positions = sim_scores.argsort()[::-1]

    unique_recommendations = []
    seen = set()
    for pos in ranked_positions:
        # Checked first so that num_recommend <= 0 yields no recommendations.
        if len(unique_recommendations) >= num_recommend:
            break
        car = (makes[pos], models[pos])
        if car in seen or car in previous_bids_set:
            continue
        seen.add(car)
        unique_recommendations.append(car)

    return previous_bids, unique_recommendations

# Collaborative filtering prediction function
def predict_interest(user_id, make, model, user_similarity_df, user_item_matrix):
    """Estimate a user's interest in one (make, model) from similar users' bids.

    The estimate is the similarity-weighted average of the values stored in
    ``user_item_matrix`` for that item, taken over users whose similarity to
    ``user_id`` is positive and whose stored value for the item is positive.

    Args:
        user_id: User to predict for; must label a row of
            ``user_similarity_df`` to get a non-zero result.
        make: First element of the item's column key.
        model: Second element of the item's column key.
        user_similarity_df: Square user-by-user similarity frame labelled by
            user id on both axes.
        user_item_matrix: User-by-item frame whose columns are keyed by
            ``(make, model)``; 0 means "no interaction".

    Returns:
        The weighted average, or 0 when the user or the item is unknown or no
        positively-similar user has a positive value for the item.
    """
    item = (make, model)
    # Unknown user or unknown item: nothing to base a prediction on.
    if user_id not in user_similarity_df.index or item not in user_item_matrix.columns:
        return 0

    similar_users = user_similarity_df[user_id].drop(user_id, errors='ignore')
    similar_users = similar_users[similar_users > 0]
    if similar_users.empty:
        return 0

    # Pull the whole item column for the neighbours in one go instead of one
    # scalar lookup per neighbour.
    item_values = user_item_matrix[item].loc[similar_users.index]
    has_interaction = item_values > 0

    similarity_sum = similar_users[has_interaction].sum()
    if similarity_sum == 0:
        return 0

    weighted_sum = (similar_users[has_interaction] * item_values[has_interaction]).sum()
    return weighted_sum / similarity_sum

# Hybrid recommendation function
def recommend_auctions(user_id, user_similarity_df, user_item_matrix, auction_df, cosine_sim, df, num_recommendations=5, cf_weight=0.7, cb_weight=0.3):
    """Blend collaborative and content-based scores into one ranked list.

    Args:
        user_id: Buyer to recommend for.
        user_similarity_df: User-by-user similarity frame (passed through to
            ``predict_interest``).
        user_item_matrix: User-by-(make, model) frame; 0 means no interaction.
        auction_df: Not used by this function; kept so existing callers keep
            working.
        cosine_sim: Row-to-row similarity array for ``df`` (passed through to
            ``get_content_based_recommendations``).
        df: Bid table handed to ``get_content_based_recommendations``.
        num_recommendations: Maximum number of items returned, and the number
            of candidates taken from each of the two recommenders.
        cf_weight: Weight of the normalised collaborative score.
        cb_weight: Score added for each content-based candidate.

    Returns:
        ``(previous_bids, recommendations)``. ``previous_bids`` is a list of
        ``"make model"`` strings for items the user has a positive value for
        (in matrix column order); ``recommendations`` is a list of at most
        ``num_recommendations`` ``(make, model)`` tuples. For a user with no
        row in ``user_item_matrix``, ``previous_bids`` is ``[]`` and the
        recommendations are the columns with the highest mean value.
    """
    # Cold start: no history for this user, fall back to overall popularity.
    if user_id not in user_item_matrix.index:
        overall_popularity = user_item_matrix.mean(axis=0).sort_values(ascending=False)
        recommended_auctions = overall_popularity.head(num_recommendations).index
        return [], list(recommended_auctions)

    user_interactions = user_item_matrix.loc[user_id]
    # Keep the ordered list for reporting and a set for fast membership tests.
    interacted_in_order = list(user_interactions[user_interactions > 0].index)
    interacted_items = set(interacted_in_order)

    # Collaborative candidates: predicted interest for every item not yet bid on.
    predicted_interests_cf = {}
    for (make, model) in user_item_matrix.columns:
        if (make, model) not in interacted_items:
            predicted_interests_cf[(make, model)] = predict_interest(user_id, make, model, user_similarity_df, user_item_matrix)

    recommended_auctions_cf = sorted(predicted_interests_cf.items(), key=lambda kv: kv[1], reverse=True)[:num_recommendations]
    recommended_auctions_cb = get_content_based_recommendations(user_id, df, cosine_sim, num_recommend=num_recommendations)[1]

    # Collaborative scores are on the scale of the matrix values (bid amounts)
    # while a content-based hit contributes a flat cb_weight. Rescale the
    # collaborative scores to [0, 1] so that cf_weight and cb_weight are
    # actually comparable; without this the content-based part can never
    # outrank any positive collaborative score.
    best_cf_score = recommended_auctions_cf[0][1] if recommended_auctions_cf else 0
    cf_scale = best_cf_score if best_cf_score > 0 else 1

    combined_scores = {}
    for car, score in recommended_auctions_cf:
        combined_scores[car] = cf_weight * score / cf_scale

    for make, model in recommended_auctions_cb:
        car = (make, model)
        combined_scores[car] = combined_scores.get(car, 0) + cb_weight

    ranked = sorted(combined_scores.items(), key=lambda kv: kv[1], reverse=True)
    combined_recommendations = [car for car, _ in ranked if car not in interacted_items]

    # Iterate the ordered list, not the set, so the output order is stable
    # from run to run.
    previous_bids = [f"{make} {model}" for make, model in interacted_in_order]

    return previous_bids, combined_recommendations[:num_recommendations]

# Number of recommendation columns in the export; also the number requested
# per buyer, so the two can never drift apart.
max_recommendations = 5

# Generate recommendations for all buyers
recommendations = []
for buyer_id in x['buyer_id'].unique():
    previously_bid_cars, recommended_cars = recommend_auctions(buyer_id, user_similarity_df, user_item_matrix, auction_df, cosine_sim, x, num_recommendations=max_recommendations)
    recommended_cars = list(recommended_cars)[:max_recommendations]
    # Pad short lists so every row has exactly one cell per output column;
    # otherwise DataFrame construction can fail on a column-count mismatch.
    padding = [None] * (max_recommendations - len(recommended_cars))
    row = [buyer_id, previously_bid_cars] + recommended_cars + padding
    recommendations.append(row)

# Create column names
columns = ["buyer_id", "previously_bid"] + [f"Recommendation {i+1}" for i in range(max_recommendations)]

# Convert recommendations to DataFrame
recommendations_df = pd.DataFrame(recommendations, columns=columns)

# Save to Excel
output_path = r"C:\Users\Harsh\Desktop\hybrid.xlsx"
recommendations_df.to_excel(output_path, index=False)

# Report the path that was actually written (the old message named a
# different file).
print(f"Recommendations have been saved to {output_path}")


Recommendations have been saved to hybrid_recommendations_with_bids.xlsx
