In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the data
# NOTE(review): absolute, machine-specific path — the notebook only runs where
# this file exists; consider a configurable data directory.
df = pd.read_csv(r"C:\Users\Harsh\Desktop\SheerDrive\merge.csv")

# Upper-case the descriptive columns so the same car spelled with different
# casing is treated as one value.
for column in ('make', 'model', 'vehicle_type'):
    df[column] = df[column].str.upper()

# Create a combined feature including make, model, and vehicle_type.
# Missing values are filled per column *before* concatenating: string + NaN is
# NaN, so a single missing field would otherwise blank out the whole row's text
# and discard the fields that are present.
df['combined_features'] = (
    df['make'].fillna('') + " " + df['model'].fillna('') + " " + df['vehicle_type'].fillna('')
)

# Initialize the TF-IDF Vectorizer
tfidf_vectorizer = TfidfVectorizer()

# Fit and transform the combined features (one TF-IDF row per row of df)
tfidf_matrix = tfidf_vectorizer.fit_transform(df['combined_features'].fillna(''))

# Compute the cosine similarity matrix between every pair of rows of df;
# entry [i, j] refers to the i-th and j-th rows by position.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Series keyed by buyer_id whose values are the row labels of df.
# Note: Series.drop_duplicates() compares the *values* (row labels), not the
# buyer_id index, so rows with repeated buyer IDs are all kept.
indices = pd.Series(df.index, index=df['buyer_id']).drop_duplicates()

# Function to get car recommendations based on content
def get_recommendations(buyer_id, num_recommend=10):
    """Recommend cars for a buyer from the similarity of their past bids.

    Relies on the module-level ``df`` (columns ``buyer_id``, ``make``,
    ``model``, ``vehicle_type``, ``bid_amount``) and ``cosine_sim`` (square
    matrix whose i-th row/column corresponds to the i-th row of ``df``).

    Parameters
    ----------
    buyer_id
        Value looked up in ``df['buyer_id']``.
    num_recommend : int, default 10
        Maximum number of cars to return. Values <= 0 yield an empty result.

    Returns
    -------
    pandas.DataFrame
        * Buyer with bids: columns ``make``, ``model``, ``vehicle_type`` and
          ``similarity_score`` (mean cosine similarity to the buyer's bid
          rows). Cars whose vehicle type the buyer has already bid on come
          first, ordered by descending score; if those do not fill
          ``num_recommend``, the remaining slots are filled from all other
          cars, again by descending score. Cars the buyer already bid on are
          never returned and each (make, model, vehicle_type) appears once.
        * Buyer without bids: columns ``make``, ``model``, ``vehicle_type`` of
          the groups with the highest mean ``bid_amount``.
    """
    feature_cols = ['make', 'model', 'vehicle_type']
    limit = max(num_recommend, 0)  # a negative request behaves like 0

    # Positions (not labels) of the buyer's rows: cosine_sim is positional, so
    # using positions throughout keeps the function correct for any df index.
    buyer_rows = (df['buyer_id'] == buyer_id).to_numpy().nonzero()[0]

    # Cold start: no bids for this buyer, fall back to a global ranking.
    if buyer_rows.size == 0:
        print(f"No bids found for buyer_id {buyer_id}. Recommending based on overall popularity.")
        # The ranking is by mean bid amount per (make, model, vehicle_type).
        overall_popularity = (
            df.groupby(feature_cols)['bid_amount'].mean().sort_values(ascending=False)
        )
        return overall_popularity.head(limit).reset_index()[feature_cols]

    buyer_frame = df.iloc[buyer_rows]

    # Distinct cars the buyer has previously bid on (shown to the user as-is).
    previous_bids = buyer_frame[feature_cols].drop_duplicates().values.tolist()
    print(f"Previous bids for buyer {buyer_id}: {previous_bids}")

    # One hashable key per row of df, built once instead of per-row df.at
    # lookups. Missing fields become '' so that cars with missing values
    # compare equal to each other (NaN != NaN would defeat the de-duplication).
    car_keys = list(df[feature_cols].fillna('').itertuples(index=False, name=None))
    previous_bids_set = {car_keys[row] for row in buyer_rows}

    # Average similarity of every car to the buyer's bid rows, best first.
    sim_scores = cosine_sim[buyer_rows].mean(axis=0)
    ranked_rows = sim_scores.argsort()[::-1]

    vehicle_type_counts = buyer_frame['vehicle_type'].value_counts()
    print(vehicle_type_counts)
    bid_vehicle_types = set(vehicle_type_counts.index)

    picked_rows = []
    seen = set()

    def _collect(only_bid_vehicle_types):
        # Append unseen, not-previously-bid cars in rank order until `limit`.
        for row in ranked_rows:
            if len(picked_rows) >= limit:
                return
            car = car_keys[row]
            if car in seen or car in previous_bids_set:
                continue
            if only_bid_vehicle_types and car[2] not in bid_vehicle_types:
                continue
            picked_rows.append(row)
            seen.add(car)

    _collect(True)   # first pass: vehicle types the buyer already bids on
    _collect(False)  # second pass: top up from every other vehicle type

    # picked_rows is already unique per car, so rows and scores stay aligned.
    recommended_cars = df.iloc[picked_rows][feature_cols].copy()
    recommended_cars['similarity_score'] = [sim_scores[row] for row in picked_rows]

    return recommended_cars

# Example: recommend 5 cars for an existing buyer (ID 6262)
buyer_id = 6262
recommended_cars = get_recommendations(buyer_id, 5)
print(f"Recommendations for buyer {buyer_id}:", recommended_cars, sep="\n")

# Example: a buyer ID with no bids falls back to the cold-start ranking
new_buyer_id = 999
recommended_cars_new = get_recommendations(new_buyer_id, 3)
print(f"Recommendations for new buyer {new_buyer_id}:", recommended_cars_new, sep="\n")


Previous bids for buyer 6262: [['TATA', 'FULLY-BUILT/SIGNA 3525 HD', 'CV']]
CV    4
Name: vehicle_type, dtype: int64
Recommendations for buyer 6262:
              make                       model vehicle_type  similarity_score
790  ASHOK LEYLAND                   BADA DOST           CV          0.115922
701       MAHINDRA               BLAZO 35 BSVI           CV          0.112454
764       MAHINDRA  BOLERO PIK UP CBC MS 1.3 T           CV          0.098083
629       MAHINDRA               FURIO 11.10 T           CV          0.097143
77            TATA                       TIGOR           4W          0.177054
No bids found for buyer_id 999. Recommending based on overall popularity.
Recommendations for new buyer 999:
       make                      model vehicle_type
0   HYUNDAI                      R 215           CE
1      TATA  FULLY-BUILT/SIGNA 3525 HD           CV
2  MAHINDRA              BLAZO 35 BSVI           CV
