**Mount Google Drive**

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so that files stored in Drive can be
# opened by path from this notebook (Colab-only; requires user authorization).
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Importing Libraries**

In [None]:
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.preprocessing import normalize

**Loading and Pre-processing the Data**

In [None]:
# Read the tab-separated training file from the mounted Drive folder.
# Every column is kept: the text columns are combined into features further down.
train_csv_path = '/content/drive/My Drive/Colab Notebooks/mercari/train.csv'
data = pd.read_csv(train_csv_path, sep='\t')

# Preview the first five rows
data.head()

Unnamed: 0,train_id,name,item_condition_id,category_name,brand_name,price,shipping,item_description
0,0,MLB Cincinnati Reds T Shirt Size XL,3,Men/Tops/T-shirts,,10.0,1,No description yet
1,1,Razer BlackWidow Chroma Keyboard,3,Electronics/Computers & Tablets/Components & P...,Razer,52.0,0,This keyboard is in great condition and works ...
2,2,AVA-VIV Blouse,1,Women/Tops & Blouses/Blouse,Target,10.0,1,Adorable top with a hint of lace and a key hol...
3,3,Leather Horse Statues,1,Home/Home Décor/Home Décor Accents,,35.0,1,New with tags. Leather horses. Retail for [rm]...
4,4,24K GOLD plated rose,1,Women/Jewelry/Necklaces,,44.0,0,Complete with certificate of authenticity


**Handling the Missing Data**

In [None]:
# Fill the missing values.
# The filled column is assigned back explicitly: calling
# `data[col].fillna(..., inplace=True)` operates on a column selection, which
# emits a chained-assignment warning in recent pandas and does not update
# `data` at all once Copy-on-Write is enabled.
fill_values = {
    'category_name': 'Unknown',
    'brand_name': 'Unknown',
    'item_description': 'No description yet',
}
for column, placeholder in fill_values.items():
    data[column] = data[column].fillna(placeholder)

**Creating a Product-Based Recommendation System**

In [None]:
# Combine relevant features into a single string.
# `name` is not covered by the missing-value step, so it is guarded here: one
# NaN would turn the whole concatenated value into NaN, which the vectorizer
# cannot process as a document.
data['combined_features'] = (
    data['name'].fillna('') + ' '
    + data['category_name'] + ' '
    + data['brand_name'] + ' '
    + data['item_description']
)

# Vectorize the combined features
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['combined_features'])

# Reduce the dimensionality of the matrix using SVD.
# A fixed random_state makes the randomized solver reproducible across runs.
svd = TruncatedSVD(n_components=100, random_state=42)

# L2-normalize every reduced row so that a plain dot product (linear_kernel)
# between two rows equals their cosine similarity. The SVD output is not
# unit-length, so without this step the scores would be raw dot products.
tfidf_reduced = normalize(svd.fit_transform(tfidf_matrix), copy=False)

# NOTE: the full pairwise similarity matrix is intentionally NOT materialized.
# Its size grows quadratically with the number of rows, which is not feasible
# for a large training set; similarities are computed per query instead.


# Function to get product recommendations
def get_recommendations(product_id, cosine_sim=None, top_n=10):
    """Return the train_ids of the products most similar to `product_id`.

    Parameters
    ----------
    product_id : value compared against ``data['train_id']``
        Identifier of the query product.
    cosine_sim : 2-D array-like, optional
        Precomputed similarity matrix whose row/column positions follow the
        row order of ``data``. When omitted, the similarities for the query
        row are computed on demand from ``tfidf_reduced``.
    top_n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.Series
        ``train_id`` values of the recommended rows, most similar first,
        indexed by the corresponding index labels of ``data``.

    Raises
    ------
    IndexError
        If `product_id` is not present in ``data['train_id']``.
    """
    # Work with row positions (not index labels) so the lookup stays aligned
    # with the similarity rows even if `data` has a non-default index.
    positions = (data['train_id'] == product_id).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(f"train_id {product_id!r} not found in data")
    idx = int(positions[0])

    if cosine_sim is None:
        # One query row against all rows: a single row of scores.
        row_scores = linear_kernel(tfidf_reduced[idx:idx + 1], tfidf_reduced).ravel()
    else:
        row_scores = cosine_sim[idx]

    # Drop the query product explicitly instead of assuming it ranks first;
    # exact duplicates can tie with it.
    scores = pd.Series(row_scores).drop(idx)
    best_positions = scores.nlargest(top_n).index.to_numpy()
    return data['train_id'].iloc[best_positions]


# Output
recommended_products = get_recommendations(product_id=100)
print(recommended_products)

**Creating a Price-Based Recommendation System**

In [None]:
# Price-based recommendations: every product priced close to a target price
def get_price_based_recommendations(price, range=10):
    """Return the rows of `data` whose price lies within `range` of `price`.

    Both ends of the interval [price - range, price + range] are inclusive.
    Note that the `range` parameter shadows the built-in of the same name
    inside this function.
    """
    within_window = data['price'].between(price - range, price + range)
    return data[within_window]

# Output
price_recommendations = get_price_based_recommendations(price=50)
print(price_recommendations)

**Combining Product and Price-Based Recommendations**

In [None]:
# Function to get combined recommendations
def get_combined_recommendations(product_id, price_range=10):
    """Return similar products whose price is close to the query product's price.

    The products returned by `get_recommendations(product_id)` are kept only
    if their price lies in [price - price_range, price + price_range]
    (inclusive), where `price` is the price of `product_id`.

    Rows are selected through the index labels of the Series returned by
    `get_recommendations`, so they come back in that Series' order (most
    similar first) rather than in dataset order, and only those few rows are
    examined instead of scanning the whole table.

    Returns
    -------
    pandas.DataFrame
        The matching rows of `data` with all columns.
    """
    product_recommendations = get_recommendations(product_id)
    price = data.loc[data['train_id'] == product_id, 'price'].iloc[0]

    # Look up the recommended rows directly, preserving the ranking order.
    candidates = data.loc[product_recommendations.index]
    in_price_window = candidates['price'].between(price - price_range, price + price_range)
    return candidates[in_price_window]

# Output
combined_recommendations = get_combined_recommendations(product_id=100)
print(combined_recommendations)
