<a href="https://colab.research.google.com/github/anuragdhirubhai/mobile_recommendation_system/blob/main/recommendation_phone.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# The dataset path used later in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Step 1: Import Libraries and Load Data



In [None]:
import pandas as pd

# Load the dataset from the mounted Drive folder into a DataFrame.
# The path is absolute and only resolves after drive.mount('/content/drive').
data_path = "/content/drive/MyDrive/NLP_project_GPT3/mobile_recommendation_system_dataset.csv"
data = pd.read_csv(data_path)

# Display the first few rows of the dataset to understand its structure
# (bare last expression so the notebook renders it as a table).
data.head()

Unnamed: 0,name,ratings,price,imgURL,corpus
0,"REDMI Note 12 Pro 5G (Onyx Black, 128 GB)",4.2,23999,https://rukminim2.flixcart.com/image/312/312/x...,Storage128 GBRAM6 SystemAndroid 12Processor T...
1,"OPPO F11 Pro (Aurora Green, 128 GB)",4.5,"₹20,999",https://rukminim2.flixcart.com/image/312/312/k...,Storage128 GBRAM6 GBExpandable Storage256GB S...
2,"REDMI Note 11 (Starburst White, 64 GB)",4.2,13149,https://rukminim2.flixcart.com/image/312/312/x...,Storage64 GBRAM4 SystemAndroid 11Processor Sp...
3,"OnePlus Nord CE 5G (Blue Void, 256 GB)",4.1,21999,https://rukminim2.flixcart.com/image/312/312/x...,Storage256 GBRAM12 SystemAndroid Q 11Processo...
4,"APPLE iPhone 13 mini (Blue, 128 GB)",4.6,3537,https://rukminim2.flixcart.com/image/312/312/k...,Storage128 SystemiOS 15Processor TypeA15 Bion...


Step 2: Data Information


In [None]:
# Display basic information about the dataset: column names, non-null counts
# and dtypes. info() prints its report directly rather than returning it.
data.info()

# Display summary statistics for the numeric columns; as the last expression
# of the cell, the resulting table is rendered as the cell output.
data.describe()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2546 entries, 0 to 2545
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   name     2546 non-null   object 
 1   ratings  2546 non-null   float64
 2   price    2546 non-null   object 
 3   imgURL   2546 non-null   object 
 4   corpus   2534 non-null   object 
dtypes: float64(1), object(4)
memory usage: 99.6+ KB


Unnamed: 0,ratings
count,2546.0
mean,4.295797
std,0.214691
min,2.9
25%,4.2
50%,4.3
75%,4.4
max,5.0


Step 3: Data Preprocessing

In [None]:
from sklearn.preprocessing import LabelEncoder

# Handling missing values in 'corpus' column.
# The filled column is assigned back instead of calling
# data['corpus'].fillna(..., inplace=True): the in-place call on a column
# selection is chained assignment, which warns on recent pandas versions and
# leaves `data` unchanged when Copy-on-Write is enabled.
data['corpus'] = data['corpus'].fillna('')

# Convert the 'price' column to numerical format.
# Values are cast to str first so this cell can be re-run after the column has
# already been converted to float (the .str accessor rejects numeric columns).
# regex=False makes the replacements literal on every pandas version.
data['price'] = (
    data['price']
    .astype(str)
    .str.replace('₹', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)

# Convert categorical variables to numerical format using Label Encoding.
# NOTE: this overwrites the readable phone names in 'name' with integer codes;
# the fitted encoder is kept in `label_encoders` so the original names can be
# recovered with label_encoders['name'].inverse_transform(...).
label_encoders = {}
categorical_columns = ['name']
for column in categorical_columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Display the modified dataset
data.head()


Unnamed: 0,name,ratings,price,imgURL,corpus
0,1086,4.2,23999.0,https://rukminim2.flixcart.com/image/312/312/x...,Storage128 GBRAM6 SystemAndroid 12Processor T...
1,754,4.5,20999.0,https://rukminim2.flixcart.com/image/312/312/k...,Storage128 GBRAM6 GBExpandable Storage256GB S...
2,1055,4.2,13149.0,https://rukminim2.flixcart.com/image/312/312/x...,Storage64 GBRAM4 SystemAndroid 11Processor Sp...
3,945,4.1,21999.0,https://rukminim2.flixcart.com/image/312/312/x...,Storage256 GBRAM12 SystemAndroid Q 11Processo...
4,142,4.6,3537.0,https://rukminim2.flixcart.com/image/312/312/k...,Storage128 SystemiOS 15Processor TypeA15 Bion...


Step 4: Feature Extraction



In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Initialize the TF-IDF vectorizer; stop_words='english' drops scikit-learn's
# built-in English stop-word list before terms are weighted.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')

# Fit the vocabulary on the 'corpus' text and transform it in one step.
# The result has one row per row of `data` and one column per learned term.
tfidf_matrix = tfidf_vectorizer.fit_transform(data['corpus'])

# Display the shape of the TF-IDF matrix as (documents, terms)
print("TF-IDF Matrix Shape:", tfidf_matrix.shape)


TF-IDF Matrix Shape: (2546, 1885)


Step 5: Calculate Similarity

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the TF-IDF rows of all phones.
# Called with a single matrix, cosine_similarity compares its rows with
# each other, which is the same computation as passing the matrix twice.
cosine_sim = cosine_similarity(tfidf_matrix)

# Report the dimensions of the resulting square similarity matrix
print("Cosine Similarity Matrix Shape:", cosine_sim.shape)


Cosine Similarity Matrix Shape: (2546, 2546)


Step 6: Define the Recommendation Function

In [None]:
def get_recommendations(phone_name, cosine_sim_matrix, df, top_n=5, name_column='name'):
    """Return the names of the phones most similar to ``phone_name``.

    Note: a later cell in this notebook defines another ``get_recommendations``
    that takes a row index as its first argument; once that cell runs it
    shadows this definition.

    Args:
        phone_name: Value to look up in ``df[name_column]``. If several rows
            match, the first one is used.
        cosine_sim_matrix: Square similarity matrix, indexable as ``matrix[i]``,
            whose row ``i`` holds the similarity of row ``i`` of ``df`` to every
            row of ``df`` (same row order as ``df``).
        df: DataFrame containing ``name_column``.
        top_n: Maximum number of recommendations; values <= 0 give an empty result.
        name_column: Column holding the phone names. Defaults to ``'name'``;
            pass ``'mobile_name'`` after the columns have been renamed.

    Returns:
        NumPy array with up to ``top_n`` values from ``df[name_column]``,
        ordered by decreasing similarity (ties keep row order). The queried
        row itself is never part of the result.

    Raises:
        IndexError: If no row of ``df[name_column]`` equals ``phone_name``.
    """
    # Use row positions, not index labels: both the similarity matrix and
    # .iloc below are positional, so this stays correct for any DataFrame index.
    match_positions = (df[name_column] == phone_name).to_numpy().nonzero()[0]
    if len(match_positions) == 0:
        raise IndexError(f"No phone named {phone_name!r} found in df[{name_column!r}]")
    index = int(match_positions[0])

    # Get the cosine similarity scores for the given row
    scores = cosine_sim_matrix[index]

    # Drop the queried row explicitly instead of assuming it sorts first:
    # another phone with an identical corpus ties at the top score and, being
    # earlier in row order, would otherwise push the queried phone into the results.
    candidates = [i for i in range(len(scores)) if i != index]

    # Stable descending sort, so equally similar phones keep their row order
    candidates.sort(key=lambda i: scores[i], reverse=True)

    # Get top N similar mobile phones
    top_similar_indices = candidates[:max(top_n, 0)]
    top_similar_phones = df[name_column].iloc[top_similar_indices].values

    return top_similar_phones

In [None]:
# Give every column a "mobile_" prefix. The mapping is applied in place, so
# cells below must use the new names (e.g. 'mobile_name', 'mobile_corpus').
# NOTE(review): data.info() above lists no 'index' column, so that entry is
# presumably a no-op; rename() skips keys that match no column.
column_renames = {
    'index': 'mobile_index',
    'name': 'mobile_name',
    'ratings': 'mobile_ratings',
    'price': 'mobile_price',
    'imgURL': 'mobile_imgURL',
    'corpus': 'mobile_corpus',
}
data.rename(columns=column_renames, inplace=True)

Step 7: Get Recommendations for a Phone



In [None]:
# Import necessary libraries
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Recompute the phone-to-phone cosine similarity from the TF-IDF matrix.
# With one argument, cosine_similarity compares the matrix rows with each
# other, which matches passing the same matrix as both inputs.
cosine_sim = cosine_similarity(tfidf_matrix)

# Show the matrix (NumPy abbreviates large arrays when displaying them)
cosine_sim

array([[1.        , 0.10240782, 0.13915577, ..., 0.        , 0.05901247,
        0.25962161],
       [0.10240782, 1.        , 0.03248575, ..., 0.        , 0.01020553,
        0.06897763],
       [0.13915577, 0.03248575, 1.        , ..., 0.        , 0.03282428,
        0.18833022],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.05901247, 0.01020553, 0.03282428, ..., 0.        , 1.        ,
        0.05123464],
       [0.25962161, 0.06897763, 0.18833022, ..., 0.        , 0.05123464,
        1.        ]])

In [None]:
# Define a function to get recommendations
def get_recommendations(input_index, cosine_sim_matrix, df, top_n=5, name_column='mobile_name'):
    """Return the names of the phones most similar to the phone at ``input_index``.

    Args:
        input_index: Row position of the query phone; used both as the row of
            ``cosine_sim_matrix`` and as the position to exclude from the result.
            Negative values count from the end.
        cosine_sim_matrix: Square similarity matrix, indexable as ``matrix[i]``,
            with rows in the same order as ``df``.
        df: DataFrame containing ``name_column``.
        top_n: Maximum number of recommendations; values <= 0 give an empty result.
        name_column: Column holding the phone names (default ``'mobile_name'``).

    Returns:
        NumPy array with up to ``top_n`` values from ``df[name_column]``,
        ordered by decreasing similarity (ties keep row order). The query row
        itself is never part of the result.
    """
    scores = cosine_sim_matrix[input_index]

    # Normalise a negative position so the self-exclusion below still matches.
    self_position = input_index if input_index >= 0 else input_index + len(scores)

    # Exclude the query row explicitly rather than dropping whatever sorts
    # first: a phone with an identical corpus ties at the top score and would
    # otherwise cause the query phone itself to be recommended.
    candidates = [i for i in range(len(scores)) if i != self_position]

    # Stable descending sort, so equally similar phones keep their row order.
    candidates.sort(key=lambda i: scores[i], reverse=True)

    similar_indices = candidates[:max(top_n, 0)]
    similar_phones = df[name_column].iloc[similar_indices].values
    return similar_phones

In [None]:
# Get user input for a mobile phone.
# NOTE(review): the loop in the next cell assigns user_input again from its own
# prompt before reading it, so the value captured here is not used there.
user_input = input("Enter the name of a mobile phone: ")

Enter the name of a mobile phone: 1087


In [None]:
# Interactive lookup: keep prompting until the user types 'end'.
while True:
    # Strip surrounding whitespace so "end " or a padded name still works.
    user_input = input("Enter the name of a mobile phone (or 'end' to exit): ").strip()
    if user_input.lower() == 'end':
        break

    # 'mobile_name' was label-encoded to integers during preprocessing, while
    # input() always returns text. Comparing on the string form lets typed
    # input such as "1087" match; a direct == between int and str never does.
    name_matches = (data['mobile_name'].astype(str) == user_input).to_numpy()
    if not name_matches.any():
        # No query row exists, so there is nothing to base recommendations on.
        # (Previously the neighbours of row 0 were printed for every miss.)
        print("Mobile phone not found in the dataset.")
        continue

    # Row position of the first match; positions line up with cosine_sim rows.
    user_input_index = int(name_matches.argmax())
    print(f"\nExact match found for {user_input}:")
    print(f"Name: {user_input}")
    print(f"Corpus: {data['mobile_corpus'].iloc[user_input_index]}\n")

    # Recommend phones similar to the phone that was actually requested.
    similar_phones = get_recommendations(user_input_index, cosine_sim, data)
    print("Here are some similar mobile phones:")
    for i, suggestion in enumerate(similar_phones, start=1):
        # Look up the corpus of the first row carrying the suggested name.
        suggestion_corpus = data.loc[data['mobile_name'] == suggestion, 'mobile_corpus'].iloc[0]
        print(f"{i}. Name: {suggestion}\n   Corpus: {suggestion_corpus}\n")

Enter the name of a mobile phone (or 'end' to exit): end
