In [1]:
import pandas as pd
import numpy as np 

from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

import warnings; warnings.simplefilter('ignore')

In [2]:
# Load the raw phone listings; the path is relative to the notebook's working directory.
df = pd.read_csv('./Mobile_dataset/mobile_recommendation_system_dataset.csv')


In [3]:
# Preview the first rows of the freshly loaded table.
df.head()

Unnamed: 0,name,ratings,price,imgURL,corpus
0,"REDMI Note 12 Pro 5G (Onyx Black, 128 GB)",4.2,23999,https://rukminim2.flixcart.com/image/312/312/x...,Storage128 GBRAM6 SystemAndroid 12Processor T...
1,"OPPO F11 Pro (Aurora Green, 128 GB)",4.5,"₹20,999",https://rukminim2.flixcart.com/image/312/312/k...,Storage128 GBRAM6 GBExpandable Storage256GB S...
2,"REDMI Note 11 (Starburst White, 64 GB)",4.2,13149,https://rukminim2.flixcart.com/image/312/312/x...,Storage64 GBRAM4 SystemAndroid 11Processor Sp...
3,"OnePlus Nord CE 5G (Blue Void, 256 GB)",4.1,21999,https://rukminim2.flixcart.com/image/312/312/x...,Storage256 GBRAM12 SystemAndroid Q 11Processo...
4,"APPLE iPhone 13 mini (Blue, 128 GB)",4.6,3537,https://rukminim2.flixcart.com/image/312/312/k...,Storage128 SystemiOS 15Processor TypeA15 Bion...


In [4]:
# Work on a copy so the in-place column edits that follow do not modify `df`.
tmp = df.copy()

In [5]:
# Strip every character that is not a digit or a dot (e.g. '₹20,999' -> '20999')
# so the column can be cast to a number afterwards.
tmp['price'] = tmp['price'].str.replace(r"[^\d\.]", "", regex=True)

In [6]:
# Cast the cleaned price strings to float64.
# NOTE: once this has run the column is numeric, so the `.str.replace` cell above
# cannot be re-run on `tmp` without first recreating it from `df`.
tmp['price'] = tmp['price'].astype('float64')

In [7]:
# Lower-case the spec text; the extraction patterns used later are written in lower case.
# Missing corpus values stay missing.
tmp['corpus'] = tmp['corpus'].str.lower()

In [56]:
import re

def extract_storage(corpus):
    """Extract the storage size from a lower-cased spec string.

    Looks for the first run of digits that immediately follows the literal
    text ``storage`` (e.g. ``'storage128 gbram6 ...'`` -> ``128``).

    Parameters
    ----------
    corpus : str or missing
        Lower-cased specification text. Non-string values (``None``, NaN)
        are treated as "no information available".

    Returns
    -------
    int or None
        The number following ``storage``, or ``None`` when the pattern is
        absent or ``corpus`` is not a string.
    """
    # Rows with a missing corpus arrive as float NaN; re.search would raise
    # TypeError on them, so bail out before touching the regex.
    if not isinstance(corpus, str):
        return None
    match = re.search(r'storage(\d+)', corpus)
    return int(match.group(1)) if match else None

# Derive a 'storage' column by running extract_storage on every corpus value;
# rows where the pattern is not found receive a missing value.
tmp['storage'] = tmp['corpus'].apply(extract_storage)

In [57]:
def extract_ram(corpus):
    """Extract the RAM size from a lower-cased spec string.

    Looks for the first run of digits that immediately follows the literal
    text ``ram`` (e.g. ``'storage128 gbram6 ...'`` -> ``6``).

    Parameters
    ----------
    corpus : str or missing
        Lower-cased specification text. Non-string values (``None``, NaN)
        are treated as "no information available".

    Returns
    -------
    int or None
        The number following ``ram``, or ``None`` when the pattern is absent
        (some listings carry no RAM figure) or ``corpus`` is not a string.
    """
    # A missing corpus is a float NaN, which re.search cannot scan.
    if not isinstance(corpus, str):
        return None
    match = re.search(r'ram(\d+)', corpus)
    return int(match.group(1)) if match else None

def extract_system(corpus):
    """Extract the operating system from a lower-cased spec string.

    Captures the shortest text between the first ``system`` and the next
    ``processor`` (e.g. ``'... systemandroid 12processor ...'`` ->
    ``'android 12'``) and strips surrounding whitespace.

    Parameters
    ----------
    corpus : str or missing
        Lower-cased specification text. Non-string values (``None``, NaN)
        are treated as "no information available".

    Returns
    -------
    str or None
        The captured text, or ``None`` when the pattern is absent or
        ``corpus`` is not a string.
    """
    # A missing corpus is a float NaN, which re.search cannot scan.
    if not isinstance(corpus, str):
        return None
    match = re.search(r'system(.*?)processor', corpus)
    return match.group(1).strip() if match else None

def extract_processor(corpus):
    """Extract the token that follows ``processor `` in a lower-cased spec string.

    Captures the shortest text between ``'processor '`` (with a trailing
    space) and the next space.

    NOTE(review): in the sample rows the text after ``Processor `` begins
    with a glued field label (e.g. ``'processor typea15 bionic'``), so the
    captured token would be ``'typea15'`` rather than a clean chip name.
    Confirm against the full corpus before relying on this column.

    Parameters
    ----------
    corpus : str or missing
        Lower-cased specification text. Non-string values (``None``, NaN)
        are treated as "no information available".

    Returns
    -------
    str or None
        The captured token, or ``None`` when the pattern is absent or
        ``corpus`` is not a string.
    """
    # A missing corpus is a float NaN, which re.search cannot scan.
    if not isinstance(corpus, str):
        return None
    match = re.search(r'processor (.*?) ', corpus)
    return match.group(1).strip() if match else None

In [58]:
# Add the three derived spec columns in one step; each extractor is run on
# every corpus value and the new columns are appended in this order.
tmp = tmp.assign(
    ram=tmp['corpus'].apply(extract_ram),
    system=tmp['corpus'].apply(extract_system),
    processor=tmp['corpus'].apply(extract_processor),
)

In [8]:
# Count missing values per column.
tmp.isna().sum()

name        0
ratings     0
price       0
imgURL      0
corpus     12
dtype: int64

In [10]:
# Only a missing corpus makes a row unusable for the text-similarity model, so
# restrict the drop to that column. A bare dropna() would also discard every
# row whose derived storage/ram/system/processor value is missing (the
# extractors return None when their pattern is not found, e.g. listings
# without a RAM figure).
tmp = tmp.dropna(subset=['corpus'])

In [12]:
# From here on `df` is the cleaned table; the raw frame loaded from the CSV is
# no longer reachable under this name.
df = tmp.copy()

In [13]:
# Word uni- and bi-gram TF-IDF over the spec text, with English stop words removed.
# min_df=1 (the library default) keeps every term that occurs in at least one
# document; an integer min_df below 1 is rejected by scikit-learn's parameter
# validation in recent releases.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(df['corpus'])

In [14]:
# Pairwise similarity between all rows of the TF-IDF matrix. TfidfVectorizer
# L2-normalises rows by default, so the plain dot product computed by
# linear_kernel equals cosine similarity here. The result is a dense
# (n_rows x n_rows) array.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [50]:
# Inspect the similarity matrix (the diagonal is each row compared with itself).
cosine_sim

array([[1.        , 0.04786072, 0.0884595 , ..., 0.05959329, 0.03551622,
        0.16817265],
       [0.04786072, 1.        , 0.0132211 , ..., 0.05689957, 0.00488636,
        0.03936838],
       [0.0884595 , 0.0132211 , 1.        , ..., 0.30769847, 0.01731517,
        0.09351258],
       ...,
       [0.05959329, 0.05689957, 0.30769847, ..., 1.        , 0.01331809,
        0.050823  ],
       [0.03551622, 0.00488636, 0.01731517, ..., 0.01331809, 1.        ,
        0.03058663],
       [0.16817265, 0.03936838, 0.09351258, ..., 0.050823  , 0.03058663,
        1.        ]])

In [49]:
# Peek at the (row position, similarity) pairs for the first phone; slicing
# before enumerating avoids materialising the whole row as a list.
list(enumerate(cosine_sim[0][:5]))

[(0, 1.0000000000000002),
 (1, 0.04786071843668162),
 (2, 0.08845950104082932),
 (3, 0.046522460951568576),
 (4, 0.05194747068286624)]

In [15]:
# Give the frame a fresh 0..n-1 index so that index labels equal row positions
# in cosine_sim (rows were dropped earlier). reset_index() without drop=True
# keeps the old labels in a new 'index' column.
df = df.reset_index()
names = df['name']
# Lookup table: phone name -> row position. Names are not unique in this data
# (the same name appears on several rows), so indices[name] can return more
# than one position.
indices = pd.Series(df.index, index=df['name'])

In [41]:
def get_recommendations(name, top_n=30):
    """Return the phones most similar to ``name``, cheapest per rating point first.

    Relies on the module-level ``indices`` (name -> row position),
    ``cosine_sim`` (pairwise similarity matrix) and ``df`` (cleaned table).

    Parameters
    ----------
    name : str
        Product name exactly as it appears in ``df['name']``. When several
        rows share the name, the first one is used as the query row.
    top_n : int, default 30
        How many of the most similar rows to keep before re-ranking. The
        default matches the previously hard-coded value.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``ratings``, ``price`` and ``wr`` (price divided by
        rating), sorted by ``wr`` ascending.

    Raises
    ------
    KeyError
        If ``name`` is not present in ``indices``.
    """
    idx = indices[name]
    # A duplicated name makes the lookup return a Series of positions instead
    # of a scalar; fall back to the first occurrence so the ranking below
    # works on a single similarity row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Rank every *other* row by similarity. The query row is excluded by
    # position rather than by assuming it sorts first, which is not guaranteed
    # when another row has an identical corpus (similarity 1.0).
    scores = cosine_sim[idx]
    ranked = sorted(
        (i for i in range(len(scores)) if i != idx),
        key=lambda i: scores[i],
        reverse=True,
    )[:top_n]

    recs = df.iloc[ranked][['name', 'ratings', 'price']]
    # assign() returns a new frame, avoiding a chained-assignment write on a slice of df.
    recs = recs.assign(wr=recs['price'] / recs['ratings'])
    return recs.sort_values('wr', ascending=True)
    

In [32]:
# NOTE(review): the table recorded under this cell has no 'wr' column, so it
# appears to come from an earlier version of get_recommendations; re-run to refresh it.
get_recommendations(df['name'][0])

Unnamed: 0,name,ratings,price
68,"REDMI Note 12 Pro 5G (Stardust Purple, 128 GB)",4.2,23999.0
303,"REDMI Note 12 Pro 5G (Glacier Blue, 128 GB)",4.2,23999.0
616,"REDMI Note 12 Pro 5G (Stardust Purple, 128 GB)",4.2,24999.0
1461,"REDMI Note 12 Pro 5G (Glacier Blue, 128 GB)",4.2,24999.0
457,"REDMI Note 12 Pro 5G (Onyx Black, 256 GB)",4.2,26999.0
1027,"REDMI Note 12 Pro 5G (Stardust Purple, 256 GB)",4.2,26999.0
1735,"REDMI Note 12 Pro 5G (Glacier Blue, 256 GB)",4.2,26999.0
569,"REDMI Note 12 Pro+ 5G (Arctic White, 256 GB)",4.2,29999.0
1123,"REDMI Note 12 Pro+ 5G (Arctic White, 256 GB)",4.2,32537.0
1540,"REDMI Note 12 Pro+ 5G (Iceberg Blue, 256 GB)",4.2,32537.0


In [42]:
# Recommendations for the phone stored in the first row of the cleaned table.
get_recommendations(df.loc[0, 'name'])

Unnamed: 0,name,ratings,price,wr
531,"REDMI Note 12 (Sunrise Gold, 128 GB)",4.1,15999.0,3902.195122
1481,"vivo V23e 5G (Sunshine Gold, 128 GB)",4.5,23900.0,5311.111111
2048,"Xiaomi 11i Hypercharge 5G (Stealth Black, 128 GB)",4.2,23999.0,5714.047619
528,"Xiaomi 11i Hypercharge 5G (Camo Green, 128 GB)",4.2,23999.0,5714.047619
257,"Xiaomi 11i Hypercharge 5G (Pacific Pearl, 128 GB)",4.2,23999.0,5714.047619
68,"REDMI Note 12 Pro 5G (Stardust Purple, 128 GB)",4.2,23999.0,5714.047619
303,"REDMI Note 12 Pro 5G (Glacier Blue, 128 GB)",4.2,23999.0,5714.047619
1791,"vivo V23e 5G (Midnight Blue, 128 GB)",4.3,24999.0,5813.72093
1675,"realme 9 Pro+ 5G (Aurora Green, 128 GB)",4.3,24999.0,5813.72093
2163,"Xiaomi 11i 5G (Stealth Black, 128 GB)",4.2,24999.0,5952.142857


In [51]:
# Look up by explicit product name; it must match a value of df['name'] exactly.
get_recommendations('APPLE iPhone 13 mini (Blue, 128 GB)')

Unnamed: 0,name,ratings,price,wr
429,"APPLE iPhone 13 mini ((PRODUCT)RED, 128 GB)",4.7,3537.0,752.553191
2199,"APPLE iPhone 13 (Midnight, 128 GB)",4.7,3537.0,752.553191
2142,"APPLE iPhone 13 (Green, 128 GB)",4.7,3537.0,752.553191
1719,"APPLE iPhone 13 ((PRODUCT)RED, 128 GB)",4.7,3537.0,752.553191
1243,"APPLE iPhone 13 (Starlight, 128 GB)",4.7,3537.0,752.553191
832,"APPLE iPhone 13 (Blue, 128 GB)",4.7,3537.0,752.553191
1366,"APPLE iPhone 13 mini (Green, 128 GB)",4.7,3537.0,752.553191
207,"APPLE iPhone 13 mini (Midnight, 128 GB)",4.5,3537.0,786.0
720,"APPLE iPhone 13 mini (Starlight, 128 GB)",4.5,3537.0,786.0
2295,"APPLE iPhone 13 mini (Pink, 128 GB)",4.4,3537.0,803.863636
