**Import Library and Load The Data**

In [89]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [90]:
df = pd.read_csv('/content/drive/MyDrive/Trending Topics on Statistics/Final_Dataku.csv',sep=";")
df.head()

Unnamed: 0,Brand,Laptop_name,Display_size,Processor_type,Graphics_card,Disk_space,Discount_price,Old_price,Rating
0,HP,Notebook 14-df0008nx,14.0,Intel Celeron N4000,Intel HD Graphics 600,64 GB (eMMC),129.0,129.0,0 /
1,Lenovo,IdeaPad 330S-14IKB,14.0,Intel Core i-820U,Intel UHD Graphics 620,1 TB HDD,1849.0,2099.0,3.3 /
2,Huawei,MateBook D Volta,14.0,Intel Core i-820U,NVIDIA GeForce MX10 (2 GB),26 GB SSD,2999.0,3799.0,0 /
3,Dell,Inspiron 1 367,1.6,Intel Core i3-7020U,Intel HD Graphics 620,1 TB HDD,1849.0,1849.0,0 /
4,Asus,VivoBook 1 X10UR,1.6,Intel Core i7-80U,NVIDIA GeForce 930MX (2 GB),1 TB HDD,2499.0,3149.0,0 /


**Data Preparation**

In [91]:
# Cleaning data
# Remove any non-numeric characters (e.g., spaces) from the 'Rating' column
df['Rating'] = df['Rating'].str.replace(r'[^\d.]+', '', regex=True)

# Convert the 'Rating' column to float or numeric data type
df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')

# The 'coerce' option will turn any invalid parsing (e.g., non-numeric values) into NaN (Not a Number).
# If you don't want NaN values, you can choose to drop or fill them with appropriate values.
# For example, to drop rows with NaN values:
# df.dropna(subset=['Rating'], inplace=True)

# Display the DataFrame to verify the changes
print(df.head())


    Brand           Laptop_name  Display_size        Processor_type  \
0      HP  Notebook 14-df0008nx          14.0   Intel Celeron N4000   
1  Lenovo    IdeaPad 330S-14IKB          14.0     Intel Core i-820U   
2  Huawei      MateBook D Volta          14.0     Intel Core i-820U   
3    Dell        Inspiron 1 367           1.6   Intel Core i3-7020U   
4    Asus      VivoBook 1 X10UR           1.6     Intel Core i7-80U   

                  Graphics_card     Disk_space  Discount_price  Old_price  \
0         Intel HD Graphics 600   64 GB (eMMC)           129.0      129.0   
1        Intel UHD Graphics 620       1 TB HDD          1849.0     2099.0   
2    NVIDIA GeForce MX10 (2 GB)      26 GB SSD          2999.0     3799.0   
3         Intel HD Graphics 620       1 TB HDD          1849.0     1849.0   
4   NVIDIA GeForce 930MX (2 GB)       1 TB HDD          2499.0     3149.0   

   Rating  
0     0.0  
1     3.3  
2     0.0  
3     0.0  
4     0.0  


In [92]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Brand           205 non-null    object 
 1   Laptop_name     204 non-null    object 
 2   Display_size    205 non-null    float64
 3   Processor_type  205 non-null    object 
 4   Graphics_card   205 non-null    object 
 5   Disk_space      205 non-null    object 
 6   Discount_price  205 non-null    float64
 7   Old_price       205 non-null    float64
 8   Rating          205 non-null    float64
dtypes: float64(4), object(5)
memory usage: 14.5+ KB


In [93]:
df.shape

(205, 9)

In [94]:
df.isnull().sum()

Brand             0
Laptop_name       1
Display_size      0
Processor_type    0
Graphics_card     0
Disk_space        0
Discount_price    0
Old_price         0
Rating            0
dtype: int64

In [95]:
df.dropna(inplace=True)

In [96]:
# selecting only the relevant columns
selected_feature = ['Brand', 'Laptop_name', 'Display_size', 'Processor_type', 'Graphics_card', 'Disk_space', 'Discount_price','Old_price']
print(selected_feature)

['Brand', 'Laptop_name', 'Display_size', 'Processor_type', 'Graphics_card', 'Disk_space', 'Discount_price', 'Old_price']


In [98]:
# replacing the null values with empty string
for feature in selected_feature:
    df[feature] = df[feature].fillna('')

In [99]:
# Print the column names in the DataFrame
print(df.columns)

Index(['Brand', 'Laptop_name', 'Display_size', 'Processor_type',
       'Graphics_card', 'Disk_space', 'Discount_price', 'Old_price', 'Rating'],
      dtype='object')


In [100]:
# Convert all columns to string data type before concatenation
df['Brand'] = df['Brand'].astype(str)
df['Laptop_name'] = df['Laptop_name'].astype(str)
df['Display_size'] = df['Display_size'].astype(str)
df['Processor_type'] = df['Processor_type'].astype(str)
df['Graphics_card'] = df['Graphics_card'].astype(str)
df['Disk_space'] = df['Disk_space'].astype(str)

# Combine all selected features
combined_features = df['Brand'] + ' ' + df['Laptop_name'] + ' ' + df['Display_size'] + ' ' + df['Processor_type'] + ' ' + df['Graphics_card'] + ' ' + df['Disk_space']

# Print the combined features
print(combined_features)


0      HP Notebook 14-df0008nx 14.0  Intel Celeron N4...
1      Lenovo IdeaPad 330S-14IKB 14.0  Intel Core i-8...
2      Huawei MateBook D Volta 14.0  Intel Core i-820...
3      Dell Inspiron 1 367 1.6  Intel Core i3-7020U  ...
4      Asus VivoBook 1 X10UR 1.6  Intel Core i7-80U  ...
                             ...                        
200    Lenovo IdeaPad 320-1IKBRN 1.6  Intel Core i-82...
201    Huawei MateBook D 1.6  Intel Core i7-80U  NVID...
202    Apple MacBook Pro (Retina + Touch Bar) 1.4  In...
203    Apple MacBook Pro (Retina + Touch Bar) 1.4  In...
204    Dell Inspiron 1 376 1.6  Intel Core i-820U  AM...
Length: 204, dtype: object


In [101]:
# Replace NaN values with empty strings in 'Laptop_name' column
df['Laptop_name'] = df['Laptop_name'].fillna('')

In [102]:
print(combined_features)

0      HP Notebook 14-df0008nx 14.0  Intel Celeron N4...
1      Lenovo IdeaPad 330S-14IKB 14.0  Intel Core i-8...
2      Huawei MateBook D Volta 14.0  Intel Core i-820...
3      Dell Inspiron 1 367 1.6  Intel Core i3-7020U  ...
4      Asus VivoBook 1 X10UR 1.6  Intel Core i7-80U  ...
                             ...                        
200    Lenovo IdeaPad 320-1IKBRN 1.6  Intel Core i-82...
201    Huawei MateBook D 1.6  Intel Core i7-80U  NVID...
202    Apple MacBook Pro (Retina + Touch Bar) 1.4  In...
203    Apple MacBook Pro (Retina + Touch Bar) 1.4  In...
204    Dell Inspiron 1 376 1.6  Intel Core i-820U  AM...
Length: 204, dtype: object


In [103]:
# converting text data to feature vectors
vectorizer = TfidfVectorizer()

In [104]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [105]:
print(feature_vectors)

  (0, 159)	0.30556813668604
  (0, 163)	0.08792847505097365
  (0, 60)	0.2842615329061462
  (0, 53)	0.29070585847033914
  (0, 167)	0.13754257671674894
  (0, 172)	0.19147812109195236
  (0, 200)	0.2842615329061462
  (0, 136)	0.25846488640186555
  (0, 181)	0.14885037900743944
  (0, 157)	0.41696465463478216
  (0, 8)	0.3571299645440364
  (0, 201)	0.41696465463478216
  (0, 174)	0.18158838256712642
  (1, 173)	0.15522840241681268
  (1, 236)	0.15608207364307716
  (1, 59)	0.25951404686077595
  (1, 240)	0.2571174950593634
  (1, 76)	0.26448768756964164
  (1, 140)	0.10806145712762384
  (1, 10)	0.4687745802923902
  (1, 34)	0.43531171406906033
  (1, 179)	0.31277245321042113
  (1, 183)	0.31277245321042113
  (1, 167)	0.18469265620398415
  (1, 181)	0.1998768129265825
  :	:
  (202, 117)	0.27124459223705044
  (202, 239)	0.27124459223705044
  (202, 5)	0.23920503233395357
  (202, 209)	0.48437449156920487
  (202, 215)	0.21647260281277508
  (202, 189)	0.2101476858815056
  (202, 114)	0.2101476858815056
  (202, 2

**Cosine Similarity**

In [106]:
# getting the similarity scores
similarity = cosine_similarity(feature_vectors)
print(similarity)

[[1.         0.14078664 0.12087332 ... 0.03110483 0.02831716 0.02534579]
 [0.14078664 1.         0.15857772 ... 0.03781683 0.03442763 0.15058758]
 [0.12087332 0.15857772 1.         ... 0.15155274 0.0900721  0.11952368]
 ...
 [0.03110483 0.03781683 0.15155274 ... 1.         0.86843695 0.12282518]
 [0.02831716 0.03442763 0.0900721  ... 0.86843695 1.         0.11181739]
 [0.02534579 0.15058758 0.11952368 ... 0.12282518 0.11181739 1.        ]]


In [107]:
print(similarity.shape)

(204, 204)


In [108]:
# Create a DataFrame to store the similarity scores
df_similarity = pd.DataFrame(similarity)

In [109]:
print(df_similarity)

          0         1         2         3         4         5         6    \
0    1.000000  0.140787  0.120873  0.109869  0.023454  0.108969  0.064995   
1    0.140787  1.000000  0.158578  0.212128  0.074389  0.123644  0.162174   
2    0.120873  0.158578  1.000000  0.033206  0.122525  0.142896  0.094738   
3    0.109869  0.212128  0.033206  1.000000  0.078296  0.147415  0.101685   
4    0.023454  0.074389  0.122525  0.078296  1.000000  0.186627  0.052741   
..        ...       ...       ...       ...       ...       ...       ...   
199  0.075663  0.350315  0.097014  0.167567  0.071261  0.068956  0.060142   
200  0.040093  0.090121  0.615247  0.094854  0.320183  0.347254  0.182065   
201  0.031105  0.037817  0.151553  0.039803  0.081434  0.181950  0.288515   
202  0.028317  0.034428  0.090072  0.036236  0.074136  0.165644  0.262658   
203  0.025346  0.150588  0.119524  0.280281  0.078332  0.311710  0.056995   

          7         8         9    ...       194       195       196  \
0  

In [110]:
# write to csv
df_similarity.to_csv('/content/drive/MyDrive/Trending Topics on Statistics/cosine_sim.csv', index=False)

**Getting KDRAMA Name**

In [133]:

# User input for laptop search
laptop_name_input = input("Enter the name of the laptop you want to search: ")

# Preprocess the input (if needed)
laptop_name_input = laptop_name_input.lower()  # Convert to lowercase

# Search for the index of the closest laptop name using difflib
closest_match = difflib.get_close_matches(laptop_name_input, df['Laptop_name'], n=1)
if closest_match:
    closest_index = df[df['Laptop_name'] == closest_match[0]].index[0]

    # Get similarity scores for the searched laptop
    similar_scores = df_similarity.iloc[closest_index].sort_values(ascending=False)

    # Get the top 5 most similar laptops (excluding the laptop itself)
    top_similar_laptops = similar_scores.iloc[1:6]

    # Display the results
    print("Top 5 most similar laptops:")
    print(df.iloc[top_similar_laptops.index]['Laptop_name'])
else:
    print("Laptop not found in the dataset.")


Enter the name of the laptop you want to search: MateBook D Volta
Top 5 most similar laptops:
28    MateBook D Volta
7           MateBook D
55          MateBook D
8       MateBook X Pro
44          MateBook D
Name: Laptop_name, dtype: object


In [145]:
# finding close matches for kdrama
find_close_matches = difflib.get_close_matches(laptop_name_input, closest_match )
print(find_close_matches)

['MateBook D Volta']


In [146]:
close_matches = find_close_matches[0]
print(close_matches)

MateBook D Volta


In [147]:
# finding the index of the movie
index_of_laptop = df[df['Laptop_name'] == close_matches].index[0]
print("Indeks laptop terdekat:", index_of_laptop)

Indeks laptop terdekat: 2


In [148]:
# Mengindeks similarity menggunakan integer index_of_laptop
similarity_scores = list(enumerate(similarity[index_of_laptop]))
print(similarity_scores)

[(0, 0.120873323745086), (1, 0.15857771969253168), (2, 1.0000000000000002), (3, 0.03320557288087525), (4, 0.12252455299681561), (5, 0.14289606854139425), (6, 0.09473797682089045), (7, 0.7195128394500737), (8, 0.6222838985733872), (9, 0.20964994594910474), (10, 0.0), (11, 0.15432234027424846), (12, 0.06237712524767084), (13, 0.10687152907156103), (14, 0.15155273507648512), (15, 0.279694148910013), (16, 0.08560210804506639), (17, 0.14134375967199125), (18, 0.5534649659156857), (19, 0.15504755850286894), (20, 0.12741181267496482), (21, 0.12590525431003605), (22, 0.15504755850286894), (23, 0.15504755850286894), (24, 0.40082373367715857), (25, 0.09473797682089045), (26, 0.40082373367715857), (27, 0.09473797682089045), (28, 0.90577613470096), (29, 0.334078897732376), (30, 0.33787602529387495), (31, 0.5037843636223869), (32, 0.2565093564360004), (33, 0.14290574260579184), (34, 0.2991539028688831), (35, 0.2991539028688831), (36, 0.21592877364558322), (37, 0.22795199667516147), (38, 0.240683611

In [149]:
print("Daftar skor kesamaan laptop terdekat:")
print(similarity_scores)

Daftar skor kesamaan laptop terdekat:
[(0, 0.120873323745086), (1, 0.15857771969253168), (2, 1.0000000000000002), (3, 0.03320557288087525), (4, 0.12252455299681561), (5, 0.14289606854139425), (6, 0.09473797682089045), (7, 0.7195128394500737), (8, 0.6222838985733872), (9, 0.20964994594910474), (10, 0.0), (11, 0.15432234027424846), (12, 0.06237712524767084), (13, 0.10687152907156103), (14, 0.15155273507648512), (15, 0.279694148910013), (16, 0.08560210804506639), (17, 0.14134375967199125), (18, 0.5534649659156857), (19, 0.15504755850286894), (20, 0.12741181267496482), (21, 0.12590525431003605), (22, 0.15504755850286894), (23, 0.15504755850286894), (24, 0.40082373367715857), (25, 0.09473797682089045), (26, 0.40082373367715857), (27, 0.09473797682089045), (28, 0.90577613470096), (29, 0.334078897732376), (30, 0.33787602529387495), (31, 0.5037843636223869), (32, 0.2565093564360004), (33, 0.14290574260579184), (34, 0.2991539028688831), (35, 0.2991539028688831), (36, 0.21592877364558322), (37, 

In [150]:
len(similarity_scores)

204

In [151]:
# sorting movies based on similarity score
sorted_similar_laptop = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
print(sorted_similar_laptop)

[(2, 1.0000000000000002), (28, 0.90577613470096), (7, 0.7195128394500737), (55, 0.7195128394500737), (8, 0.6222838985733872), (43, 0.6152471786224447), (44, 0.6152471786224447), (200, 0.6152471786224447), (18, 0.5534649659156857), (31, 0.5037843636223869), (78, 0.4414150965593383), (24, 0.40082373367715857), (26, 0.40082373367715857), (189, 0.3845724641585434), (45, 0.36888333650142185), (149, 0.3550490689284843), (30, 0.33787602529387495), (29, 0.334078897732376), (143, 0.30479751275316874), (59, 0.3019207497869781), (34, 0.2991539028688831), (35, 0.2991539028688831), (39, 0.2875291504211646), (15, 0.279694148910013), (147, 0.2684083378681643), (169, 0.2684083378681643), (173, 0.2591461609770364), (32, 0.2565093564360004), (130, 0.2556390112061512), (46, 0.25088089508471223), (41, 0.24645519635158025), (153, 0.2455292079265699), (38, 0.2406836114423106), (87, 0.23895015707940895), (110, 0.22908186201788852), (117, 0.22908186201788852), (37, 0.22795199667516147), (40, 0.225927656204508

In [157]:
# Cetak DataFrame df
print(df)

# Cetak similarity scores
print(similarity)


      Brand                       Laptop_name Display_size  \
0        HP              Notebook 14-df0008nx         14.0   
1    Lenovo                IdeaPad 330S-14IKB         14.0   
2    Huawei                  MateBook D Volta         14.0   
3      Dell                    Inspiron 1 367          1.6   
4      Asus                  VivoBook 1 X10UR          1.6   
..      ...                               ...          ...   
200  Lenovo                IdeaPad 320-1IKBRN          1.6   
201  Huawei                        MateBook D          1.6   
202   Apple  MacBook Pro (Retina + Touch Bar)          1.4   
203   Apple  MacBook Pro (Retina + Touch Bar)          1.4   
204    Dell                    Inspiron 1 376          1.6   

            Processor_type                 Graphics_card  \
0      Intel Celeron N4000         Intel HD Graphics 600   
1        Intel Core i-820U        Intel UHD Graphics 620   
2        Intel Core i-820U    NVIDIA GeForce MX10 (2 GB)   
3      Intel Co

In [161]:
print("Laptop suggested for you : \n")

i = 1

for index, laptop in sorted_similar_laptop.iterrows():
    title_from_index = df[df['Laptop_name'] == laptop['Laptop_name']]['Laptop_name'].values
    if len(title_from_index) > 0:
        title_from_index = title_from_index[0]
        if i <= 20:
            print(i, ".", title_from_index)
            i += 1
    else:
        continue

if i == 1:
    print("No laptop recommendations found.")


Laptop suggested for you : 

1 . MateBook D Volta
2 . MateBook D Volta
3 . MateBook D
4 . MateBook D
5 . MateBook X Pro
6 . MateBook D
7 . MateBook D
8 . MateBook D
9 . MateBook 13
10 . MateBook X Pro
11 . MateBook 13
12 . Pavilion 14-ce0000nx
13 . Pavilion 14-ce0001nx
14 . ZenBook UX430UN Ultrabook
15 . MateBook 13
16 . VivoBook S430
17 . VivoBook 14 S430FN
18 . ZenBook 14 UX433FN
19 . ZenBook UX433FN
20 . Swift 3 314
