In [92]:
import pandas as pd
import numpy as np
from scipy.stats import zscore
from sklearn.metrics.pairwise import cosine_similarity
from src.models import db, Laptop, Cpu, Gpu, Windows as Wind, Application as App
from src import app

### GET LAPTOPS FROM DATABASE

In [93]:
# Select the laptop attributes used for similarity scoring, joining the CPU,
# GPU and Windows rows through the Laptop relationships. Gpu.maxSpeed and
# Gpu.memory are relabelled; Cpu.maxSpeed already yields a "maxSpeed" column.
laptop_query = (
    db.select(
        Laptop.id,
        Laptop.name,
        Laptop.hddStorage,
        Laptop.ssdStorage,
        Laptop.ram,
        Cpu.maxSpeed,
        Cpu.cores,
        Gpu.maxSpeed.label("gpuMaxSpeed"),
        Gpu.memory.label("gpuMemory"),
        Gpu.directX,
        Gpu.openGl,
        Wind.buildNumber,
    )
    .join(Laptop.cpu)
    .join(Laptop.gpu)
    .join(Laptop.windows)
)

# The engine is accessed inside the application context, as before.
with app.app_context():
    # read_sql_query executes the statement directly. The generic read_sql
    # first asks the database whether its argument names a table, which is a
    # wasted catalog query when the argument is a Select object.
    laptops_full = pd.read_sql_query(laptop_query, con=db.engine)

# Single storage feature: SSD and HDD capacity added together.
# NOTE(review): a missing value in either column makes the sum NaN - confirm
# both columns are always populated.
laptops_full["totalStorage"] = laptops_full["ssdStorage"] + laptops_full["hddStorage"]

laptops_full.head()

2024-03-20 12:11:05,684 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-20 12:11:05,685 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s, %(param_2)s, %(param_3)s, %(param_4)s, %(param_5)s]) AND pg_catalog.pg_table_is_visible(pg_catalog.pg_class.oid) AND pg_catalog.pg_namespace.nspname != %(nspname_1)s
2024-03-20 12:11:05,686 INFO sqlalchemy.engine.Engine [cached since 671.9s ago] {'table_name': <sqlalchemy.sql.selectable.Select object at 0x713447106210>, 'param_1': 'r', 'param_2': 'p', 'param_3': 'f', 'param_4': 'v', 'param_5': 'm', 'nspname_1': 'pg_catalog'}
2024-03-20 12:11:05,687 INFO sqlalchemy.engine.Engine SELECT "Laptop".id, "Laptop".name, "Laptop"."hddStorage", "Laptop"."ssdStorage", "Laptop".ram, "Cpu"."maxSpeed", "Cpu".cores, 

Unnamed: 0,id,name,hddStorage,ssdStorage,ram,maxSpeed,cores,gpuMaxSpeed,gpuMemory,directX,openGl,buildNumber,totalStorage
0,1530159,Lenovo Legion 5 Pro 16,0.0,512.0,8.0,4.7,8,1410.0,8.0,12.2,4.6,22000,512.0
1,1259728,ASUS Vivobook 16,0.0,1000.0,8.0,5.0,16,1350.0,0.0,12.1,4.6,22000,1000.0
2,1518251,Lenovo LOQ 15,0.0,512.0,8.0,5.1,8,2130.0,6.0,12.2,4.6,10240,512.0
3,1518264,Lenovo LOQ 15,0.0,512.0,8.0,5.1,8,2130.0,6.0,12.2,4.6,22000,512.0
4,1491011,Lenovo LOQ 15,0.0,512.0,8.0,5.1,8,2130.0,6.0,12.2,4.6,10240,512.0


In [94]:
# Build a small working sample: up to SAMPLE_PER_GROUP laptops whose GPU
# OpenGL value differs from REFERENCE_OPENGL, followed by up to the same
# number whose value equals it.
# (The former bare `laptops_full["openGl"].unique()` expression was removed:
# it was not the last expression of the cell, so its value was never shown.)
REFERENCE_OPENGL = 4.6
SAMPLE_PER_GROUP = 50

first_half = laptops_full[laptops_full["openGl"] != REFERENCE_OPENGL].head(SAMPLE_PER_GROUP)
second_half = laptops_full[laptops_full["openGl"] == REFERENCE_OPENGL].head(SAMPLE_PER_GROUP)

# ignore_index=True renumbers the rows 0..n-1, so index labels and row
# positions coincide in the sampled frame.
laptops_full = pd.concat([first_half, second_half], ignore_index=True)
laptops_full.head()

Unnamed: 0,id,name,hddStorage,ssdStorage,ram,maxSpeed,cores,gpuMaxSpeed,gpuMemory,directX,openGl,buildNumber,totalStorage
0,1009046,ASUS VivoBook 14 F413,0.0,128.0,4.0,3.4,2,1100.0,0.0,12.1,4.5,10240,128.0
1,687418,Lenovo IdeaPad 3 15,0.0,128.0,4.0,3.4,2,1100.0,0.0,12.1,4.5,10240,128.0
2,583289,HP 14 (14-dq1000),0.0,128.0,4.0,3.4,2,1100.0,0.0,12.1,4.5,10240,128.0
3,916346,ASUS VivoBook 14,0.0,128.0,4.0,3.4,2,1100.0,0.0,12.1,4.5,10240,128.0
4,874311,Acer Aspire 5,0.0,128.0,4.0,3.4,2,1100.0,0.0,12.1,4.5,10240,128.0


### REMOVE UNUSED PROPERTIES

In [95]:
# Columns that feed the similarity computation. Selecting them explicitly
# (instead of dropping "id", "name", "ssdStorage", "hddStorage") keeps this
# cell re-runnable after laptops_full has lost its storage columns or gained
# extra ones, and yields the same columns in the same order on a first run.
FEATURE_COLUMNS = [
    "ram",
    "maxSpeed",
    "cores",
    "gpuMaxSpeed",
    "gpuMemory",
    "directX",
    "openGl",
    "buildNumber",
    "totalStorage",
]

imp_laptop = laptops_full[FEATURE_COLUMNS]
imp_laptop.head()

Unnamed: 0,ram,maxSpeed,cores,gpuMaxSpeed,gpuMemory,directX,openGl,buildNumber,totalStorage
0,4.0,3.4,2,1100.0,0.0,12.1,4.5,10240,128.0
1,4.0,3.4,2,1100.0,0.0,12.1,4.5,10240,128.0
2,4.0,3.4,2,1100.0,0.0,12.1,4.5,10240,128.0
3,4.0,3.4,2,1100.0,0.0,12.1,4.5,10240,128.0
4,4.0,3.4,2,1100.0,0.0,12.1,4.5,10240,128.0


### NORMALIZE

In [96]:
# Standardise every feature column independently with scipy's zscore, keeping
# the row index and column order of imp_laptop.
# NOTE(review): a column with zero variance would come out as NaN - confirm
# that cannot happen for the sampled data.
normalized = pd.DataFrame(
    {column: zscore(imp_laptop[column]) for column in imp_laptop.columns},
    index=imp_laptop.index,
)
normalized.head()

Unnamed: 0,ram,maxSpeed,cores,gpuMaxSpeed,gpuMemory,directX,openGl,buildNumber,totalStorage
0,-0.666312,-0.773834,-0.800257,-0.695503,-0.775273,-0.79959,-0.576341,-0.833616,-0.538864
1,-0.666312,-0.773834,-0.800257,-0.695503,-0.775273,-0.79959,-0.576341,-0.833616,-0.538864
2,-0.666312,-0.773834,-0.800257,-0.695503,-0.775273,-0.79959,-0.576341,-0.833616,-0.538864
3,-0.666312,-0.773834,-0.800257,-0.695503,-0.775273,-0.79959,-0.576341,-0.833616,-0.538864
4,-0.666312,-0.773834,-0.800257,-0.695503,-0.775273,-0.79959,-0.576341,-0.833616,-0.538864


### FIND COSINE SIMILARITY

In [97]:
# Pairwise cosine similarity between all rows of `normalized`.
sim_matrix = cosine_similarity(normalized)

# For each row, the column positions ordered from most to least similar.
# Negating the matrix turns argsort's ascending order into a descending one;
# sorting along axis=1 handles every row in one vectorised call instead of a
# Python loop followed by np.vstack.
sim_sorted = np.argsort(-sim_matrix, axis=1)

In [98]:
# Find the index label(s) of the laptop whose id equals the string "1514446".
is_target = laptops_full["id"] == "1514446"
idx = laptops_full.loc[is_target].index
print(idx)

Index([77], dtype='int64')


In [99]:
# Take the six best-ranked columns from the target's row of sim_sorted.
# `idx` holds index labels, which are used here as row positions; that is
# valid because laptops_full was rebuilt with ignore_index=True.
similar_ids = sim_sorted[idx, :6]

# similar_ids has one row per matched label; use the first one.
top_positions = similar_ids[0]
result = laptops_full.iloc[top_positions]
result

Unnamed: 0,id,name,hddStorage,ssdStorage,ram,maxSpeed,cores,gpuMaxSpeed,gpuMemory,directX,openGl,buildNumber,totalStorage
77,1514446,HP 14,0.0,2000.0,48.0,3.8,4,1500.0,0.0,12.1,4.6,22000,2000.0
91,1514507,HP 14,0.0,4000.0,64.0,3.8,4,1500.0,0.0,12.1,4.6,10240,4000.0
92,1303504,Acer Swift Go 16,0.0,1000.0,16.0,5.0,16,1350.0,0.0,12.1,4.6,22000,1000.0
99,1473041,Acer Swift Go 14,0.0,512.0,16.0,5.0,16,1350.0,0.0,12.1,4.6,22000,512.0
94,1484450,Acer Swift Go 14,0.0,512.0,16.0,5.0,16,1350.0,0.0,12.1,4.6,22000,512.0
81,1313433,ASUS Vivobook 14X,0.0,1000.0,16.0,5.0,16,1372.0,4.0,12.1,4.6,22000,1000.0


In [100]:
# Drop the separate storage columns. errors="ignore" makes the cell safe to
# re-run: without it, a second execution raises KeyError because the columns
# are already gone.
laptops_full = laptops_full.drop(columns=["hddStorage", "ssdStorage"], errors="ignore")

In [101]:
# Use the label found by the id lookup (`idx`) instead of a hard-coded row
# position, so the target stays correct if the sampled data changes.
target_label = idx[0]

# Double brackets keep the selection 2-D (one row), the shape
# cosine_similarity expects for its second argument.
target_features = normalized.loc[[target_label]].to_numpy()

# One similarity score per laptop, measured against the target laptop.
similarity = cosine_similarity(normalized, target_features)
laptops_full["similarity"] = similarity

laptops_full.sort_values(by="similarity", ascending=False).head(6)

Unnamed: 0,id,name,ram,maxSpeed,cores,gpuMaxSpeed,gpuMemory,directX,openGl,buildNumber,totalStorage,similarity
77,1514446,HP 14,48.0,3.8,4,1500.0,0.0,12.1,4.6,22000,2000.0,1.0
91,1514507,HP 14,64.0,3.8,4,1500.0,0.0,12.1,4.6,10240,4000.0,0.92434
92,1303504,Acer Swift Go 16,16.0,5.0,16,1350.0,0.0,12.1,4.6,22000,1000.0,0.510215
99,1473041,Acer Swift Go 14,16.0,5.0,16,1350.0,0.0,12.1,4.6,22000,512.0,0.458594
94,1484450,Acer Swift Go 14,16.0,5.0,16,1350.0,0.0,12.1,4.6,22000,512.0,0.458594
81,1313433,ASUS Vivobook 14X,16.0,5.0,16,1372.0,4.0,12.1,4.6,22000,1000.0,0.453537
