In [1]:
import pandas as pd
from scipy.stats import zscore
from sklearn.metrics.pairwise import euclidean_distances
from src.models import db, Laptop, Cpu, Gpu, Windows as Wind, Application as App
from src import app

### GET ALL LAPTOP FROM DATABASE

In [2]:
laptop_query = db.select(Laptop.id, Laptop.name, Laptop.hddStorage,
                      Laptop.ssdStorage, Laptop.ram, Cpu.maxSpeed,
                      Cpu.cores, Gpu.maxSpeed.label("gpuMaxSpeed"), Gpu.memory.label("gpuMemory"), Gpu.directX, Gpu.openGl, Wind.buildNumber).join(Laptop.cpu).join(Laptop.gpu).join(Laptop.windows)

with app.app_context():
    laptops_full = pd.read_sql(laptop_query, con=db.engine)
laptops_full["totalStorage"] = laptops_full["ssdStorage"] + laptops_full["hddStorage"]

laptops_full.head()

2024-03-19 14:24:17,019 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2024-03-19 14:24:17,020 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-03-19 14:24:17,022 INFO sqlalchemy.engine.Engine select current_schema()
2024-03-19 14:24:17,024 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-03-19 14:24:17,027 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2024-03-19 14:24:17,028 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-03-19 14:24:17,036 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-19 14:24:17,038 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s, %(param_2)s, %(param_3)s, %(param_4)s, %(param_5)s]) AND pg_catalog.pg_table_is_visible(pg_catalog.pg_class.oid) AND pg_catalog.pg_namespace.nspname != %(nspname

Unnamed: 0,id,name,hddStorage,ssdStorage,ram,maxSpeed,cores,gpuMaxSpeed,gpuMemory,directX,openGl,buildNumber,totalStorage
0,1530159,Lenovo Legion 5 Pro 16,0.0,512.0,8.0,4.7,8,1410.0,8.0,12.2,4.6,22000,512.0
1,1259728,ASUS Vivobook 16,0.0,1000.0,8.0,5.0,16,1350.0,0.0,12.1,4.6,22000,1000.0
2,1518251,Lenovo LOQ 15,0.0,512.0,8.0,5.1,8,2130.0,6.0,12.2,4.6,10240,512.0
3,1518264,Lenovo LOQ 15,0.0,512.0,8.0,5.1,8,2130.0,6.0,12.2,4.6,22000,512.0
4,1491011,Lenovo LOQ 15,0.0,512.0,8.0,5.1,8,2130.0,6.0,12.2,4.6,10240,512.0


### GET ALL APPS

In [3]:
app_query = db.select(App.id, App.name, App.minCpuSpeed, App.minCores, App.minDirectX, App.minOpenGl,
                      App.minGpuBoostClock, App.minGpuMemory, App.minRam, App.minStorage, Wind.buildNumber).join(App.windows)
with app.app_context():
    app_full = pd.read_sql(app_query, con=db.engine)

app_full.tail()

2024-03-19 14:24:17,217 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-19 14:24:17,219 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s, %(param_2)s, %(param_3)s, %(param_4)s, %(param_5)s]) AND pg_catalog.pg_table_is_visible(pg_catalog.pg_class.oid) AND pg_catalog.pg_namespace.nspname != %(nspname_1)s
2024-03-19 14:24:17,220 INFO sqlalchemy.engine.Engine [cached since 0.1833s ago] {'table_name': <sqlalchemy.sql.selectable.Select object at 0x705850e6fd50>, 'param_1': 'r', 'param_2': 'p', 'param_3': 'f', 'param_4': 'v', 'param_5': 'm', 'nspname_1': 'pg_catalog'}
2024-03-19 14:24:17,223 INFO sqlalchemy.engine.Engine SELECT "Application".id, "Application".name, "Application"."minCpuSpeed", "Application"."minCores", "Application"."minDirectX

Unnamed: 0,id,name,minCpuSpeed,minCores,minDirectX,minOpenGl,minGpuBoostClock,minGpuMemory,minRam,minStorage,buildNumber
2386,52f51cb3-a5d1-4c8c-80e8-180c2e96cf08,Forspoken,3.4,6.0,12.05,4.5,589.0,2.048,8.0,50.0,7601
2387,9305e289-41ef-4bb4-a387-cdcdf794632d,Street Fighter 6,3.55,4.0,11.0,3.65,0.0,1.28,8.0,30.0,7601
2388,a34e29b1-2414-4d69-b491-aafa18b8fc2e,Starfield,3.4,6.0,12.05,4.5,589.0,2.048,8.0,50.0,7601
2389,4d421c6b-96c2-49f4-9cd6-15fea9cf669a,Star Wars Jedi: Survivor,3.3,4.0,11.1,4.25,0.0,1.024,8.0,55.0,7601
2390,91bf977d-ef0c-40ac-857e-52805b0b6dd1,Meet Your Maker,3.2,4.0,11.65,4.4,1114.0,2.048,8.0,20.0,10240


### CHOSING REQUIRED APPS

In [4]:
ids = ["eb7ef6cd-079b-4c20-913e-0844a5d1cac8"]

app_full[app_full["name"].str.lower().str.contains("office")].head()


Unnamed: 0,id,name,minCpuSpeed,minCores,minDirectX,minOpenGl,minGpuBoostClock,minGpuMemory,minRam,minStorage,buildNumber
1,438e99b8-9f23-4876-af5e-561deb054f4a,Microsoft Office 2019,1.6,2.0,9.0,0.0,0.0,0.0,2.0,4.0,10240
2,eb7ef6cd-079b-4c20-913e-0844a5d1cac8,Microsoft Office 2016,1.0,1.0,10.0,0.0,0.0,0.0,2.0,3.0,10240
3,3e3ca1c3-9717-4b77-b157-ee09b9639861,WPS Office 2019,0.0,1.0,0.0,0.0,0.0,0.0,2.0,4.0,9200
61,9de2dcda-7ea4-49bb-845c-c74c56e9944d,Microsoft Office 2021,1.6,2.0,10.0,0.0,0.0,0.0,2.0,4.0,10240


In [5]:
app_full.set_index("id", inplace=True)
choosen_apps = app_full.loc[ids]
choosen_apps.head()

KeyError: "None of [Index(['da2c57d2-2c35-4178-ba2c-a98ae33210a3'], dtype='object', name='id')] are in the [index]"

### find the highest values of each property

In [None]:
sys_req = pd.DataFrame({
    "maxSpeed": [choosen_apps["minCpuSpeed"].max()],
    "cores": [choosen_apps["minCores"].max()],
    "gpuMaxSpeed": [choosen_apps["minGpuBoostClock"].max()],
    "gpuMemory": [choosen_apps["minGpuMemory"].max()],
    "openGl": [choosen_apps["minOpenGl"].max()],
    "directX": [choosen_apps["minDirectX"].max()],
    "ram": [choosen_apps["minRam"].max()],
    "totalStorage": [choosen_apps["minStorage"].sum()],
    "buildNumber": [choosen_apps["buildNumber"].max()],
})
sys_req.head()

### DROP UNUSED LAPTOP PROPERTY

In [None]:
imp_laptop = laptops_full.drop(columns=["id", "name", "ssdStorage", "hddStorage"])
imp_laptop.head()

### MERGE AND NORMALIZE SYSTEM REQUIREMENTS DATA AND LAPTOP DATA

In [None]:
mixed_data = pd.concat([sys_req, imp_laptop], ignore_index=True)
mixed_data.head()

In [None]:
normalized = mixed_data.apply(zscore)
normalized.head()

### CALCULATE EUCLIDEAN DISTANCE

calculate system requirements and laptop distance

In [None]:
distance = euclidean_distances(normalized, normalized.values[0].reshape(1, -1))

In [None]:
laptops_full["distance"] = distance[1:]
asc_data = laptops_full.sort_values(by="distance", ascending=True).drop(columns=["ssdStorage", "hddStorage"])

print(sys_req)
asc_data.head()

### FILTER LAPTOP IF LOWER THAN SYSTEM REQUIREMENTS

In [None]:
baseSpeed = (laptops_full["maxSpeed"] >= sys_req["maxSpeed"].values[0])
cores = (laptops_full["cores"] >= sys_req["cores"].values[0])
gpuMaxSpeed = (laptops_full["gpuMaxSpeed"] >= sys_req["gpuMaxSpeed"].values[0])
directX = (laptops_full["directX"] >= sys_req["directX"].values[0])
openGl = (laptops_full["openGl"] >= sys_req["openGl"].values[0])
totalStorage = (laptops_full["totalStorage"] >= sys_req["totalStorage"].values[0])
buildNumber = (laptops_full["buildNumber"] >= sys_req["buildNumber"].values[0])
vramFromRam = sys_req["gpuMemory"].values[0] - laptops_full["gpuMemory"]
laptops_full["vramFromRam"] = vramFromRam

# if vramFromRam is negative, then vram from ram is not needed (0) alocated vram from ram

laptops_full.loc[laptops_full["vramFromRam"] <= 0, "vramFromRam"] = 0
ram = (laptops_full["ram"] >= (sys_req["ram"].values[0] + laptops_full["vramFromRam"]))

filtered =  laptops_full.loc[baseSpeed & cores & gpuMaxSpeed & directX & openGl & ram & totalStorage & buildNumber]
filtered = filtered.drop(columns=["hddStorage", "ssdStorage"])

print(sys_req)
print(choosen_apps["name"])
filtered.sort_values(by="distance", ascending=True).head()

In [None]:
filtered[filtered["ram"] < 8]