# Model Inference
---

Model inference adalah tahapan ketika model yang sudah dilatih digunakan untuk memprediksi output dari data baru yang belum pernah dilihat sebelumnya.

In [1]:
#import libraries

import pickle
import pandas as pd

In [2]:
# Load the trained model object that was serialized to disk with pickle.
# NOTE(review): pickle.load can execute arbitrary code while deserializing,
# so 'model_file.pkl' must come from a trusted source.
with open('model_file.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [3]:
# Specification of the single laptop to be priced; every key becomes one
# column of the inference frame.
user_1 = {
    # general
    'Company': 'Apple',
    'Product': 'Intel Core 7 CPU',
    'TypeName': 'Notebook',
    'Inches': 13.3,
    'Ram': 16,
    'OS': 'MacOS',
    'Weight': 1.34,
    # display
    'Screen': 'Standard',
    'ScreenW': 2880,
    'ScreenH': 1080,
    'Touchscreen': 'No',
    'IPSpanel': 'Yes',
    'RetinaDisplay': 'Yes',
    # processor
    'CPU_company': 'Intel',
    'CPU_freq': 2.5,
    'CPU_model': 'Core i5',
    # storage
    'PrimaryStorage': 128,
    'SecondaryStorage': 500,
    'PrimaryStorageType': 'SSD',
    'SecondaryStorageType': 'HDD',
    # graphics
    'GPU_company': 'Intel',
    'GPU_model': 'HD Graphics 620',
}

# A list holding one dict yields a one-row DataFrame.
df = pd.DataFrame([user_1])
df

Unnamed: 0,Company,Product,TypeName,Inches,Ram,OS,Weight,Screen,ScreenW,ScreenH,...,RetinaDisplay,CPU_company,CPU_freq,CPU_model,PrimaryStorage,SecondaryStorage,PrimaryStorageType,SecondaryStorageType,GPU_company,GPU_model
0,Apple,Intel Core 7 CPU,Notebook,13.3,16,MacOS,1.34,Standard,2880,1080,...,Yes,Intel,2.5,Core i5,128,500,SSD,HDD,Intel,HD Graphics 620


In [4]:
# Ordinal code for RAM: every listed size gets its 1-based rank
# (2 -> 1, 4 -> 2, ..., 64 -> 9).
mapping = {
    ram_size: rank
    for rank, ram_size in enumerate((2, 4, 6, 8, 12, 16, 24, 32, 64), start=1)
}

# Series.map yields NaN for any Ram value that is not a key of `mapping`.
df['Ram_category'] = df['Ram'].map(mapping)
df

Unnamed: 0,Company,Product,TypeName,Inches,Ram,OS,Weight,Screen,ScreenW,ScreenH,...,CPU_company,CPU_freq,CPU_model,PrimaryStorage,SecondaryStorage,PrimaryStorageType,SecondaryStorageType,GPU_company,GPU_model,Ram_category
0,Apple,Intel Core 7 CPU,Notebook,13.3,16,MacOS,1.34,Standard,2880,1080,...,Intel,2.5,Core i5,128,500,SSD,HDD,Intel,HD Graphics 620,6


In [5]:
# PrimaryStorage values whose rows are filtered out
nilai_dihapus = [8, 508, 240, 180]

# Keep only the rows whose PrimaryStorage is NOT in the list above.
# `.copy()` makes the result an independent frame, so the column assignments
# done on `df` in later cells never write into a slice of the previous frame
# (the pattern that triggers pandas' SettingWithCopyWarning).
# NOTE(review): with a one-row inference input, a PrimaryStorage of
# 8/508/240/180 leaves `df` empty — confirm this is the intended handling.
df = df[~df['PrimaryStorage'].isin(nilai_dihapus)].copy()

df['PrimaryStorage'].unique()

array([128], dtype=int64)

In [6]:
# Ordinal code for primary storage: every listed size gets its 1-based rank
# (16 -> 1, 32 -> 2, ..., 2048 -> 9).
mapping = {
    storage_size: rank
    for rank, storage_size in enumerate(
        (16, 32, 64, 128, 256, 500, 512, 1024, 2048), start=1
    )
}

# Series.map yields NaN for any size that is not a key of `mapping`.
df['PrimaryStorage_category'] = df['PrimaryStorage'].map(mapping)
print(df)

  Company           Product  TypeName  Inches  Ram     OS  Weight    Screen  \
0   Apple  Intel Core 7 CPU  Notebook    13.3   16  MacOS    1.34  Standard   

   ScreenW  ScreenH  ... CPU_freq CPU_model PrimaryStorage SecondaryStorage  \
0     2880     1080  ...      2.5   Core i5            128              500   

   PrimaryStorageType SecondaryStorageType  GPU_company        GPU_model  \
0                 SSD                  HDD        Intel  HD Graphics 620   

  Ram_category PrimaryStorage_category  
0            6                       4  

[1 rows x 24 columns]


In [7]:
# Ordinal code for secondary storage: every listed size gets its 1-based rank
# (0 -> 1, 256 -> 2, ..., 2048 -> 6).
mapping = {
    storage_size: rank
    for rank, storage_size in enumerate((0, 256, 500, 512, 1024, 2048), start=1)
}

# Series.map yields NaN for any size that is not a key of `mapping`.
df['SecondaryStorage_category'] = df['SecondaryStorage'].map(mapping)
print(df)

  Company           Product  TypeName  Inches  Ram     OS  Weight    Screen  \
0   Apple  Intel Core 7 CPU  Notebook    13.3   16  MacOS    1.34  Standard   

   ScreenW  ScreenH  ... CPU_model PrimaryStorage SecondaryStorage  \
0     2880     1080  ...   Core i5            128              500   

  PrimaryStorageType  SecondaryStorageType GPU_company        GPU_model  \
0                SSD                   HDD       Intel  HD Graphics 620   

   Ram_category PrimaryStorage_category SecondaryStorage_category  
0             6                       4                         3  

[1 rows x 25 columns]


In [8]:
# Remove the raw Ram, PrimaryStorage and SecondaryStorage columns; their
# *_category counterparts stay in the frame.
df = df.drop(['Ram', 'PrimaryStorage', 'SecondaryStorage'], axis='columns')
df.head(2)

Unnamed: 0,Company,Product,TypeName,Inches,OS,Weight,Screen,ScreenW,ScreenH,Touchscreen,...,CPU_company,CPU_freq,CPU_model,PrimaryStorageType,SecondaryStorageType,GPU_company,GPU_model,Ram_category,PrimaryStorage_category,SecondaryStorage_category
0,Apple,Intel Core 7 CPU,Notebook,13.3,MacOS,1.34,Standard,2880,1080,No,...,Intel,2.5,Core i5,SSD,HDD,Intel,HD Graphics 620,6,4,3


In [9]:
# Cast the three ordinal code columns to strings in a single astype call.
df = df.astype({
    'Ram_category': str,
    'PrimaryStorage_category': str,
    'SecondaryStorage_category': str,
})

df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1 entries, 0 to 0
Data columns (total 22 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Company                    1 non-null      object 
 1   Product                    1 non-null      object 
 2   TypeName                   1 non-null      object 
 3   Inches                     1 non-null      float64
 4   OS                         1 non-null      object 
 5   Weight                     1 non-null      float64
 6   Screen                     1 non-null      object 
 7   ScreenW                    1 non-null      int64  
 8   ScreenH                    1 non-null      int64  
 9   Touchscreen                1 non-null      object 
 10  IPSpanel                   1 non-null      object 
 11  RetinaDisplay              1 non-null      object 
 12  CPU_company                1 non-null      object 
 13  CPU_freq                   1 non-null      float64
 14

In [10]:
# Remove the ScreenW and ScreenH columns from the frame.
df = df.drop(['ScreenW', 'ScreenH'], axis='columns')
df.head(2)

Unnamed: 0,Company,Product,TypeName,Inches,OS,Weight,Screen,Touchscreen,IPSpanel,RetinaDisplay,CPU_company,CPU_freq,CPU_model,PrimaryStorageType,SecondaryStorageType,GPU_company,GPU_model,Ram_category,PrimaryStorage_category,SecondaryStorage_category
0,Apple,Intel Core 7 CPU,Notebook,13.3,MacOS,1.34,Standard,No,Yes,Yes,Intel,2.5,Core i5,SSD,HDD,Intel,HD Graphics 620,6,4,3


## Cardinality

In [11]:
# Eight most frequent companies found in `df`.
# NOTE(review): `top_8` is derived from this inference frame itself. With a
# one-row input the row's own company is always in `top_8`, so 'Others' can
# never be produced here. Presumably the training notebook computed the top 8
# on the training data — confirm, and reuse that exact list for inference.
top_8 = df['Company'].value_counts().nlargest(8).index

# Keep the company name when it is in `top_8`; everything else becomes 'Others'.
df['Company_name'] = df['Company'].where(df['Company'].isin(top_8), 'Others')

df['Company_name'].unique()


array(['Apple'], dtype=object)

In [12]:
# CPU performance lookup table: tier label ('Low' / 'Medium' / 'High') ->
# list of CPU model strings assigned to that tier. penentuan_cpu_perf()
# matches CPU_model values against these lists by exact string equality, so
# spelling, case and spacing of every entry matter.
CPU_Performance = {
    'Low' : ['Core M m3', 'Atom x5-Z8350', 'E-Series E2-9000e', 'Atom x5-Z8300', 'E-Series E2-6110',
            'A6-Series 9220', 'Celeron Dual Core N3350', 'Pentium Quad Core N4200', 'Atom x5-Z8550',
            'Celeron Dual Core N3060', 'E-Series 6110', 'Core M 6Y75', 'E-Series 9000e',
            'A6-Series A6-9220', 'Celeron Dual Core 3205U', 'Celeron Dual Core 3855U',
            'Pentium Quad Core N3710', 'Celeron Quad Core N3450', 'Core M m3-7Y30',
            'E-Series 7110', 'Core M 7Y30', 'Celeron Quad Core N3160', 'E-Series E2-9000',
            'Celeron Dual Core N3050', 'Core M M3-6Y30', 'A6-Series 7310', 'Atom Z8350',
            'Pentium Dual Core N4200', 'Celeron Quad Core N3710', 'Core M',
            'Pentium Dual Core 4405U', 'A4-Series 7210', 'Core M m7-6Y75', 'Core M M7-6Y75',
            'Atom X5-Z8350', 'Pentium Dual Core 4405Y', 'Pentium Quad Core N3700',
            'Core M 6Y54', 'Cortex A72&A53', 'E-Series 9000', 'Core M 6Y30'],
    
    'Medium' : ['Core i5', 'Core i5 7200U', 'A9-Series 9420', 'Core i5 8250U', 'Core i3 6006U',
                'Core i3 7100U', 'Core i5 7300HQ', 'Core i3 7130U', 'Ryzen 1700', 'Core i5 6200U',
                'Core i5 7500U', 'Core i5 7Y54', 'Xeon E3-1505M V6', 'A10-Series A10-9620P',
                'A10-Series 9600P', 'A8-Series 7410', 'A12-Series 9720P', 'Core i5 7300U',
                'Core i5 6440HQ', 'Ryzen 1600', 'Core i5 7440HQ', 'Core i5 7Y57', 'Core i3 6100U',
                'A10-Series 9620P', 'A9-Series A9-9420', 'Xeon E3-1535M v6', 'Core i5 6300U',
                'Core i5 6300HQ', 'Xeon E3-1535M v5', 'Core i5 6260U', 'A12-Series 9700P',
                'FX 8800P', 'A9-Series 9410'],
                
    'High' : ['Core i7', 'Core i7 8550U', 'Core i7 7500U', 'Core i7 8650U', 'Core i7 7700HQ',
                'FX 9830P', 'Core i7 7560U', 'Core i7 6920HQ', 'Core i7 7820HK', 'Core i7 6500U',
                'Core i7 6600U', 'Core i7 7820HQ', 'Core i7 7600U', 'Core i7 6820HQ', 'Core i7 7Y75',
                'Core i7 7660U', 'Core i7 6700HQ', 'Core i7 6820HK', 'Core i7 6560U']
}


In [13]:
# Function that determines the CPU performance tier
def penentuan_cpu_perf(cpu_list, perf_map=None):
    """Classify each CPU model name into a performance tier.

    Parameters
    ----------
    cpu_list : iterable
        CPU model names to classify (e.g. ``df['CPU_model'].tolist()``).
    perf_map : dict, optional
        Mapping of tier label -> collection of model names in that tier.
        When omitted, the notebook-level ``CPU_Performance`` table is used,
        which is the original behaviour.

    Returns
    -------
    list
        One tier label per input element, in input order. A model that is
        not listed under any tier is labelled ``'Unknown'``.
    """
    if perf_map is None:
        # Resolved at call time so the table cell only has to run before the
        # first call, not before this definition.
        perf_map = CPU_Performance

    # Invert the table once so each model is resolved with a single dict
    # lookup instead of scanning every tier list. setdefault keeps the FIRST
    # tier (in dict order) that lists a model, matching the first-match rule
    # of the original nested loop.
    tier_by_model = {}
    for tier, models in perf_map.items():
        for model_name in models:
            tier_by_model.setdefault(model_name, tier)

    return [tier_by_model.get(cpu, 'Unknown') for cpu in cpu_list]


In [14]:
# Add the CPU_performance feature: the tier that penentuan_cpu_perf assigns
# to each CPU_model value.
cpu_tiers = penentuan_cpu_perf(df['CPU_model'].tolist())
df = df.assign(CPU_performance=cpu_tiers)
del cpu_tiers  # keep the notebook namespace unchanged
df.head()

Unnamed: 0,Company,Product,TypeName,Inches,OS,Weight,Screen,Touchscreen,IPSpanel,RetinaDisplay,...,CPU_model,PrimaryStorageType,SecondaryStorageType,GPU_company,GPU_model,Ram_category,PrimaryStorage_category,SecondaryStorage_category,Company_name,CPU_performance
0,Apple,Intel Core 7 CPU,Notebook,13.3,MacOS,1.34,Standard,No,Yes,Yes,...,Core i5,SSD,HDD,Intel,HD Graphics 620,6,4,3,Apple,Medium


In [15]:
# GPU performance lookup table: tier label ('Low' / 'Medium' / 'High') ->
# list of GPU model strings assigned to that tier. penentuan_gpu_perf()
# matches GPU_model values by exact string equality, which is why several
# spelling variants (including ones with a trailing space) are listed.
# 'GeForce GTX1050 Ti' and 'GeForce GTX 1060' each appear twice under 'High';
# the repeats have no effect on the lookup.
# NOTE(review): spelling variants of what looks like the same GPU sit in
# different tiers, e.g. 'GeForce GTX1050 Ti' is 'High' while
# 'GeForce GTX 1050 Ti' / 'GeForce GTX 1050Ti' are 'Medium'. Confirm this is
# intended, and keep this table identical to the one used for training.
GPU_performance = {
    'Low' : ['HD Graphics 400', 'HD Graphics 405', 'HD Graphics 500', 'HD Graphics 505',
        'HD Graphics 510', 'HD Graphics 515', 'HD Graphics 520', 'HD Graphics 530',
        'HD Graphics 5300', 'HD Graphics 540', 'HD Graphics 6000', 'HD Graphics 615',
        'HD Graphics 620', 'HD Graphics 620 ', 'HD Graphics', 'UHD Graphics 620', 'Graphics 620',
        'Iris Graphics 540', 'Iris Graphics 550', 'Iris Plus Graphics 640', 'Iris Plus Graphics 650',
        'Iris Pro Graphics', 'Radeon R2', 'Radeon R2 Graphics', 'Radeon R3', 'Radeon R4', 'Radeon R4 Graphics',
        'Radeon R5', 'Radeon R5 M315', 'Radeon R5 M330', 'Radeon R5 M420', 'Radeon R5 M420X',
        'Radeon R5 M430', 'Radeon R5 430', 'Radeon R5 520', 'Radeon R7', 'Radeon R7 M360', 'Radeon R7 M365X', 
        'Radeon R7 M440','Radeon R7 M445', 'Radeon R7 M460', 'Radeon R7 M465', 'Radeon R7 Graphics',
        'GeForce 920', 'GeForce 920M', 'GeForce 920MX', 'GeForce 920MX ', 'R4 Graphics',
        'GeForce 930M', 'GeForce 930MX', 'GeForce 930MX ', 'GeForce 940M',
        'GeForce 940MX', 'GeForce GT 940MX', 'Mali T860 MP4', 'R17M-M1-70'],

    'Medium' : ['GeForce GTX 950M', 'GeForce GTX 960M', 'GeForce GTX 965M',
        'GeForce GTX 1050', 'GeForce GTX 1050M', 'GeForce GTX 1050 Ti',
        'GeForce GTX 1050Ti', 'GeForce GTX 930MX', 'GeForce MX130', 'GeForce MX150',
        'GeForce 150MX','Radeon 520', 'Radeon 530', 'Radeon 540', 'Radeon RX 540',
        'Radeon RX 550', 'Radeon RX 560','Quadro M520M', 'Quadro M500M',
        'Quadro M1000M', 'Quadro M1200', 'Quadro M2000M', 'Quadro M2200', 'Quadro M2200M',
        'Quadro M620', 'Quadro M620M', 'FirePro W4190M', 'FirePro W4190M ', 'FirePro W5130M',
        'Radeon Pro 455', 'Radeon Pro 555', 'Radeon Pro 560', 'GeForce GTX 940MX', 
        'GeForce GTX 940M', 'GeForce 960M', 'HD Graphics 630', 'Radeon R9 M385'],

    'High' : ['GeForce GTX 960', 'GeForce GTX 960<U+039C>', 'GeForce GTX 970M',
        'GeForce GTX 980', 'GeForce GTX 980 ', 'GeForce GTX 980M', 'GeForce GTX1050 Ti',
        'GeForce GTX 1060', 'GeForce GTX1060', 'GeForce GTX 1070', 'GeForce GTX1050 Ti',
        'GeForce GTX 1070M', 'GeForce GTX 1080', 'GeForce GTX1080',
        'Radeon RX 580', 'FirePro W6150M', 'Quadro 3000M', 'Quadro M3000M', 'GTX 980 SLI',
        'GeForce GTX 1060']
}

In [16]:
# Function that determines the GPU performance tier
def penentuan_gpu_perf(gpu_list, perf_map=None):
    """Classify each GPU model name into a performance tier.

    Parameters
    ----------
    gpu_list : iterable
        GPU model names to classify (e.g. ``df['GPU_model'].tolist()``).
    perf_map : dict, optional
        Mapping of tier label -> collection of model names in that tier.
        When omitted, the notebook-level ``GPU_performance`` table is used,
        which is the original behaviour.

    Returns
    -------
    list
        One tier label per input element, in input order. A model that is
        not listed under any tier is labelled ``'Unknown'``.
    """
    if perf_map is None:
        # Resolved at call time so the table cell only has to run before the
        # first call, not before this definition.
        perf_map = GPU_performance

    # Invert the table once so each model is resolved with a single dict
    # lookup instead of scanning every tier list. setdefault keeps the FIRST
    # tier (in dict order) that lists a model, matching the first-match rule
    # of the original nested loop; repeated entries are therefore harmless.
    tier_by_model = {}
    for tier, models in perf_map.items():
        for model_name in models:
            tier_by_model.setdefault(model_name, tier)

    return [tier_by_model.get(gpu, 'Unknown') for gpu in gpu_list]


In [17]:
# Add the GPU_performance feature: the tier that penentuan_gpu_perf assigns
# to each GPU_model value.
gpu_tiers = penentuan_gpu_perf(df['GPU_model'].tolist())
df = df.assign(GPU_performance=gpu_tiers)
del gpu_tiers  # keep the notebook namespace unchanged
df.head()

Unnamed: 0,Company,Product,TypeName,Inches,OS,Weight,Screen,Touchscreen,IPSpanel,RetinaDisplay,...,PrimaryStorageType,SecondaryStorageType,GPU_company,GPU_model,Ram_category,PrimaryStorage_category,SecondaryStorage_category,Company_name,CPU_performance,GPU_performance
0,Apple,Intel Core 7 CPU,Notebook,13.3,MacOS,1.34,Standard,No,Yes,Yes,...,SSD,HDD,Intel,HD Graphics 620,6,4,3,Apple,Medium,Low


In [18]:
# Remove Company, Product, CPU_model and GPU_model from the frame.
df = df.drop(['Company', 'Product', 'CPU_model', 'GPU_model'], axis='columns')

# Display settings: show every column and never truncate cell contents.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None
df.head(2)

Unnamed: 0,TypeName,Inches,OS,Weight,Screen,Touchscreen,IPSpanel,RetinaDisplay,CPU_company,CPU_freq,PrimaryStorageType,SecondaryStorageType,GPU_company,Ram_category,PrimaryStorage_category,SecondaryStorage_category,Company_name,CPU_performance,GPU_performance
0,Notebook,13.3,MacOS,1.34,Standard,No,Yes,Yes,Intel,2.5,SSD,HDD,Intel,6,4,3,Apple,Medium,Low


In [19]:
# Summarize the columns, non-null counts and dtypes of the frame that is
# passed to the model in the next step.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1 entries, 0 to 0
Data columns (total 19 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   TypeName                   1 non-null      object 
 1   Inches                     1 non-null      float64
 2   OS                         1 non-null      object 
 3   Weight                     1 non-null      float64
 4   Screen                     1 non-null      object 
 5   Touchscreen                1 non-null      object 
 6   IPSpanel                   1 non-null      object 
 7   RetinaDisplay              1 non-null      object 
 8   CPU_company                1 non-null      object 
 9   CPU_freq                   1 non-null      float64
 10  PrimaryStorageType         1 non-null      object 
 11  SecondaryStorageType       1 non-null      object 
 12  GPU_company                1 non-null      object 
 13  Ram_category               1 non-null      object 
 14

Notebook    1
Name: TypeName, dtype: int64

In [20]:
# Run the loaded model on the prepared inference frame.
y_pred_inf = model.predict(df)

In [21]:
# Show the first prediction (the only one when the input frame has one row).
print(y_pred_inf[0])

2398.1367


Berdasarkan kriteria laptop yang dimasukkan, model memprediksi harga laptop tersebut sebesar 2398.1367 euro.