In [1]:
# Import pustaka yang digunakan
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression

In [2]:
# Read the cleaned laptop listing CSV into a DataFrame and preview the first rows.
df = pd.read_csv(filepath_or_buffer='LaptopClean.csv')
df.head(n=5)

Unnamed: 0,Storage,Brand,Model Name,Processor,Operating System,RAM,ScreenSize,Touch_Screen,Price
0,512.0,HP,15s-fq5007TU,Core i3,Windows 11 Home,8.0,15.0,No,38.99
1,512.0,HP,15s-fy5003TU,Core i3,Windows 11 Home,8.0,15.0,No,37.99
2,256.0,Apple,2020 Macbook Air,M1,Mac OS Big Sur,8.0,13.0,No,70.99
3,256.0,Apple,2020 Macbook Air,M1,Mac OS Big Sur,8.0,13.0,No,70.99
4,256.0,Apple,2020 Macbook Air,M1,Mac OS Big Sur,8.0,13.0,No,70.99


In [3]:
# Normalize the column labels to snake_case-like names without spaces.
df = df.rename(
    {
        'Operating System': 'Operating_System',
        'ScreenSize': 'Screen_Size',
        'Model Name': 'Model_Name',
    },
    axis='columns',
)

In [4]:
# Columns kept for modelling, in the order they should appear in `data`:
# categorical descriptors first, then numeric specs, then the target (Price).
new_atribut = ['Brand', 'Model_Name', 'Processor', 'Operating_System', 'Touch_Screen', 'Storage', 'RAM', 'Screen_Size', 'Price']

# Take an explicit copy so that later column assignments on `data` operate on
# an independent frame instead of a slice of `df` (which makes pandas emit
# SettingWithCopyWarning).
data = df[new_atribut].copy()

In [5]:
# Preview the reordered working frame.
data.head(n=5)

Unnamed: 0,Brand,Model_Name,Processor,Operating_System,Touch_Screen,Storage,RAM,Screen_Size,Price
0,HP,15s-fq5007TU,Core i3,Windows 11 Home,No,512.0,8.0,15.0,38.99
1,HP,15s-fy5003TU,Core i3,Windows 11 Home,No,512.0,8.0,15.0,37.99
2,Apple,2020 Macbook Air,M1,Mac OS Big Sur,No,256.0,8.0,13.0,70.99
3,Apple,2020 Macbook Air,M1,Mac OS Big Sur,No,256.0,8.0,13.0,70.99
4,Apple,2020 Macbook Air,M1,Mac OS Big Sur,No,256.0,8.0,13.0,70.99


In [6]:
# Print the per-column dtype and non-null count summary of the working frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 837 entries, 0 to 836
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Brand             837 non-null    object 
 1   Model_Name        837 non-null    object 
 2   Processor         837 non-null    object 
 3   Operating_System  837 non-null    object 
 4   Touch_Screen      837 non-null    object 
 5   Storage           837 non-null    float64
 6   RAM               837 non-null    float64
 7   Screen_Size       837 non-null    float64
 8   Price             837 non-null    float64
dtypes: float64(4), object(5)
memory usage: 59.0+ KB


In [7]:
# Convert the numeric spec columns from float to integer in a single pass.
# The cast is applied to `data` itself (not re-read from `df`) and the result
# is rebound, so no column is assigned into a slice of another frame.
# Note: astype(int) truncates any fractional part rather than rounding.
data = data.astype({
    'Storage': int,
    'RAM': int,
    'Screen_Size': int,
})

In [8]:
# Print the dtype / non-null summary again to confirm the result of the casts.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 837 entries, 0 to 836
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Brand             837 non-null    object 
 1   Model_Name        837 non-null    object 
 2   Processor         837 non-null    object 
 3   Operating_System  837 non-null    object 
 4   Touch_Screen      837 non-null    object 
 5   Storage           837 non-null    int64  
 6   RAM               837 non-null    int64  
 7   Screen_Size       837 non-null    int64  
 8   Price             837 non-null    float64
dtypes: float64(1), int64(3), object(5)
memory usage: 59.0+ KB


In [9]:
# Show the first rows with the integer-typed spec columns.
data.head(n=5)

Unnamed: 0,Brand,Model_Name,Processor,Operating_System,Touch_Screen,Storage,RAM,Screen_Size,Price
0,HP,15s-fq5007TU,Core i3,Windows 11 Home,No,512,8,15,38.99
1,HP,15s-fy5003TU,Core i3,Windows 11 Home,No,512,8,15,37.99
2,Apple,2020 Macbook Air,M1,Mac OS Big Sur,No,256,8,13,70.99
3,Apple,2020 Macbook Air,M1,Mac OS Big Sur,No,256,8,13,70.99
4,Apple,2020 Macbook Air,M1,Mac OS Big Sur,No,256,8,13,70.99


In [10]:
# Remove the brand and model-name columns; they are not used as model features.
data = data.drop(['Brand', 'Model_Name'], axis='columns')
data.head(n=5)

Unnamed: 0,Processor,Operating_System,Touch_Screen,Storage,RAM,Screen_Size,Price
0,Core i3,Windows 11 Home,No,512,8,15,38.99
1,Core i3,Windows 11 Home,No,512,8,15,37.99
2,M1,Mac OS Big Sur,No,256,8,13,70.99
3,M1,Mac OS Big Sur,No,256,8,13,70.99
4,M1,Mac OS Big Sur,No,256,8,13,70.99


In [11]:
# Encode the categorical columns as integer codes.
#
# NOTE(review): the codes below are arbitrary labels, but a linear model will
# treat them as ordered magnitudes. One-hot encoding may be a better fit for
# Processor / Operating_System; confirm before relying on the coefficients.
processor_mapping = {
    'Core i3': 0, 'M1': 1, 'Core i7': 2, 'Core i5': 3, 'Ryzen 5 Hexa Core': 4,
    'Celeron Dual Core': 5, 'Ryzen 7 Octa Core': 6, 'Ryzen 5 Quad Core': 7,
    'Ryzen 3 Dual Core': 8, 'Ryzen 3 Quad Core': 9, 'M2': 10,
    'Celeron Quad Core': 11, 'Athlon Dual Core': 12,
    'MediaTek Kompanio 1200': 13, 'Ryzen 9 Octa Core': 14,
    'MediaTek MT8788': 15, 'Ryzen Z1 HexaCore': 16,
    'MediaTek Kompanio 500': 17, 'Core i9': 18, 'MediaTek Kompanio 520': 19,
    'Ryzen Z1 Octa Core': 20, 'Pentium Silver': 21, 'Ryzen 5': 22,
    'M1 Max': 23, 'M2 Max': 24, 'M3 Pro': 25, 'M1 Pro': 26,
    'Ryzen 7 Quad Core': 27, 'Ryzen 5 Dual Core': 28, 'Ryzen 9 16 Core': 29,
}

os_mapping = {
    'Windows 11 Home': 0, 'Mac OS Big Sur': 1, 'DOS': 2, 'Mac OS Monterey': 3,
    'Chrome': 4, 'Windows 10': 5, 'Windows 10 Home': 6, 'Prime OS': 7,
    'Windows 11 Pro': 8, 'Ubuntu': 9, 'Windows 10 Pro': 10,
    'macOS Ventura': 11, 'macOS Sonoma': 12, 'Mac OS Mojave': 13,
}

ts_mapping = {'No': 0, 'Yes': 1}

# Column name -> lookup table used to encode that column.
categorical_encoders = {
    'Processor': processor_mapping,
    'Operating_System': os_mapping,
    'Touch_Screen': ts_mapping,
}

# Validate every column before touching any of them: Series.map would
# otherwise turn unlisted categories into NaN without any error. This also
# gives a clear message if the cell is re-run on already-encoded data.
for column_name, encoder in categorical_encoders.items():
    unmapped = set(data[column_name].dropna().unique()) - set(encoder)
    if unmapped:
        raise ValueError(
            f"Column {column_name!r} contains values with no integer code: "
            f"{sorted(unmapped, key=str)}"
        )

for column_name, encoder in categorical_encoders.items():
    data[column_name] = data[column_name].map(encoder)

In [12]:
# Show the first rows after the categorical columns were replaced by codes.
data.head(n=5)

Unnamed: 0,Processor,Operating_System,Touch_Screen,Storage,RAM,Screen_Size,Price
0,0,0,0,512,8,15,38.99
1,0,0,0,512,8,15,37.99
2,1,1,0,256,8,13,70.99
3,1,1,0,256,8,13,70.99
4,1,1,0,256,8,13,70.99


In [13]:
# Count missing values per column (any NaN here would mean an unmapped category).
data.isna().sum()

Processor           0
Operating_System    0
Touch_Screen        0
Storage             0
RAM                 0
Screen_Size         0
Price               0
dtype: int64

In [14]:
# Target vector: the laptop price column.
y = data['Price']
# Feature matrix: every remaining column.
X = data.drop('Price', axis='columns')

In [15]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [16]:
# Fit an ordinary least squares regression on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
linear_model = LinearRegression().fit(X_train, y_train)
linear_model

In [17]:
# Evaluate the fitted model on the held-out test split.
pred_linear = linear_model.predict(X_test)

mse_linear = mean_squared_error(y_test, pred_linear)
mae_linear = mean_absolute_error(y_test, pred_linear)
# RMSE is derived from the MSE directly. The `squared=False` argument of
# mean_squared_error is deprecated in scikit-learn 1.4 and removed in 1.6,
# so it is avoided here to keep the cell working across versions.
rmse_linear = float(np.sqrt(mse_linear))
r2_linear = r2_score(y_test, pred_linear)

print(f'Mean Squared Error (MSE): {mse_linear:.2f}')
print(f'Mean Absolute Error (MAE): {mae_linear:.2f}')
print(f'Root Mean Squared Error (RMSE): {rmse_linear:.2f}')
print(f'Nilai R-squared dari model Regresi Linear sebesar: {r2_linear:.2f}')

Mean Squared Error (MSE): 447.95
Mean Absolute Error (MAE): 14.77
Root Mean Squared Error (RMSE): 21.16
Nilai R-squared dari model Regresi Linear sebesar: 0.18


In [18]:
# Peek at the training features to see the column order the model was fitted on.
X_train.head(n=5)

Unnamed: 0,Processor,Operating_System,Touch_Screen,Storage,RAM,Screen_Size
363,9,0,0,512,8,14
250,15,7,0,128,4,11
465,3,0,0,512,8,15
346,3,0,0,512,16,15
585,4,0,1,512,8,15


In [19]:
# Interactive single-row prediction: read one laptop spec from the user,
# encode it with the same lookup tables used for training, and predict.
Processor = input('Processor: ').strip()
Operating_System = input('Operating System: ').strip()
Touch_Screen = int(input('Touch Screen: (0 or 1): '))
Storage = int(input('Storage: '))
RAM = int(input('RAM: '))
Screen_Size = int(input('Screen Size: '))

# Reject categories the model has never seen instead of silently encoding
# them as -1, which is not a code present in the training data and would
# yield a meaningless prediction.
if Processor not in processor_mapping:
    raise ValueError(
        f"Unknown processor {Processor!r}; expected one of: {sorted(processor_mapping)}"
    )
if Operating_System not in os_mapping:
    raise ValueError(
        f"Unknown operating system {Operating_System!r}; expected one of: {sorted(os_mapping)}"
    )
if Touch_Screen not in (0, 1):
    raise ValueError(f"Touch screen flag must be 0 or 1, got {Touch_Screen}")

# Encode processor and operating system
processor_encode = processor_mapping[Processor]
os_encode = os_mapping[Operating_System]

# Keys are listed in the same order as the training feature columns.
dataInference = {
    'Processor': [processor_encode],
    'Operating_System': [os_encode],
    'Touch_Screen': [Touch_Screen],
    'Storage': [Storage],
    'RAM': [RAM],
    'Screen_Size': [Screen_Size],
}

dataInference_df = pd.DataFrame(dataInference)
pred_inference = linear_model.predict(dataInference_df)
print('Prediksi Harga Laptop dalam Rupee Sebesar', pred_inference)

Processor: Ryzen 5 Hexa Core
Operating System: Windows 11 Home
Touch Screen: (0 or 1): 0
Storage: 512
RAM: 8
Screen Size: 15
Prediksi Harga Laptop dalam Rupee Sebesar [40.50733547]


In [21]:
import pickle

# Persist the fitted estimator. The context manager guarantees the file
# handle is flushed and closed even if serialization fails.
# Only the estimator is saved here; the category lookup tables used to encode
# inputs are not part of this file.
# NOTE(review): only unpickle this file from a trusted location, since
# pickle.load can execute arbitrary code.
with open('linear.pkl', 'wb') as model_file:
    pickle.dump(linear_model, model_file)