# Inisiasi Library yang akan digunakan


In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [5]:
# Load the laptop dataset; the relative path is resolved against the current working directory.
df = pd.read_csv('Laptop_price.csv')

In [None]:
# Print the DataFrame summary (columns, non-null counts, dtypes) for a first look at the data.
df.info()

In [None]:
# Count the missing values in each column.
df.isnull().sum()

# Visualisasi Data

In [None]:
# Plot the boolean missing-value mask as a heatmap so any gaps show up as contrasting cells.
sns.heatmap(df.isnull())

In [None]:
# Count how many laptops belong to each brand
brand_counts = df["Brand"].value_counts()

# Draw the brand shares as a pie chart on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    brand_counts,
    labels=brand_counts.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=plt.cm.Paired.colors,
)

# Chart title
ax.set_title("Proporsi Jumlah Laptop per Brand", fontsize=14)

# Tighten the layout and render the figure
fig.tight_layout()
plt.show()


In [None]:
# Scatter plot of price against RAM size
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(
    df["RAM_Size"],
    df["Price"],
    alpha=0.7,
    color='teal',
    edgecolor='black',
)

# Title, axis labels and a faint grid
ax.set_title("Hubungan antara RAM dan Harga Laptop", fontsize=14)
ax.set_xlabel("RAM (GB)", fontsize=12)
ax.set_ylabel("Harga (dalam satuan dataset)", fontsize=12)
ax.grid(alpha=0.5)

# Tighten the layout and render the figure
fig.tight_layout()
plt.show()


In [None]:
# Scatter plot of price against storage capacity
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(
    df["Storage_Capacity"],
    df["Price"],
    alpha=0.7,
    color='purple',
    edgecolor='black',
)

# Title, axis labels and a faint grid
ax.set_title("Hubungan antara Kapasitas Penyimpanan dan Harga Laptop", fontsize=14)
ax.set_xlabel("Kapasitas Penyimpanan (GB)", fontsize=12)
ax.set_ylabel("Harga (dalam satuan dataset)", fontsize=12)
ax.grid(alpha=0.5)

# Tighten the layout and render the figure
fig.tight_layout()
plt.show()


In [None]:
# Histogram of the laptop price distribution (30 bins)
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df["Price"], bins=30, color='blue', edgecolor='black', alpha=0.7)

# Title, axis labels and dashed horizontal grid lines
ax.set_title("Distribusi Harga Laptop", fontsize=14)
ax.set_xlabel("Harga (dalam satuan dataset)", fontsize=12)
ax.set_ylabel("Frekuensi", fontsize=12)
ax.grid(axis='y', linestyle='--', alpha=0.7)

# Tighten the layout and render the figure
fig.tight_layout()
plt.show()


# Pembuatan Model

In [None]:
# Columns whose string labels are replaced in place by integer codes.
label_cols = ['Brand']

# One fitted encoder per column, keyed by column name. Keeping them lets the
# integer codes be mapped back to the original labels (inverse_transform) or
# applied to new data (transform); a single shared encoder would be re-fit on
# every iteration and only remember the last column.
encoders = {}
for col in label_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le
    print(df[col])
# `le` still refers to the encoder of the last column in label_cols.
# NOTE(review): the codes are plain integers, so the models below will treat
# the column as ordered numeric data - confirm that is acceptable.

In [None]:
# Features: every column except 'Weight' and the target itself.
X = df.drop(columns=['Weight', 'Price'])
# Target: the price to be predicted.
y = df['Price']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(X)

In [12]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits so the test rows do not influence them.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    AdaBoostRegressor,
    BaggingRegressor,
    ExtraTreesRegressor,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from lightgbm import LGBMRegressor

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Estimators that only need a fixed seed, listed in the order they are reported.
seeded = {
    'Random Forest': RandomForestRegressor,
    'Gradient Boosting': GradientBoostingRegressor,
    'AdaBoost': AdaBoostRegressor,
    'Bagging': BaggingRegressor,
    'Extra Trees': ExtraTreesRegressor,
    'Decision Tree': DecisionTreeRegressor,
}

# Candidate regressors keyed by display name; insertion order is the print order.
models = {label: estimator(random_state=42) for label, estimator in seeded.items()}
models['Linear Regression'] = LinearRegression()
models['Support Vector Regression'] = SVR()
models['LightGBM'] = LGBMRegressor(random_state=42, verbose=-1)


# Fit each candidate on the training split and report its error on the test split.
for name, model in models.items():
    y_pred = model.fit(X_train, y_train).predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(
        f'{name}:\n'
        f'Mean Absolute Error: {mae}\n'
        f'Mean Squared Error: {mse}\n'
        f'R² Score: {r2}\n'
        "--------------------------------------"
    )

In [None]:
# Print the DataFrame summary again to inspect the column dtypes at this point in the notebook.
df.info()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np


# Split the feature matrix X and target y again (80/20, fixed seed). This
# rebinds X_train / X_test, so the regression below is fitted on X as it is,
# not on any scaled copy made earlier.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Final model: ordinary least-squares linear regression.
lr = LinearRegression()
lr.fit(X_train, y_train)

# R² on the rows the model was fitted on (kept for backward compatibility).
score = lr.score(X_train, y_train)
print(score)

# R² on the held-out rows; the training score alone cannot reveal overfitting,
# so this is the figure that estimates performance on unseen data.
test_score = lr.score(X_test, y_test)
print(f'Test R² Score: {test_score}')

In [16]:
import pickle

# Destination of the serialized model; the name is kept unchanged because
# other code may load the file by this exact path.
filename = 'estimasi_latop.sav'

# Serialize the fitted model. The context manager closes the file handle
# (flushing the data to disk) even if pickling raises.
with open(filename, 'wb') as model_file:
    pickle.dump(lr, model_file)

In [17]:
import pickle

# Export the fitted model to a .pkl file
model_path = 'estimasi_harga_laptop_model.pkl'
with open(model_path, mode='wb') as file:
    pickle.dump(lr, file)
