In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats

import regex as re
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

#import xgboost
#from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor

import optuna

import warnings
warnings.filterwarnings("ignore")
import random

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

import plotly.express as px

In [None]:
# Global plotting configuration shared by every figure in the notebook.
sns.set_style("whitegrid")

# With no figure open yet, `sns.despine()` has no axes to act on (it would
# only implicitly create an empty figure), so the call originally made here
# is dropped.

# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases removed the old names; pick whichever one is installed.
_whitegrid_style = "seaborn-v0_8-whitegrid"
if _whitegrid_style not in plt.style.available:
    _whitegrid_style = "seaborn-whitegrid"
plt.style.use(_whitegrid_style)

plt.rc("figure", autolayout=True)
plt.rc("axes", labelweight="bold", labelsize="large", titleweight="bold", titlesize=14, titlepad=10)

# Seed the global RNGs so that randomly drawn bar colours and seaborn's
# bootstrapped confidence bands are identical on every Restart & Run All.
seed = 42
random.seed(seed)
np.random.seed(seed)

In [None]:
# Load the cleaned laptop dataset; the path is relative to the notebook's
# working directory.
laptop = pd.read_csv(filepath_or_buffer='laptop_clean.csv')

# Bare expression so the notebook renders the frame as the cell output.
laptop

In [None]:
# Print the frame summary (columns, non-null counts, dtypes) right after loading.
laptop.info()

In [None]:
# Three views of the `company` column, separated by blank lines:
# the raw column, its distinct values, and the number of rows per company.
print(
    laptop['company'],
    "",
    laptop['company'].unique(),
    "",
    laptop['company'].value_counts(),
    sep="\n",
)

In [None]:
# Number of laptops per company, most frequent first.
jumlah_company = laptop['company'].value_counts()

# One random "#RRGGBB" colour per bar.
hex_digits = '0123456789ABCDEF'
colors = ['#' + ''.join(random.choices(hex_digits, k=6)) for _ in jumlah_company.index]

# Draw on an explicit 12x8 axes instead of relying on the implicit current figure.
fig, ax = plt.subplots(figsize=(12, 8))
jumlah_company.plot(kind='bar', color=colors, ax=ax)
ax.set_title("Distribusi Produk Berdasarkan Perusahaan")
ax.set_xlabel('Company')
ax.set_ylabel('Jumlah')
plt.xticks(rotation=45)

# Write each bar's height just above its top edge, centred horizontally.
for bar in ax.patches:
    bar_top = bar.get_height()
    bar_mid = bar.get_x() + bar.get_width() / 2
    ax.annotate(str(bar_top), (bar_mid, bar_top), ha='center', va='bottom')

plt.show()

In [None]:
# Rows per company as a two-column frame (company, counts), largest first.
grouped_company = (
    laptop
    .groupby('company')
    .size()
    .reset_index(name='counts')
    .sort_values(by='counts', ascending=False)
)

# Interactive version of the company bar chart, one colour per company.
fig = px.bar(
    grouped_company,
    x='company',
    y='counts',
    color='company',
    labels={'counts':'Jumlah Laptop'},
    title="Jumlah Laptop berdasarkan Perusahaan",
)
fig.show()

In [None]:
# Same summary call as the one made right after loading; re-printed here
# before the per-category plots.
laptop.info()

In [None]:
# Number of laptops per category, most frequent first.
jumlah_kategori = laptop['category'].value_counts()

# One 'Set1' palette colour per category bar.
colors = sns.color_palette('Set1', len(jumlah_kategori))

# Draw on an explicit 12x8 axes instead of relying on the implicit current figure.
fig, ax = plt.subplots(figsize=(12, 8))
jumlah_kategori.plot(kind='bar', color=colors, ax=ax)
ax.set_title('Distribusi Laptop Berdasarkan Kategori/Tipe Laptop')
ax.set_xlabel('Kategori')
ax.set_ylabel('Jumlah Laptop')
plt.xticks(rotation=0)

# Write each bar's height just above its top edge, centred horizontally.
for bar in ax.patches:
    bar_top = bar.get_height()
    bar_mid = bar.get_x() + bar.get_width() / 2
    ax.annotate(str(bar_top), (bar_mid, bar_top), ha='center', va='bottom')

plt.show()

In [None]:
# Rows per category as a two-column frame (category, counts), largest first.
grouped_kategori = (
    laptop
    .groupby('category')
    .size()
    .reset_index(name='counts')
    .sort_values(by='counts', ascending=False)
)

# Interactive version of the category bar chart, one colour per category.
fig = px.bar(
    grouped_kategori,
    x='category',
    y='counts',
    color='category',
    labels={'counts': 'Jumlah Laptop'},
    title="Jumlah Laptop Berdasarkan Kategori/Tipe Laptop",
)
fig.show()

In [None]:
# Cast the screen column to strings so each distinct value is treated as a
# label rather than a number in the plot that follows.
laptop['screen(pixel)'] = laptop['screen(pixel)'].astype(str)

# Distinct screen values present in the data.
laptop['screen(pixel)'].unique()

In [None]:
# Laptops per screen value, ordered by the (string) screen label.
jumlah_screen = laptop['screen(pixel)'].value_counts().sort_index()

# One 'Set1' palette colour per bar.
colors = sns.color_palette('Set1', len(jumlah_screen))

# Draw on an explicit 10x6 axes instead of relying on the implicit current figure.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(jumlah_screen.index, jumlah_screen.values, color=colors)
ax.set_title("Distribusi Laptop Berdasarkan Ukuran Layar")
ax.set_xlabel('Ukuran Layar (inch)')
ax.set_ylabel('Jumlah Laptop')

plt.show()

In [None]:
# How many laptops there are for each distinct RAM size.
laptop['ram(GB)'].value_counts()

In [None]:
# RAM on the horizontal axis, price on the vertical axis.
x, y = laptop['ram(GB)'], laptop['price']

# Scatter of price against RAM on an explicit 10x6 axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(x, y)
ax.set_title('Korelasi Antara RAM(GB) dengan Harga')
ax.set_xlabel('RAM (GB)')
ax.set_ylabel('Harga')

plt.show()

In [None]:
# Count of laptops per company, one facet per category (two facets per row).
# The frame is passed as `data` and the variables are named by column: the
# original passed a Series positionally as `data` alongside vector x/col/hue,
# which newer seaborn releases reject for long-form input
# (NOTE(review): confirm against the installed seaborn version).
sns.displot(
    data=laptop,
    x='company',
    hue='company',
    col='category',
    col_wrap=2,
    height=6,
    aspect=3.5,
    palette='Set1',
)

plt.show()

In [None]:
sns.set(style='darkgrid')

# `regplot` is axes-level while `lmplot` is figure-level and always opens its
# own figure. Calling plt.title/xlabel/ylabel afterwards therefore only reached
# the lmplot figure and left the regplot figure untitled; label each figure
# explicitly instead.
fig, ax = plt.subplots()
sns.regplot(data=laptop, x='ram(GB)', y='price', ax=ax)
ax.set(title="Korelasi Antara RAM(GB) dan Harga", xlabel='Ram(GB)', ylabel='Price')

# FacetGrid.set forwards the keyword arguments to the grid's axes.
grid = sns.lmplot(data=laptop, x='ram(GB)', y='price')
grid.set(title="Korelasi Antara RAM(GB) dan Harga", xlabel='Ram(GB)', ylabel='Price')

plt.show()

In [None]:
# Min-max scaling: (value - min) / (max - min), mapping each column to [0, 1].
# The subtraction must be parenthesised; without it only `min` is divided by
# the range (division binds tighter than subtraction) and the result is just
# the original column shifted by a small constant.
price_min = laptop['price'].min()
price_max = laptop['price'].max()
laptop['price_normalized'] = (laptop['price'] - price_min) / (price_max - price_min)

ram_min = laptop['ram(GB)'].min()
ram_max = laptop['ram(GB)'].max()
laptop['ram_normalized'] = (laptop['ram(GB)'] - ram_min) / (ram_max - ram_min)

# Original and scaled columns side by side.
print(laptop[['price', 'price_normalized', 'ram(GB)', 'ram_normalized']])

Simple Feature Scaling

In [None]:
# Simple feature scaling: divide every value by the column maximum.
laptop["price_normalized"] = laptop["price"] / laptop["price"].max()
laptop["ram(GB)_normalized"] = laptop["ram(GB)"] / laptop["ram(GB)"].max()

# Show the columns this cell just computed. The original printed
# `ram_normalized`, a column left over from an earlier cell, so the RAM
# values on screen did not reflect this scaling.
print(laptop[['price', 'price_normalized', 'ram(GB)', 'ram(GB)_normalized']])

In [None]:
sns.set(style='darkgrid')

# `regplot` is axes-level while `lmplot` is figure-level and always opens its
# own figure. Calling plt.title/xlabel/ylabel afterwards therefore only reached
# the lmplot figure and left the regplot figure untitled; label each figure
# explicitly instead.
fig, ax = plt.subplots()
sns.regplot(data=laptop, x='ram(GB)_normalized', y='price_normalized', ax=ax)
ax.set(title="Korelasi Antara RAM(GB) dan Harga", xlabel='Ram(GB)', ylabel='Price')

# FacetGrid.set forwards the keyword arguments to the grid's axes.
grid = sns.lmplot(data=laptop, x='ram(GB)_normalized', y='price_normalized')
grid.set(title="Korelasi Antara RAM(GB) dan Harga", xlabel='Ram(GB)', ylabel='Price')

plt.show()

Min-Max

In [None]:
# Min-max scaling: (value - min) / (max - min), mapping each column to [0, 1].
laptop["price_normalized"] = (laptop["price"] - laptop["price"].min()) / (laptop["price"].max() - laptop["price"].min())
laptop["ram(GB)_normalized"] = (laptop["ram(GB)"] - laptop["ram(GB)"].min()) / (laptop["ram(GB)"].max() - laptop["ram(GB)"].min())

# Show the columns this cell just computed. The original printed
# `ram_normalized`, a column left over from an earlier cell, so the RAM
# values on screen did not reflect this scaling.
print(laptop[['price', 'price_normalized', 'ram(GB)', 'ram(GB)_normalized']])

In [None]:
sns.set(style='darkgrid')

# `regplot` is axes-level while `lmplot` is figure-level and always opens its
# own figure. Calling plt.title/xlabel/ylabel afterwards therefore only reached
# the lmplot figure and left the regplot figure untitled; label each figure
# explicitly instead.
fig, ax = plt.subplots()
sns.regplot(data=laptop, x='ram(GB)_normalized', y='price_normalized', ax=ax)
ax.set(title="Korelasi Antara RAM(GB) dan Harga", xlabel='Ram(GB)', ylabel='Price')

# FacetGrid.set forwards the keyword arguments to the grid's axes.
grid = sns.lmplot(data=laptop, x='ram(GB)_normalized', y='price_normalized')
grid.set(title="Korelasi Antara RAM(GB) dan Harga", xlabel='Ram(GB)', ylabel='Price')

plt.show()

Z-Score

In [None]:
# Z-score standardisation: (value - mean) / standard deviation.
# pandas' Series.std() uses the sample standard deviation (ddof=1) by default.
laptop["price_normalized"] = (laptop["price"] - laptop["price"].mean()) / laptop["price"].std()
laptop["ram(GB)_normalized"] = (laptop["ram(GB)"] - laptop["ram(GB)"].mean()) / laptop["ram(GB)"].std()

# Show the columns this cell just computed. The original printed
# `ram_normalized`, a column left over from an earlier cell, so the RAM
# values on screen did not reflect this scaling.
print(laptop[['price', 'price_normalized', 'ram(GB)', 'ram(GB)_normalized']])

In [None]:
sns.set(style='darkgrid')

# `regplot` is axes-level while `lmplot` is figure-level and always opens its
# own figure. Calling plt.title/xlabel/ylabel afterwards therefore only reached
# the lmplot figure and left the regplot figure untitled; label each figure
# explicitly instead.
fig, ax = plt.subplots()
sns.regplot(data=laptop, x='ram(GB)_normalized', y='price_normalized', ax=ax)
ax.set(title="Korelasi Antara RAM(GB) dan Harga", xlabel='Ram(GB)', ylabel='Price')

# FacetGrid.set forwards the keyword arguments to the grid's axes.
grid = sns.lmplot(data=laptop, x='ram(GB)_normalized', y='price_normalized')
grid.set(title="Korelasi Antara RAM(GB) dan Harga", xlabel='Ram(GB)', ylabel='Price')

plt.show()

In [None]:
# Summary statistics of the frame's numeric columns, which at this point
# include the normalized columns added by the cells above.
laptop.describe()