In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.ensemble import AdaBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor

import torch

from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

In [None]:
# Kaggle input location of the pre-cleaned laptop price dataset.
DATA_PATH = "/kaggle/input/laptop-price-prediction-cleaned-dataset/laptop_data_cleaned.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Preview the first five rows of the dataset.
df.head(n=5)

# EDA with Visualisations 📈📉📊

In [None]:
# Share of laptops per manufacturer.
px.pie(
    data_frame=df,
    names='Company',
    title='Most popular companies',
)

In [None]:
# Share of laptops per form factor / type.
px.pie(
    data_frame=df,
    names='TypeName',
    title='Most popular types of laptops',
)

In [None]:
# Share of laptops per CPU brand.
px.pie(
    data_frame=df,
    names='Cpu_brand',
    title='Laptopts with which CPUs are sold the most',
)

In [None]:
# Share of laptops per GPU brand.
px.pie(
    data_frame=df,
    names='Gpu_brand',
    title='Laptopts with which GPUs are sold the most',
)

In [None]:
# Split the laptops by CPU vendor and count the GPU brands inside each group.
intels = ['Intel Core i7', 'Intel Core i5', 'Other Intel Processor', 'Intel Core i3']
intel_gpu = df.loc[df['Cpu_brand'].isin(intels)]
amd_gpu = df.loc[df['Cpu_brand'] == 'AMD Processor']
intel_gpu_f = intel_gpu['Gpu_brand'].value_counts()
amd_gpu_f = amd_gpu['Gpu_brand'].value_counts()

# Two side-by-side donut charts: Intel on the left, AMD on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
donuts = [
    (intel_gpu_f, 'Most used GPUs with Intel CPUs'),
    (amd_gpu_f, 'Most used GPUs with AMD CPUs'),
]
for col, (counts, trace_name) in enumerate(donuts, start=1):
    fig.add_trace(go.Pie(labels=counts.index, values=counts, name=trace_name), 1, col)
fig.update_traces(hole=.5, hoverinfo="label+percent+name")

# Captions placed in the hole of each donut.
intel_caption = dict(text='GPUs with Intel CPUs', x=0.13, y=0.5, font_size=15, showarrow=False)
amd_caption = dict(text='GPUs with AMD', x=0.85, y=0.5, font_size=15, showarrow=False)
fig.update_layout(
    title_text="Popular GPUs among two different CPU Companies",
    annotations=[intel_caption, amd_caption],
)
fig.show()

In [None]:
# Mean laptop price per company.
grouped = df.groupby('Company')
prices = grouped['Price'].mean()
p_df = pd.DataFrame(prices)

fig, axes = plt.subplots(1, 2, figsize=(15, 10))
# Left panel: the 10 most expensive companies; right panel: the 10 cheapest.
for ax, ascending in zip(axes, (False, True)):
    p_df = p_df.sort_values('Price', ascending=ascending)
    sns.barplot(x=p_df.index[:10], y=p_df['Price'].iloc[:10], ax=ax)
    # Write the mean price on top of every bar.
    for container in ax.containers:
        ax.bar_label(container, size=10, color='black')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45)

plt.suptitle("Top 10 companies with highest and lowest average laptop prices")
plt.show()

In [None]:
# Input columns fed to every model, and the column being predicted.
features = [
    'TypeName',
    'Ram',
    'Weight',
    'TouchScreen',
    'Ips',
    'Ppi',
    'Cpu_brand',
    'HDD',
    'SSD',
    'Gpu_brand',
    'Os',
]
target = 'Price'

# Scaling and encoding our values

In [None]:
# Label-encode every feature column, keeping one fitted encoder per column
# (same order as `features`) so an encoding can be inverted later.
# A fresh LabelEncoder is created on each iteration: re-fitting a single shared
# instance would leave `encoders` holding N references to one object that only
# remembers the classes of the last column.
# NOTE(review): columns such as 'Weight' or 'Ppi' look numeric; label-encoding
# replaces their values with class indices - confirm this is intended.
encoders = []
for column in features:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    encoders.append(le)

In [None]:
# Rescale the features and the target to [0, 1] with separate scalers so the
# target scaler can be used on its own to map predictions back to prices.
# NOTE(review): both scalers are fitted on the full frame, i.e. before the
# train/test split performed further down, so test rows influence the scaling.
feat_mms = MinMaxScaler().fit(df[features])
df[features] = feat_mms.transform(df[features])

target_mms = MinMaxScaler().fit(df[[target]])
df[target] = target_mms.transform(df[[target]])

In [None]:
# Design matrix (feature columns) and regression target.
x = df[features]
y = df[target]

In [None]:
# Hold out 20% of the rows for testing; the seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
def training(model, name):
    """Fit ``model`` on the training split and print its test-set metrics.

    Relies on the module-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test`` produced by the train/test split.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
    name : str
        Label printed in front of the metrics.

    Returns
    -------
    The same ``model`` object, now fitted.
    """
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    # scikit-learn metrics take (y_true, y_pred). R^2 is not symmetric, so
    # passing the predictions first reports a wrong score.
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    print("{}\nr2:{}\nMSE:{}".format(name, r2, mse))
    return model

# Defining models and tuning their hyperparameters

In [None]:
# Fixed seed so the stochastic estimators (random forest, AdaBoost, XGBoost)
# produce the same scores on every Restart & Run All.
SEED = 42

rfr = RandomForestRegressor(n_estimators=60, max_depth=150, criterion='friedman_mse', random_state=SEED)
svr = SVR(C=0.9)
lnr = LinearRegression()
knr = KNeighborsRegressor(n_neighbors=3)
abr = AdaBoostRegressor(learning_rate=0.9, random_state=SEED)
xgb = XGBRegressor(random_state=SEED)

# `models` and `names` are parallel lists: names[k] is the label of models[k].
models = [rfr, svr, lnr, knr, abr, xgb]
names = ['Random Forest', 'SVR', 'Linear Regression',
        'KNN', 'Ada Boost', 'XGBoost']

In [None]:
# Fit every estimator in turn and keep the fitted objects in definition order.
trained = []
separator = "-" * 10
for estimator, label in zip(models, names):
    fitted = training(estimator, label)
    trained.append(fitted)
    print(separator)

# Building our Deep Learning model 🚀

In [None]:
class PricePredictor(torch.nn.Module):
    """Small fully connected regression network.

    Two hidden stages (Linear -> ReLU -> BatchNorm1d) of width 32 and 64,
    followed by a linear output head.

    Parameters
    ----------
    in_channels : int
        Number of input features per sample.
    out_channels : int
        Number of values predicted per sample.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        nn = torch.nn
        hidden_stack = [
            nn.Linear(in_channels, 32),
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
        ]
        self.layers = nn.Sequential(*hidden_stack)
        self.fc = nn.Linear(64, out_channels)

    def forward(self, x):
        """Run ``x`` through the hidden stack and then the output head."""
        hidden = self.layers(x)
        return self.fc(hidden)

In [None]:
# Seed torch before the network is built so the random weight initialisation,
# and therefore the reported loss and scores, are reproducible across runs.
torch.manual_seed(42)

model = PricePredictor(len(features), 1)
# NOTE(review): `device` is computed but nothing in this notebook moves the
# model or the tensors onto it, so everything stays on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Plain SGD on a mean-squared-error objective.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
criterion = torch.nn.MSELoss()

In [None]:
def _to_float_tensor(array):
    """Convert a NumPy array into a torch float tensor."""
    return torch.Tensor(array).type(torch.FloatTensor)


# Features are passed as-is; targets are reshaped into a single column
# to line up with the network's one-value-per-sample output.
xt_train = _to_float_tensor(x_train.values)
yt_train = _to_float_tensor(y_train.values.reshape(-1, 1))
xt_test = _to_float_tensor(x_test.values)
yt_test = _to_float_tensor(y_test.values.reshape(-1, 1))

# Training our Deep Learning Model 🏋️‍♀️

In [None]:
N_EPOCHS = 500   # full-batch gradient steps
LOG_EVERY = 100  # print the training loss every this many epochs

# Nothing inside the loop switches the model to eval mode, so training mode
# only needs to be enabled once.
model.train()
for i in range(1, N_EPOCHS + 1):
    optimizer.zero_grad()
    out = model(xt_train)
    loss = criterion(out, yt_train)
    if i % LOG_EVERY == 0:
        # `criterion` is an MSELoss with the default 'mean' reduction, so the
        # loss is already averaged over the samples; dividing by the batch
        # size again would under-report it.
        print("Epoch {} loss:".format(i), loss.item())
    loss.backward()
    optimizer.step()

# Evaluating results of our model 📈

In [None]:
# Switch BatchNorm to its running statistics before predicting on the test set.
model.eval()
predicted = model(xt_test)
preds = predicted.detach().numpy()

# scikit-learn metrics take (y_true, y_pred). R^2 is not symmetric, so the
# ground truth `y_test` must come first for the score to be correct.
mse = mean_squared_error(y_test, preds)
print("Mean Square Error: {}".format(mse))
print("R2 score: {}".format(r2_score(y_test, preds)))