In [71]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
import statsmodels.api as sm
from sklearn.preprocessing import LabelEncoder


In [72]:
df = pd.read_csv("Laptop_price.csv")
df.head(5)

Unnamed: 0,Brand,Processor_Speed,RAM_Size,Storage_Capacity,Screen_Size,Weight,Price
0,Asus,3.830296,16,512,11.185147,2.641094,17395.093065
1,Acer,2.912833,4,1000,11.311372,3.260012,31607.605919
2,Lenovo,3.241627,4,256,11.853023,2.029061,9291.023542
3,Acer,3.806248,16,512,12.28036,4.573865,17436.728334
4,Acer,3.268097,32,1000,14.990877,4.193472,32917.990718


In [73]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Brand             1000 non-null   object 
 1   Processor_Speed   1000 non-null   float64
 2   RAM_Size          1000 non-null   int64  
 3   Storage_Capacity  1000 non-null   int64  
 4   Screen_Size       1000 non-null   float64
 5   Weight            1000 non-null   float64
 6   Price             1000 non-null   float64
dtypes: float64(4), int64(2), object(1)
memory usage: 54.8+ KB


In [74]:
df.describe()

Unnamed: 0,Processor_Speed,RAM_Size,Storage_Capacity,Screen_Size,Weight,Price
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,2.750611,15.5,584.576,14.05679,3.466919,19604.187963
std,0.731796,10.988665,313.438517,1.705882,0.866541,9406.06488
min,1.51158,4.0,256.0,11.012111,2.00056,8570.01295
25%,2.089246,8.0,256.0,12.635523,2.717211,10114.012948
50%,2.760885,16.0,512.0,14.099643,3.46463,17287.241878
75%,3.36261,32.0,1000.0,15.52859,4.212583,31566.214754
max,3.998534,32.0,1000.0,16.985737,4.990728,33503.935037


In [75]:
df.isnull().sum()

Brand               0
Processor_Speed     0
RAM_Size            0
Storage_Capacity    0
Screen_Size         0
Weight              0
Price               0
dtype: int64

In [76]:
label_encoder = LabelEncoder()
df['Brand'] = label_encoder.fit_transform(df['Brand'])

X = df.drop(columns = "Price")
y = df['Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Linear Regression Model without Feature Scaling

In [77]:
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
n = len(y_test) 
p = X_test.shape[1]
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
print("Model Score:", model.score(X_test, y_test))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared:", r2)
print("Adjusted R-squared:", adj_r2)

Model Score: 0.9996478987127725
Mean Absolute Error (MAE): 145.27235007959655
Mean Squared Error (MSE): 31969.582469869325
Root Mean Squared Error (RMSE): 178.800398405231
R-squared: 0.9996478987127725
Adjusted R-squared: 0.9996369525587654


### Decision Tree Model without Feature SCALING

In [78]:
model = DecisionTreeRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae,mse = mean_absolute_error(y_test, y_pred), mean_squared_error(y_test, y_pred)
rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred)
n,p = len(y_test), X_test.shape[1]
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
print("Model Score:", model.score(X_test, y_test))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared:", r2)
print("Adjusted R-squared:", adj_r2)

Model Score: 0.9991710678900825
Mean Absolute Error (MAE): 221.30659372836527
Mean Squared Error (MSE): 75264.17656293632
Root Mean Squared Error (RMSE): 274.34317298401345
R-squared: 0.9991710678900825
Adjusted R-squared: 0.9991452979799296


### Random Forest Regressor without Feature SCALING

In [79]:
model = RandomForestRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae,mse = mean_absolute_error(y_test, y_pred), mean_squared_error(y_test, y_pred)
rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred)
n,p = len(y_test), X_test.shape[1]
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
print("Model Score:", model.score(X_test, y_test))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared:", r2)
print("Adjusted R-squared:", adj_r2)

Model Score: 0.9995457489851665
Mean Absolute Error (MAE): 166.83886764717536
Mean Squared Error (MSE): 41244.42541829529
Root Mean Squared Error (RMSE): 203.08723598073632
R-squared: 0.9995457489851665
Adjusted R-squared: 0.9995316271919592


### Feature SCALING

In [80]:
columns_to_scale = ['Storage_Capacity',	'Screen_Size']
scaler = StandardScaler()
df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])
df.head(5)

Unnamed: 0,Brand,Processor_Speed,RAM_Size,Storage_Capacity,Screen_Size,Weight,Price
0,1,3.830296,16,-0.231664,-1.684219,2.641094,17395.093065
1,0,2.912833,4,1.32604,-1.610189,3.260012,31607.605919
2,4,3.241627,4,-1.048819,-1.29251,2.029061,9291.023542
3,0,3.806248,16,-0.231664,-1.041877,4.573865,17436.728334
4,0,3.268097,32,1.32604,0.547843,4.193472,32917.990718


In [81]:
X = df.drop(columns = "Price")
y = df['Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Applying Models after feature SCALING and without OHE

In [82]:
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
n = len(y_test) 
p = X_test.shape[1]
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
print("Linear Regression Model Score:", model.score(X_test, y_test))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared:", r2)
print("Adjusted R-squared:", adj_r2)

Linear Regression Model Score: 0.9996478987127725
Mean Absolute Error (MAE): 145.2723500795961
Mean Squared Error (MSE): 31969.58246986911
Root Mean Squared Error (RMSE): 178.80039840523037
R-squared: 0.9996478987127725
Adjusted R-squared: 0.9996369525587654


In [83]:
model = DecisionTreeRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae,mse = mean_absolute_error(y_test, y_pred), mean_squared_error(y_test, y_pred)
rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred)
n,p = len(y_test), X_test.shape[1]
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
print("Decision Tree Model Score:", model.score(X_test, y_test))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared:", r2)
print("Adjusted R-squared:", adj_r2)

Decision Tree Model Score: 0.999110360779293
Mean Absolute Error (MAE): 227.67179078170867
Mean Squared Error (MSE): 80776.17284153965
Root Mean Squared Error (RMSE): 284.21149315525514
R-squared: 0.999110360779293
Adjusted R-squared: 0.9990827036014472


In [67]:
model = RandomForestRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae,mse = mean_absolute_error(y_test, y_pred), mean_squared_error(y_test, y_pred)
rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred)
n,p = len(y_test), X_test.shape[1]
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
print("Random Forest Model Score:", model.score(X_test, y_test))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared:", r2)
print("Adjusted R-squared:", adj_r2)

Random Forest Model Score: 0.9995505548167871
Mean Absolute Error (MAE): 165.38267499939295
Mean Squared Error (MSE): 40808.07248262664
Root Mean Squared Error (RMSE): 202.01008015103267
R-squared: 0.9995505548167871
Adjusted R-squared: 0.9995365824276717


### One Hot Encoding

In [84]:
df['Brand'] = label_encoder.inverse_transform(df['Brand'])

In [85]:
df['Brand'] = pd.Categorical(df['Brand'])
encoder = OneHotEncoder()
df_new = encoder.fit_transform(df[['Brand']])
df.head()

Unnamed: 0,Brand,Processor_Speed,RAM_Size,Storage_Capacity,Screen_Size,Weight,Price
0,Asus,3.830296,16,-0.231664,-1.684219,2.641094,17395.093065
1,Acer,2.912833,4,1.32604,-1.610189,3.260012,31607.605919
2,Lenovo,3.241627,4,-1.048819,-1.29251,2.029061,9291.023542
3,Acer,3.806248,16,-0.231664,-1.041877,4.573865,17436.728334
4,Acer,3.268097,32,1.32604,0.547843,4.193472,32917.990718


In [86]:
df1 = pd.DataFrame(df_new.toarray(), columns=encoder.get_feature_names_out(['Brand']))
df = pd.concat([df, df1], axis=1)
df.drop('Brand', axis=1, inplace=True)

df.head()

Unnamed: 0,Processor_Speed,RAM_Size,Storage_Capacity,Screen_Size,Weight,Price,Brand_Acer,Brand_Asus,Brand_Dell,Brand_HP,Brand_Lenovo
0,3.830296,16,-0.231664,-1.684219,2.641094,17395.093065,0.0,1.0,0.0,0.0,0.0
1,2.912833,4,1.32604,-1.610189,3.260012,31607.605919,1.0,0.0,0.0,0.0,0.0
2,3.241627,4,-1.048819,-1.29251,2.029061,9291.023542,0.0,0.0,0.0,0.0,1.0
3,3.806248,16,-0.231664,-1.041877,4.573865,17436.728334,1.0,0.0,0.0,0.0,0.0
4,3.268097,32,1.32604,0.547843,4.193472,32917.990718,1.0,0.0,0.0,0.0,0.0


In [87]:
df['RAM_Size'] = pd.Categorical(df['RAM_Size'])
encoder = OneHotEncoder()
df_new = encoder.fit_transform(df[['RAM_Size']])
df1 = pd.DataFrame(df_new.toarray(), columns=encoder.get_feature_names_out(['RAM_Size']))
df = pd.concat([df, df1], axis=1)
df.drop('RAM_Size', axis=1, inplace=True)

In [88]:
df.head()

Unnamed: 0,Processor_Speed,Storage_Capacity,Screen_Size,Weight,Price,Brand_Acer,Brand_Asus,Brand_Dell,Brand_HP,Brand_Lenovo,RAM_Size_4,RAM_Size_8,RAM_Size_16,RAM_Size_32
0,3.830296,-0.231664,-1.684219,2.641094,17395.093065,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,2.912833,1.32604,-1.610189,3.260012,31607.605919,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,3.241627,-1.048819,-1.29251,2.029061,9291.023542,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
3,3.806248,-0.231664,-1.041877,4.573865,17436.728334,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,3.268097,1.32604,0.547843,4.193472,32917.990718,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [89]:
X = df.drop(columns = "Price")
y = df['Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### With OHE as well

In [90]:
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
n = len(y_test) 
p = X_test.shape[1]
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
print("Linear Regression Model Score:", model.score(X_test, y_test))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared:", r2)
print("Adjusted R-squared:", adj_r2)

Linear Regression Model Score: 0.9996444762461612
Mean Absolute Error (MAE): 146.198007372331
Mean Squared Error (MSE): 32280.330633959322
Root Mean Squared Error (RMSE): 179.6672775826453
R-squared: 0.9996444762461612
Adjusted R-squared: 0.9996196278117531


In [91]:
model = DecisionTreeRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae,mse = mean_absolute_error(y_test, y_pred), mean_squared_error(y_test, y_pred)
rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred)
n,p = len(y_test), X_test.shape[1]
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
print("Decision Tree Model Score:", model.score(X_test, y_test))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared:", r2)
print("Adjusted R-squared:", adj_r2)

Decision Tree Model Score: 0.9991499571072225
Mean Absolute Error (MAE): 223.04274632661662
Mean Squared Error (MSE): 77180.96283473715
Root Mean Squared Error (RMSE): 277.81461954824687
R-squared: 0.9991499571072225
Adjusted R-squared: 0.9990905455071897


In [92]:
model = RandomForestRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae,mse = mean_absolute_error(y_test, y_pred), mean_squared_error(y_test, y_pred)
rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred)
n,p = len(y_test), X_test.shape[1]
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
print("Random Forest Model Score:", model.score(X_test, y_test))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared:", r2)
print("Adjusted R-squared:", adj_r2)

Random Forest Model Score: 0.9995607543744477
Mean Absolute Error (MAE): 163.4882214846153
Mean Squared Error (MSE): 39881.987825699995
Root Mean Squared Error (RMSE): 199.7047516352578
R-squared: 0.9995607543744477
Adjusted R-squared: 0.9995300544113714


### Backward Elimination

In [93]:
X = df.drop(columns = "Price")
y = df['Price']
def backward_elimination(X, y, significance_level=0.05):
    num_features = X.shape[1]
    for i in range(num_features):
        model = sm.OLS(y, X).fit()
        max_p_value = max(model.pvalues)
        if max_p_value > significance_level:
            max_p_value_index = model.pvalues.idxmax()
            print(f"Removing feature '{max_p_value_index}' with p-value {max_p_value}")
            X = X.drop(max_p_value_index, axis=1)
        else:
            break
    return model
final_model = backward_elimination(X_train, y_train)

print(final_model.summary())



Removing feature 'Weight' with p-value 0.38099742868921416
                            OLS Regression Results                            
Dep. Variable:                  Price   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 1.737e+05
Date:                Wed, 20 Mar 2024   Prob (F-statistic):               0.00
Time:                        21:15:31   Log-Likelihood:                -5372.3
No. Observations:                 800   AIC:                         1.077e+04
Df Residuals:                     789   BIC:                         1.082e+04
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------

In [94]:
X = df.drop(columns = "Price")
y = df['Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [95]:
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
n = len(y_test) 
p = X_test.shape[1]
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
print("Linear Regression Model Score:", model.score(X_test, y_test))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared:", r2)
print("Adjusted R-squared:", adj_r2)

Linear Regression Model Score: 0.9996444762461612
Mean Absolute Error (MAE): 146.198007372331
Mean Squared Error (MSE): 32280.330633959322
Root Mean Squared Error (RMSE): 179.6672775826453
R-squared: 0.9996444762461612
Adjusted R-squared: 0.9996196278117531


In [96]:
model = DecisionTreeRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae,mse = mean_absolute_error(y_test, y_pred), mean_squared_error(y_test, y_pred)
rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred)
n,p = len(y_test), X_test.shape[1]
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
print("Decision Tree Model Score:", model.score(X_test, y_test))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared:", r2)
print("Adjusted R-squared:", adj_r2)

Decision Tree Model Score: 0.9991259378452492
Mean Absolute Error (MAE): 226.7630584201549
Mean Squared Error (MSE): 79361.82897859467
Root Mean Squared Error (RMSE): 281.7123159867077
R-squared: 0.9991259378452492
Adjusted R-squared: 0.9990648474795946


In [97]:
model = RandomForestRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mae,mse = mean_absolute_error(y_test, y_pred), mean_squared_error(y_test, y_pred)
rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred)
n,p = len(y_test), X_test.shape[1]
adj_r2 = 1 - ((1 - r2) * (n - 1) / (n - p - 1))
print("Random Forest Model Score:", model.score(X_test, y_test))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared:", r2)
print("Adjusted R-squared:", adj_r2)

Random Forest Model Score: 0.9995587231380055
Mean Absolute Error (MAE): 165.93536813501004
Mean Squared Error (MSE): 40066.41708884595
Root Mean Squared Error (RMSE): 200.1659738538145
R-squared: 0.9995587231380055
Adjusted R-squared: 0.9995278812067908
