In [1]:
import pandas as pd
import numpy as np
import mariadb

## Connecting to database to collect data deriven from website

In [62]:
# Connecting to db
try:
    conn = mariadb.connect(
        user="root",
        password="root",
        host="127.0.0.1",
        port=3306,
        database="laptops"
    )
except mariadb.Error as e:
    print(f"Error connecting to MariaDB Platform: {e}")
    sys.exit(1)

print('Connected to db laptops')

cur = conn.cursor()

Connected to db laptops


In [63]:
query = 'SELECT * from laptops;'
df = pd.read_sql(query, conn)

In [64]:
conn.close()

## Cleaning and preparing data 

In [65]:
df.head()

Unnamed: 0,title,price,graphicsCard,processor,screen,ram,memmory
0,Acer Aspire 5 Core i5-1035G1 8GB 512GB SSD 15....,£579.97,UHD Graphics 620,Intel Core i5 1035G1,15.6 Inch Full HD Screen,8GB,512GB
1,Lenovo V15-IIL Core i5-1035G1 8GB 256GB SSD 15...,£549.97,UHD Graphics 620,Intel Core i5 1035G1,15.6 Inch Full HD Screen,8GB,256GB
2,Refurbished Lenovo Yoga 11e Intel Celeron N294...,£119.97,,Intel Celeron N2940,11.6 Inch 1366 x 768 Screen,4GB,16GB
3,Asus C223NA Intel Celeron N3350 4GB 32GB eMMC ...,£179.97,,Intel Celeron N3350,11.6 Inch 1366 x 768 Screen,4GB,32GB
4,CODA 1.2 Intel Celeron N4020 4GB 64GB eMMC 12....,£199.97,UHD Graphics 620,Intel Celeron N4020,12.5 Inch 1366 x 768 Screen,4GB,64GB


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 879 entries, 0 to 878
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   title         879 non-null    object
 1   price         879 non-null    object
 2   graphicsCard  879 non-null    object
 3   processor     879 non-null    object
 4   screen        879 non-null    object
 5   ram           879 non-null    object
 6   memmory       879 non-null    object
dtypes: object(7)
memory usage: 48.2+ KB


In [9]:
df.describe()

Unnamed: 0,title,price,graphicsCard,processor,screen,ram,memmory
count,879,879,879.0,879,879,879,879
unique,781,330,47.0,148,60,7,15
top,Refurbished HP 11A G8 AMD A4-9120C 4GB 32GB 11...,£179.97,,Intel Core i5 10210U,14 Inch Full HD Screen,8GB,256GB
freq,2,21,473.0,59,240,439,286


In [10]:
df.isnull().sum()

title           0
price           0
graphicsCard    0
processor       0
screen          0
ram             0
memmory         0
dtype: int64

In [11]:
df.replace({"": None}).isnull().sum()

title             0
price            16
graphicsCard    473
processor        45
screen           43
ram              45
memmory          57
dtype: int64

### Replacing spaces with null value

In [66]:
df.replace({"":None}, inplace=True)

In [13]:
df.isnull().sum()

title             0
price            16
graphicsCard    473
processor        45
screen           43
ram              45
memmory          57
dtype: int64

In [15]:
no_price_data = df[ df['price'].isnull() ]
no_price_data.head(5)

Unnamed: 0,title,price,graphicsCard,processor,screen,ram,memmory
263,Acer Travel Mate B3 Intel Celeron N4020 4GB 6...,,UHD Graphics 620,Intel Celeron N4020,11.6 Inch 1366 x 768 Screen,4GB,64GB
275,Toshiba Dynabook Satellite Pro C40-H-105 Core ...,,UHD Graphics 620,Intel Core i7 1065G7,14 Inch Full HD Screen,8GB,256GB
290,Acer Travel Mate Spin B3 Intel Pentium N5030 4...,,UHD Graphics 620,Intel Pentium N5030,11.6 Inch Full HD Screen,4GB,128GB
370,Toshiba Dynabook Satellite Pro C40-G-111 Core ...,,UHD Graphics 620,Intel Core i3 10110U,14 Inch 1366 x 768 Screen,8GB,256GB
418,Toshiba Dynabook Satellite Pro C40-H-111 Core ...,,UHD Graphics 620,Intel Core i5 1035G1,14 Inch Full HD Screen,8GB,256GB


### converting price values to float objects

In [67]:
df['price'] = df['price'].str[1:]

In [68]:
df['price'] = df['price'].astype('float')

In [24]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 879 entries, 0 to 878
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   title         879 non-null    object 
 1   price         863 non-null    float64
 2   graphicsCard  406 non-null    object 
 3   processor     834 non-null    object 
 4   screen        836 non-null    object 
 5   ram           834 non-null    object 
 6   memmory       822 non-null    object 
dtypes: float64(1), object(6)
memory usage: 48.2+ KB


In [25]:
df.isnull().sum()

title             0
price            16
graphicsCard    473
processor        45
screen           43
ram              45
memmory          57
dtype: int64

### converting RAM values to float objects

In [28]:
df['ram'].unique()

array(['8GB', '4GB', '16GB', None, '2GB', '32GB', '64GB'], dtype=object)

In [30]:
df['ram'].str[:-2].unique()

array(['8', '4', '16', None, '2', '32', '64'], dtype=object)

In [69]:
df['ram'] = df['ram'].str[:-2]

In [70]:
df['ram'] = df['ram'].astype('float')

In [189]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 879 entries, 0 to 878
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   title         879 non-null    object 
 1   price         863 non-null    float64
 2   graphicsCard  406 non-null    object 
 3   processor     834 non-null    object 
 4   screen        836 non-null    float64
 5   ram           834 non-null    float64
 6   memmory       822 non-null    float64
dtypes: float64(4), object(3)
memory usage: 48.2+ KB


### converting Memmory values to float objects

In [38]:
df['memmory'].unique()

array(['512GB', '256GB', '16GB', '32GB', '64GB', '1TB Hard Drive + 1GB',
       '128GB', '1GB', '1TB', '2TB', None, '120GB',
       '1TB Hard Drive + 512GB', '1TB Hard Drive + 256GB', '255GB'],
      dtype=object)

In [40]:
df['memmory'].str.replace('TB','000GB').unique()

array(['512GB', '256GB', '16GB', '32GB', '64GB',
       '1000GB Hard Drive + 1GB', '128GB', '1GB', '1000GB', '2000GB',
       None, '120GB', '1000GB Hard Drive + 512GB',
       '1000GB Hard Drive + 256GB', '255GB'], dtype=object)

In [71]:
df['memmory']=df['memmory'].str.replace('TB','000GB')

In [82]:
df[ df['memmory'].str.find('+') > 0 ]['memmory'].str[18:]

13       + 1GB
288    + 512GB
414    + 512GB
467    + 512GB
523    + 512GB
539    + 256GB
690    + 256GB
Name: memmory, dtype: object

In [92]:
tmp = df['memmory'].str.find('+') > 0 
df.loc[tmp, 'memmory'] =df.loc[tmp, 'memmory'].str[18:]

In [93]:
df['memmory'].unique()

array(['512GB', '256GB', '16GB', '32GB', '64GB', '+ 1GB', '128GB', '1GB',
       '1000GB', '2000GB', None, '120GB', '+ 512GB', '+ 256GB', '255GB'],
      dtype=object)

In [102]:
df['memmory']=df['memmory'].str.replace('+','')

  df['memmory']=df['memmory'].str.replace('+','')


In [103]:
df['memmory'].unique()

array(['512GB', '256GB', '16GB', '32GB', '64GB', ' 1GB', '128GB', '1GB',
       '1000GB', '2000GB', None, '120GB', ' 512GB', ' 256GB', '255GB'],
      dtype=object)

In [108]:
df['memmory']=df['memmory'].str.strip()

In [109]:
df['memmory'].unique()

array(['512GB', '256GB', '16GB', '32GB', '64GB', '1GB', '128GB', '1000GB',
       '2000GB', None, '120GB', '255GB'], dtype=object)

In [110]:
df['memmory'] = df['memmory'].str[:-2]

In [111]:
df['memmory'].unique()

array(['512', '256', '16', '32', '64', '1', '128', '1000', '2000', None,
       '120', '255'], dtype=object)

In [113]:
df['memmory']=df['memmory'].astype('float')

In [114]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 879 entries, 0 to 878
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   title         879 non-null    object 
 1   price         863 non-null    float64
 2   graphicsCard  406 non-null    object 
 3   processor     834 non-null    object 
 4   screen        836 non-null    object 
 5   ram           834 non-null    float64
 6   memmory       822 non-null    float64
dtypes: float64(3), object(4)
memory usage: 48.2+ KB


### converting screen values to float objects

In [135]:
df['screen'].str.find('Inch').unique() 

array([ 5.,  3., nan])

In [136]:
tmp = df['screen'].str.find('Inch') > 0 
df.loc[tmp, 'screen'] =df.loc[tmp, 'screen'].str[:5]

In [137]:
df['screen'].unique()

array(['15.6 ', '11.6 ', '12.5 ', '14 In', '13.5 ', '17.3 ', '14.1 ',
       '13.3 ', None, '16 In', '12 In', '14.5 ', '13.4 ', '13 In',
       '15 In', '12.3 ', '13.9 ', '11 In', '12.4 '], dtype=object)

In [139]:
df['screen']=df['screen'].str.replace('In','')

In [140]:
df['screen']=df['screen'].str.strip()

In [141]:
df['screen'].unique()

array(['15.6', '11.6', '12.5', '14', '13.5', '17.3', '14.1', '13.3', None,
       '16', '12', '14.5', '13.4', '13', '15', '12.3', '13.9', '11',
       '12.4'], dtype=object)

In [142]:
df['screen'] = df['screen'].astype('float')

In [143]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 879 entries, 0 to 878
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   title         879 non-null    object 
 1   price         863 non-null    float64
 2   graphicsCard  406 non-null    object 
 3   processor     834 non-null    object 
 4   screen        836 non-null    float64
 5   ram           834 non-null    float64
 6   memmory       822 non-null    float64
dtypes: float64(4), object(3)
memory usage: 48.2+ KB


In [148]:
df['processor'].unique().size

148

In [149]:
df['graphicsCard'].unique().size

47

## Preparing data for model

In [153]:
final_data_frame = df[['title','screen','ram','memmory','price']]
final_data_frame

Unnamed: 0,title,screen,ram,memmory,price
0,Acer Aspire 5 Core i5-1035G1 8GB 512GB SSD 15....,15.6,8.0,512.0,579.97
1,Lenovo V15-IIL Core i5-1035G1 8GB 256GB SSD 15...,15.6,8.0,256.0,549.97
2,Refurbished Lenovo Yoga 11e Intel Celeron N294...,11.6,4.0,16.0,119.97
3,Asus C223NA Intel Celeron N3350 4GB 32GB eMMC ...,11.6,4.0,32.0,179.97
4,CODA 1.2 Intel Celeron N4020 4GB 64GB eMMC 12....,12.5,4.0,64.0,199.97
...,...,...,...,...,...
874,Refurbished HP Stream 11-Y0XX Intel Celeron N3...,,,,89.97
875,Refurbished HP 14-BP1XX Core i5-8250U 8GB 128G...,,,,279.97
876,Refurbished Lenovo IdeaPad 320-15IAP Intel Pen...,,,,159.97
877,Refurbished HP Notebook Intel Pentium N3710 8G...,,,,179.97


In [157]:
final_data_frame.isnull().sum()

title       0
screen     43
ram        45
memmory    57
price      16
dtype: int64

In [162]:
test_data = final_data_frame[ final_data_frame['price'].isnull()]

In [164]:
final_data_frame.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


In [167]:
final_data_frame.isnull().sum()

title      0
screen     0
ram        0
memmory    0
price      0
dtype: int64

In [168]:
final_data_frame.head(5)

Unnamed: 0,title,screen,ram,memmory,price
0,Acer Aspire 5 Core i5-1035G1 8GB 512GB SSD 15....,15.6,8.0,512.0,579.97
1,Lenovo V15-IIL Core i5-1035G1 8GB 256GB SSD 15...,15.6,8.0,256.0,549.97
2,Refurbished Lenovo Yoga 11e Intel Celeron N294...,11.6,4.0,16.0,119.97
3,Asus C223NA Intel Celeron N3350 4GB 32GB eMMC ...,11.6,4.0,32.0,179.97
4,CODA 1.2 Intel Celeron N4020 4GB 64GB eMMC 12....,12.5,4.0,64.0,199.97


In [169]:
final_data_frame.corr()

Unnamed: 0,screen,ram,memmory,price
screen,1.0,0.452304,0.390148,0.456002
ram,0.452304,1.0,0.684913,0.835827
memmory,0.390148,0.684913,1.0,0.586791
price,0.456002,0.835827,0.586791,1.0


In [171]:
from sklearn import linear_model

## Designing linear regression model

In [176]:
x = final_data_frame[['screen', 'ram', 'memmory']]
y = final_data_frame['price']

In [178]:
reg = linear_model.LinearRegression()
reg = reg.fit(x,y)

In [181]:
predict = reg.predict([[15.6, 16, 64], [14, 8, 32]])
predict

array([1400.71354276,  669.0484279 ])

### Predicting price of laptops with missing price value

In [184]:
x = test_data[['screen', 'ram', 'memmory']]
test_data['predicted_price'] = reg.predict(x)
test_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['predicted_price'] = reg.predict(x)


Unnamed: 0,title,screen,ram,memmory,price,predicted_price
263,Acer Travel Mate B3 Intel Celeron N4020 4GB 6...,11.6,4.0,64.0,,238.616332
275,Toshiba Dynabook Satellite Pro C40-H-105 Core ...,14.0,8.0,256.0,,674.832724
290,Acer Travel Mate Spin B3 Intel Pentium N5030 4...,11.6,4.0,128.0,,240.268988
370,Toshiba Dynabook Satellite Pro C40-G-111 Core ...,14.0,8.0,256.0,,674.832724
418,Toshiba Dynabook Satellite Pro C40-H-111 Core ...,14.0,8.0,256.0,,674.832724
419,Toshiba Dynabook Satellite Pro C40-G-10Y Core ...,14.0,8.0,256.0,,674.832724
420,Toshiba Dynabook Satellite Pro C40-G-11E Core ...,14.0,8.0,256.0,,674.832724
422,Toshiba Dynabook Satellite Pro C40-G-10Z Core ...,14.0,8.0,256.0,,674.832724
525,Toshiba Dynabook Satellite Pro L50-G-13F Core ...,15.6,8.0,256.0,,740.671754
575,Toshiba Dynabook Tecra A40-G-10G Core i7-10510...,14.0,8.0,256.0,,674.832724


In [187]:
my_laptop_info = [14, 16, 512]
predict = reg.predict([my_laptop_info])
print("My AI model predict",predict[0], "euros for my laptop while the real price is 1400 Euros.")

My AI model predict 1346.4431049470552 euros for my laptop while the real price is 1400 Euros.


## Collecting data for testing the model

In [237]:
tst_df = pd.read_csv('laptops_data.csv')

In [221]:
tst_df.head()

Unnamed: 0,title,Main specs Display diagonal,Main specs Processor,Main specs Internal memory,Main specs Graphics,Main specs Storage,Price
0,Alienware 15 R3,Display diagonal: 15.6 inch,Processor: i7-7700HQ 2.8 GHz,Internal memory: 16 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1060,Storage: 1256 GB HDD+SSD,About 2293 USD
1,Alienware 15 R3,Display diagonal: 15.6 inch,Processor: i5-7300HQ 2.5 GHz,Internal memory: 8 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1060,Storage: 1000 GB HDD,About 1613 USD
2,Alienware 17 R4,Display diagonal: 17.3 inch,Processor: i7-7700HQ 2.8 GHz,Internal memory: 16 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1070,Storage: 1256 GB HDD+SSD,About 2001 USD
3,DELL G3 3779,Display diagonal: 17.3 inch,Processor: i7-8750H 2.20 GHz,Internal memory: 16 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1060,Storage: 2256 GB HDD+SSD,
4,DELL G5 5590,Display diagonal: 15.6 inch,Processor: i5-8300H 2.30 GHz,Internal memory: 8 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1050 Ti,Storage: 1128 GB HDD+SSD,


In [238]:
tst_df = tst_df.dropna()
tst_df.head()

Unnamed: 0,title,Main specs Display diagonal,Main specs Processor,Main specs Internal memory,Main specs Graphics,Main specs Storage,Price
0,Alienware 15 R3,Display diagonal: 15.6 inch,Processor: i7-7700HQ 2.8 GHz,Internal memory: 16 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1060,Storage: 1256 GB HDD+SSD,About 2293 USD
1,Alienware 15 R3,Display diagonal: 15.6 inch,Processor: i5-7300HQ 2.5 GHz,Internal memory: 8 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1060,Storage: 1000 GB HDD,About 1613 USD
2,Alienware 17 R4,Display diagonal: 17.3 inch,Processor: i7-7700HQ 2.8 GHz,Internal memory: 16 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1070,Storage: 1256 GB HDD+SSD,About 2001 USD
6,DELL Inspiron 5770,Display diagonal: 17.3 inch,Processor: i5-8250U 1.60 GHz,Internal memory: 8 GB DDR4-SDRAM,Graphics: AMD Radeon 530,Storage: 1128 GB HDD+SSD,About 940 USD
10,DELL XPS 9560,Display diagonal: 15.6 inch,Processor: i5-7300HQ 2.5 GHz,Internal memory: 8 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1050,Storage: 1128 GB HDD+SSD,About 1527 USD


In [239]:
tst_df['Main specs Display diagonal']=tst_df['Main specs Display diagonal'].str.replace('Display diagonal: ','')

In [229]:
tst_df.head()

Unnamed: 0,title,Main specs Display diagonal,Main specs Processor,Main specs Internal memory,Main specs Graphics,Main specs Storage,Price
0,Alienware 15 R3,15.6 inch,Processor: i7-7700HQ 2.8 GHz,Internal memory: 16 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1060,Storage: 1256 GB HDD+SSD,About 2293 USD
1,Alienware 15 R3,15.6 inch,Processor: i5-7300HQ 2.5 GHz,Internal memory: 8 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1060,Storage: 1000 GB HDD,About 1613 USD
2,Alienware 17 R4,17.3 inch,Processor: i7-7700HQ 2.8 GHz,Internal memory: 16 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1070,Storage: 1256 GB HDD+SSD,About 2001 USD
6,DELL Inspiron 5770,17.3 inch,Processor: i5-8250U 1.60 GHz,Internal memory: 8 GB DDR4-SDRAM,Graphics: AMD Radeon 530,Storage: 1128 GB HDD+SSD,About 940 USD
10,DELL XPS 9560,15.6 inch,Processor: i5-7300HQ 2.5 GHz,Internal memory: 8 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1050,Storage: 1128 GB HDD+SSD,About 1527 USD


In [240]:
tst_df['Main specs Display diagonal']=tst_df['Main specs Display diagonal'].str.replace('inch','')

In [231]:
tst_df.head(3)

Unnamed: 0,title,Main specs Display diagonal,Main specs Processor,Main specs Internal memory,Main specs Graphics,Main specs Storage,Price
0,Alienware 15 R3,15.6,Processor: i7-7700HQ 2.8 GHz,Internal memory: 16 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1060,Storage: 1256 GB HDD+SSD,About 2293 USD
1,Alienware 15 R3,15.6,Processor: i5-7300HQ 2.5 GHz,Internal memory: 8 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1060,Storage: 1000 GB HDD,About 1613 USD
2,Alienware 17 R4,17.3,Processor: i7-7700HQ 2.8 GHz,Internal memory: 16 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1070,Storage: 1256 GB HDD+SSD,About 2001 USD


In [241]:
tst_df['Main specs Internal memory']=tst_df['Main specs Internal memory'].str.replace('Internal memory: ','')

In [233]:
tst_df.head(3)

Unnamed: 0,title,Main specs Display diagonal,Main specs Processor,Main specs Internal memory,Main specs Graphics,Main specs Storage,Price
0,Alienware 15 R3,15.6,Processor: i7-7700HQ 2.8 GHz,16 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1060,Storage: 1256 GB HDD+SSD,About 2293 USD
1,Alienware 15 R3,15.6,Processor: i5-7300HQ 2.5 GHz,8 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1060,Storage: 1000 GB HDD,About 1613 USD
2,Alienware 17 R4,17.3,Processor: i7-7700HQ 2.8 GHz,16 GB DDR4-SDRAM,Graphics: NVIDIA GeForce GTX 1070,Storage: 1256 GB HDD+SSD,About 2001 USD


In [234]:
tst_df['Main specs Internal memory'].unique()

array(['16 GB DDR4-SDRAM', '8 GB DDR4-SDRAM', '32 GB DDR4-SDRAM',
       '16 GB DDR3L-SDRAM'], dtype=object)

In [242]:
tst_df['Main specs Internal memory'] = tst_df['Main specs Internal memory'].str[:2]

In [243]:
tst_df.head(3)

Unnamed: 0,title,Main specs Display diagonal,Main specs Processor,Main specs Internal memory,Main specs Graphics,Main specs Storage,Price
0,Alienware 15 R3,15.6,Processor: i7-7700HQ 2.8 GHz,16,Graphics: NVIDIA GeForce GTX 1060,Storage: 1256 GB HDD+SSD,About 2293 USD
1,Alienware 15 R3,15.6,Processor: i5-7300HQ 2.5 GHz,8,Graphics: NVIDIA GeForce GTX 1060,Storage: 1000 GB HDD,About 1613 USD
2,Alienware 17 R4,17.3,Processor: i7-7700HQ 2.8 GHz,16,Graphics: NVIDIA GeForce GTX 1070,Storage: 1256 GB HDD+SSD,About 2001 USD


In [244]:
tst_df['Main specs Storage']=tst_df['Main specs Storage'].str.replace('Storage: ','')

In [245]:
tst_df['Main specs Storage'].unique()

array(['1256 GB HDD+SSD', '1000 GB HDD', '1128 GB HDD+SSD', '512 GB SSD',
       '1000 GB SSD', '2000 GB SSD', '256 GB SSD'], dtype=object)

In [246]:
tst_df['Main specs Storage'] = tst_df['Main specs Storage'].str[:4]

In [247]:
tst_df['Main specs Storage'].unique()

array(['1256', '1000', '1128', '512 ', '2000', '256 '], dtype=object)

In [248]:
tst_df['Main specs Storage'] = tst_df['Main specs Storage'].str.strip()

In [249]:
tst_df['Main specs Internal memory'] = tst_df['Main specs Internal memory'].str.strip()

In [250]:
tst_df.head(3)

Unnamed: 0,title,Main specs Display diagonal,Main specs Processor,Main specs Internal memory,Main specs Graphics,Main specs Storage,Price
0,Alienware 15 R3,15.6,Processor: i7-7700HQ 2.8 GHz,16,Graphics: NVIDIA GeForce GTX 1060,1256,About 2293 USD
1,Alienware 15 R3,15.6,Processor: i5-7300HQ 2.5 GHz,8,Graphics: NVIDIA GeForce GTX 1060,1000,About 1613 USD
2,Alienware 17 R4,17.3,Processor: i7-7700HQ 2.8 GHz,16,Graphics: NVIDIA GeForce GTX 1070,1256,About 2001 USD


In [251]:
tst_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13 entries, 0 to 82
Data columns (total 7 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   title                        13 non-null     object
 1   Main specs Display diagonal  13 non-null     object
 2   Main specs Processor         13 non-null     object
 3   Main specs Internal memory   13 non-null     object
 4   Main specs Graphics          13 non-null     object
 5   Main specs Storage           13 non-null     object
 6   Price                        13 non-null     object
dtypes: object(7)
memory usage: 832.0+ bytes


In [252]:
tst_df['Main specs Display diagonal'] = tst_df['Main specs Display diagonal'].astype('float')
tst_df['Main specs Internal memory'] = tst_df['Main specs Internal memory'].astype('float')
tst_df['Main specs Storage'] = tst_df['Main specs Storage'].astype('float')

In [253]:
tst_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13 entries, 0 to 82
Data columns (total 7 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   title                        13 non-null     object 
 1   Main specs Display diagonal  13 non-null     float64
 2   Main specs Processor         13 non-null     object 
 3   Main specs Internal memory   13 non-null     float64
 4   Main specs Graphics          13 non-null     object 
 5   Main specs Storage           13 non-null     float64
 6   Price                        13 non-null     object 
dtypes: float64(3), object(4)
memory usage: 832.0+ bytes


In [264]:
x = tst_df[['Main specs Display diagonal', 'Main specs Internal memory', 'Main specs Storage']]
tst_df['predicted_price(dollors)'] = reg.predict(x) * 1.2
tst_df

Unnamed: 0,title,Main specs Display diagonal,Main specs Processor,Main specs Internal memory,Main specs Graphics,Main specs Storage,Price,predicted_price(dollors)
0,Alienware 15 R3,15.6,Processor: i7-7700HQ 2.8 GHz,16.0,Graphics: NVIDIA GeForce GTX 1060,1256.0,About 2293 USD,1717.793114
1,Alienware 15 R3,15.6,Processor: i5-7300HQ 2.5 GHz,8.0,Graphics: NVIDIA GeForce GTX 1060,1000.0,About 1613 USD,911.860657
2,Alienware 17 R4,17.3,Processor: i7-7700HQ 2.8 GHz,16.0,Graphics: NVIDIA GeForce GTX 1070,1256.0,About 2001 USD,1801.737878
6,DELL Inspiron 5770,17.3,Processor: i5-8250U 1.60 GHz,8.0,Graphics: AMD Radeon 530,1128.0,About 940 USD,999.771795
10,DELL XPS 9560,15.6,Processor: i5-7300HQ 2.5 GHz,8.0,Graphics: NVIDIA GeForce GTX 1050,1128.0,About 1527 USD,915.827032
11,DELL XPS 9560,15.6,Processor: i7-7700HQ 2.8 GHz,16.0,Graphics: NVIDIA GeForce GTX 1050,512.0,About 1977 USD,1694.738562
14,DELL XPS 9570,15.6,Processor: i9-8950HK 2.90 GHz,32.0,Graphics: NVIDIA GeForce GTX 1050 Ti,1000.0,About 3068 USD,3305.859781
15,DELL XPS 9570,15.6,Processor: i7-8750H 2.20 GHz,16.0,Graphics: NVIDIA GeForce GTX 1050 Ti,512.0,About 2500 USD,1694.738562
16,DELL XPS 9570,15.6,Processor: i5-8300H 2.30 GHz,8.0,Graphics: NVIDIA GeForce GTX 1050,1128.0,About 1704 USD,915.827032
17,DELL XPS 9570,15.6,Processor: i9-8950HK 2.90 GHz,32.0,Graphics: NVIDIA GeForce GTX 1050 Ti,2000.0,About 3689 USD,3336.847082


In [269]:
tst_df['Price']=tst_df['Price'].str.replace('About ','')
tst_df['Price']=tst_df['Price'].str.replace('USD','')
tst_df['Price']=tst_df['Price'].astype('float')

In [287]:
tst_df

Unnamed: 0,title,Main specs Display diagonal,Main specs Processor,Main specs Internal memory,Main specs Graphics,Main specs Storage,Price,predicted_price(dollors)
0,Alienware 15 R3,15.6,Processor: i7-7700HQ 2.8 GHz,16.0,Graphics: NVIDIA GeForce GTX 1060,1256.0,2293.0,1717.793114
1,Alienware 15 R3,15.6,Processor: i5-7300HQ 2.5 GHz,8.0,Graphics: NVIDIA GeForce GTX 1060,1000.0,1613.0,911.860657
2,Alienware 17 R4,17.3,Processor: i7-7700HQ 2.8 GHz,16.0,Graphics: NVIDIA GeForce GTX 1070,1256.0,2001.0,1801.737878
6,DELL Inspiron 5770,17.3,Processor: i5-8250U 1.60 GHz,8.0,Graphics: AMD Radeon 530,1128.0,940.0,999.771795
10,DELL XPS 9560,15.6,Processor: i5-7300HQ 2.5 GHz,8.0,Graphics: NVIDIA GeForce GTX 1050,1128.0,1527.0,915.827032
11,DELL XPS 9560,15.6,Processor: i7-7700HQ 2.8 GHz,16.0,Graphics: NVIDIA GeForce GTX 1050,512.0,1977.0,1694.738562
14,DELL XPS 9570,15.6,Processor: i9-8950HK 2.90 GHz,32.0,Graphics: NVIDIA GeForce GTX 1050 Ti,1000.0,3068.0,3305.859781
15,DELL XPS 9570,15.6,Processor: i7-8750H 2.20 GHz,16.0,Graphics: NVIDIA GeForce GTX 1050 Ti,512.0,2500.0,1694.738562
16,DELL XPS 9570,15.6,Processor: i5-8300H 2.30 GHz,8.0,Graphics: NVIDIA GeForce GTX 1050,1128.0,1704.0,915.827032
17,DELL XPS 9570,15.6,Processor: i9-8950HK 2.90 GHz,32.0,Graphics: NVIDIA GeForce GTX 1050 Ti,2000.0,3689.0,3336.847082


In [296]:
from sklearn.metrics import mean_squared_error
test_y = tst_df['Price']
linear_predicted_rating = tst_df['predicted_price(dollors)']
errors = mean_squared_error(test_y, linear_predicted_rating, squared=False)

In [297]:
print( "Mean Square Error is equal to ",errors)

Mean Square Error is equal to  639.5492907848534


In [312]:
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error
# real value
expected = tst_df['Price']
# predicted value
predicted = tst_df['predicted_price(dollors)']

In [329]:
errors_list = []
errors_list = abs(expected - predicted) / expected
totall_error = errors_list.sum() / errors_list.count()
print('The final error on new dataset is nearly', totall_error,'percent.')

The final error on new dataset is nearly 0.28346740968868145 percent.
