In [2]:
import pandas as pd 
import numpy as np

In [3]:

try:
    df = pd.read_csv('laptop_price.csv', encoding='utf-8')
except UnicodeDecodeError:
    try:
        df = pd.read_csv('laptop_price.csv', encoding='ISO-8859-1')
    except UnicodeDecodeError:
        df = pd.read_csv('laptop_price.csv', encoding='latin1')

# Now, df contains your CSV data


In [4]:
df.head(5)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,Intel HD Graphics 620,No OS,1.86kg,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   laptop_ID         1303 non-null   int64  
 1   Company           1303 non-null   object 
 2   Product           1303 non-null   object 
 3   TypeName          1303 non-null   object 
 4   Inches            1303 non-null   float64
 5   ScreenResolution  1303 non-null   object 
 6   Cpu               1303 non-null   object 
 7   Ram               1303 non-null   object 
 8   Gpu               1303 non-null   object 
 9   OpSys             1303 non-null   object 
 10  Weight            1303 non-null   object 
 11  Price_euros       1303 non-null   float64
dtypes: float64(2), int64(1), object(9)
memory usage: 122.3+ KB


In [6]:
df.isnull().sum()

laptop_ID           0
Company             0
Product             0
TypeName            0
Inches              0
ScreenResolution    0
Cpu                 0
Ram                 0
Gpu                 0
OpSys               0
Weight              0
Price_euros         0
dtype: int64

In [7]:
df['Ram']

0        8GB
1        8GB
2        8GB
3       16GB
4        8GB
        ... 
1298     4GB
1299    16GB
1300     2GB
1301     6GB
1302     4GB
Name: Ram, Length: 1303, dtype: object

In [8]:
# Assuming df is your DataFrame and 'Ram' is the column containing RAM sizes
df['Ram'] = df['Ram'].str.replace('GB', '').astype(int)


In [9]:
df['Weight'] = df['Weight'].str.replace('kg','').astype(float)

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   laptop_ID         1303 non-null   int64  
 1   Company           1303 non-null   object 
 2   Product           1303 non-null   object 
 3   TypeName          1303 non-null   object 
 4   Inches            1303 non-null   float64
 5   ScreenResolution  1303 non-null   object 
 6   Cpu               1303 non-null   object 
 7   Ram               1303 non-null   int32  
 8   Gpu               1303 non-null   object 
 9   OpSys             1303 non-null   object 
 10  Weight            1303 non-null   float64
 11  Price_euros       1303 non-null   float64
dtypes: float64(3), int32(1), int64(1), object(7)
memory usage: 117.2+ KB


In [11]:
df.head(5)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,Intel Iris Plus Graphics 640,macOS,1.37,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,Intel HD Graphics 6000,macOS,1.34,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,Intel HD Graphics 620,No OS,1.86,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,AMD Radeon Pro 455,macOS,1.83,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,Intel Iris Plus Graphics 650,macOS,1.37,1803.6


In [12]:
# Select numeric columns
numeric_columns = df.select_dtypes(include=['float64', 'int64','int32']).columns

# Calculate correlation for numeric columns only
correlation = df[numeric_columns].corr()['Price_euros']


In [13]:
correlation

laptop_ID      0.067830
Inches         0.068197
Ram            0.743007
Weight         0.210370
Price_euros    1.000000
Name: Price_euros, dtype: float64

In [14]:
import pandas as pd

def clean_company_names(df):
    # Define the list of companies to be replaced with 'others'
    other_companies = ['Samsung', 'Razer', 'Mediacom', 'Microsoft', 'Xiaomi', 'Vero', 'Chuwi', 'Google', 'Fujitsu', 'LG', 'Huawei']
    
    # Replace the companies in the DataFrame with 'others'
    df['Company'] = df['Company'].apply(lambda x: 'others' if x in other_companies else x)
    
    return df


df_cleaned = clean_company_names(df)

# Check the value counts of the 'Company' column after cleaning
print(df_cleaned['Company'].value_counts())


Company
Dell       297
Lenovo     297
HP         274
Asus       158
Acer       103
MSI         54
others      51
Toshiba     48
Apple       21
Name: count, dtype: int64


In [15]:
df['Company'].value_counts()

Company
Dell       297
Lenovo     297
HP         274
Asus       158
Acer       103
MSI         54
others      51
Toshiba     48
Apple       21
Name: count, dtype: int64

In [16]:
def clean_company_names(df):
    # Define the list of companies to be replaced with 'others'
    other_companies = ['Samsung', 'Razer', 'Mediacom', 'Microsoft', 'Xiaomi', 'Vero', 'Chuwi', 'Google', 'Fujitsu', 'LG', 'Huawei']
    
    # Replace the companies in the DataFrame with 'others'
    df['Company'] = df['Company'].apply(lambda x: 'others' if x in other_companies else x)
    
    return df


df_cleaned = clean_company_names(df)

# Check the value counts of the 'Company' column after cleaning
print(df_cleaned['Company'].value_counts())


Company
Dell       297
Lenovo     297
HP         274
Asus       158
Acer       103
MSI         54
others      51
Toshiba     48
Apple       21
Name: count, dtype: int64


In [17]:
df = df_cleaned

In [18]:
df['Company'].sample(10)

1046         HP
597      Lenovo
249       Apple
348        Asus
943     Toshiba
302      Lenovo
170      others
781        Asus
490      Lenovo
1218     Lenovo
Name: Company, dtype: object

In [19]:
df.sample(9)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Gpu,OpSys,Weight,Price_euros
335,341,HP,EliteBook 840,Notebook,14.0,Full HD 1920x1080,Intel Core i5 7300U 2.6GHz,8,Intel HD Graphics 620,Windows 10,1.48,1268.0
1021,1035,Toshiba,Portégé Z30-C-16K,Ultrabook,13.3,Full HD 1920x1080,Intel Core i5 6200U 2.3GHz,8,Intel HD Graphics 520,Windows 10,1.2,1590.0
887,898,Asus,Rog GL753VD-GC082T,Gaming,17.3,Full HD 1920x1080,Intel Core i5 7300HQ 2.5GHz,12,Nvidia GeForce GTX 1050,Windows 10,2.2,1369.9
1091,1106,HP,250 G6,Notebook,15.6,1366x768,Intel Core i3 6006U 2GHz,4,Intel HD Graphics 520,No OS,1.86,345.99
1252,1270,Lenovo,IdeaPad 310-15ISK,Notebook,15.6,Full HD 1920x1080,Intel Core i3 6100U 2.3GHz,6,Nvidia GeForce 920MX,Windows 10,2.4,569.0
675,683,MSI,GP62 7RDX,Gaming,15.6,Full HD 1920x1080,Intel Core i7 7700HQ 2.8GHz,16,Nvidia GeForce GTX 1050,Windows 10,2.4,1294.0
1201,1219,Lenovo,IdeaPad 310-15ISK,Notebook,15.6,1366x768,Intel Core i7 6500U 2.5GHz,8,Nvidia GeForce 920MX,No OS,2.2,629.0
450,457,HP,15-bs024nv (i5-7200U/8GB/128GB/W10),Notebook,15.6,1366x768,Intel Core i5 7200U 2.5GHz,8,Intel HD Graphics 620,Windows 10,1.91,589.0
776,785,Lenovo,Thinkpad P50,Notebook,15.6,4K Ultra HD 3840x2160,Intel Core i7 6820HQ 2.7GHz,16,Nvidia Quadro M2000M,Windows 7,2.5,2370.0


In [20]:
df =df.drop(columns='Product',axis=1)

In [21]:
df.head(5)

Unnamed: 0,laptop_ID,Company,TypeName,Inches,ScreenResolution,Cpu,Ram,Gpu,OpSys,Weight,Price_euros
0,1,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,Intel Iris Plus Graphics 640,macOS,1.37,1339.69
1,2,Apple,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,Intel HD Graphics 6000,macOS,1.34,898.94
2,3,HP,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,Intel HD Graphics 620,No OS,1.86,575.0
3,4,Apple,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,AMD Radeon Pro 455,macOS,1.83,2537.45
4,5,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,Intel Iris Plus Graphics 650,macOS,1.37,1803.6


In [22]:
df['TypeName'].value_counts()

TypeName
Notebook              727
Gaming                205
Ultrabook             196
2 in 1 Convertible    121
Workstation            29
Netbook                25
Name: count, dtype: int64

In [23]:
df.sample(9)

Unnamed: 0,laptop_ID,Company,TypeName,Inches,ScreenResolution,Cpu,Ram,Gpu,OpSys,Weight,Price_euros
920,934,Dell,Notebook,15.6,1366x768,Intel Core i5 7200U 2.5GHz,4,AMD Radeon R5 M420,Windows 10,2.18,684.99
470,477,HP,2 in 1 Convertible,13.3,IPS Panel 4K Ultra HD / Touchscreen 3840x2160,Intel Core i7 8550U 1.8GHz,8,Intel UHD Graphics 620,Windows 10,1.29,1999.0
139,142,HP,Notebook,13.3,IPS Panel Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,4,Intel HD Graphics 620,Windows 10,1.32,699.0
546,553,Lenovo,Notebook,14.0,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,4,Intel HD Graphics 620,Windows 10,1.87,785.0
1092,1107,Asus,Gaming,17.3,IPS Panel Full HD 1920x1080,Intel Core i7 7700HQ 2.8GHz,8,Nvidia GeForce GTX 1060,Windows 10,2.7,1599.0
1079,1094,HP,Notebook,15.6,Full HD 1920x1080,Intel Core i5 6200U 2.3GHz,4,Intel HD Graphics 520,Windows 7,2.31,940.0
335,341,HP,Notebook,14.0,Full HD 1920x1080,Intel Core i5 7300U 2.6GHz,8,Intel HD Graphics 620,Windows 10,1.48,1268.0
43,44,Acer,Notebook,15.6,IPS Panel Full HD 1920x1080,Intel Core i5 8250U 1.6GHz,4,Intel UHD Graphics 620,Windows 10,2.2,682.0
148,151,MSI,Gaming,17.3,Full HD 1920x1080,Intel Core i7 7700HQ 2.8GHz,16,Nvidia GeForce GTX 1070,Windows 10,2.9,2029.0


In [24]:
df['Touchscreen'] = df['ScreenResolution'].apply(lambda x: 1 if "Touchscreen" in x else 0)
df['IPS'] = df['ScreenResolution'].apply(lambda x: 1 if "IPS" in x else 0)
df['Full HD'] = df['ScreenResolution'].apply(lambda x: 1 if "Full HD" in x else 0)


In [25]:
df['ScreenResolution'].value_counts()

ScreenResolution
Full HD 1920x1080                                507
1366x768                                         281
IPS Panel Full HD 1920x1080                      230
IPS Panel Full HD / Touchscreen 1920x1080         53
Full HD / Touchscreen 1920x1080                   47
1600x900                                          23
Touchscreen 1366x768                              16
Quad HD+ / Touchscreen 3200x1800                  15
IPS Panel 4K Ultra HD 3840x2160                   12
IPS Panel 4K Ultra HD / Touchscreen 3840x2160     11
4K Ultra HD / Touchscreen 3840x2160               10
4K Ultra HD 3840x2160                              7
Touchscreen 2560x1440                              7
IPS Panel 1366x768                                 7
IPS Panel Quad HD+ / Touchscreen 3200x1800         6
IPS Panel Retina Display 2560x1600                 6
IPS Panel Retina Display 2304x1440                 6
Touchscreen 2256x1504                              6
IPS Panel Touchscreen 2560x14

In [26]:
df.head()

Unnamed: 0,laptop_ID,Company,TypeName,Inches,ScreenResolution,Cpu,Ram,Gpu,OpSys,Weight,Price_euros,Touchscreen,IPS,Full HD
0,1,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,Intel Iris Plus Graphics 640,macOS,1.37,1339.69,0,1,0
1,2,Apple,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,Intel HD Graphics 6000,macOS,1.34,898.94,0,0,0
2,3,HP,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,Intel HD Graphics 620,No OS,1.86,575.0,0,0,1
3,4,Apple,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,AMD Radeon Pro 455,macOS,1.83,2537.45,0,1,0
4,5,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,Intel Iris Plus Graphics 650,macOS,1.37,1803.6,0,1,0


In [27]:
df=df.drop('ScreenResolution',axis=1)

In [28]:
df.head(6)

Unnamed: 0,laptop_ID,Company,TypeName,Inches,Cpu,Ram,Gpu,OpSys,Weight,Price_euros,Touchscreen,IPS,Full HD
0,1,Apple,Ultrabook,13.3,Intel Core i5 2.3GHz,8,Intel Iris Plus Graphics 640,macOS,1.37,1339.69,0,1,0
1,2,Apple,Ultrabook,13.3,Intel Core i5 1.8GHz,8,Intel HD Graphics 6000,macOS,1.34,898.94,0,0,0
2,3,HP,Notebook,15.6,Intel Core i5 7200U 2.5GHz,8,Intel HD Graphics 620,No OS,1.86,575.0,0,0,1
3,4,Apple,Ultrabook,15.4,Intel Core i7 2.7GHz,16,AMD Radeon Pro 455,macOS,1.83,2537.45,0,1,0
4,5,Apple,Ultrabook,13.3,Intel Core i5 3.1GHz,8,Intel Iris Plus Graphics 650,macOS,1.37,1803.6,0,1,0
5,6,Acer,Notebook,15.6,AMD A9-Series 9420 3GHz,4,AMD Radeon R5,Windows 10,2.1,400.0,0,0,0


In [29]:
df['Cpu'].value_counts()

Cpu
Intel Core i5 7200U 2.5GHz       190
Intel Core i7 7700HQ 2.8GHz      146
Intel Core i7 7500U 2.7GHz       134
Intel Core i7 8550U 1.8GHz        73
Intel Core i5 8250U 1.6GHz        72
                                ... 
Intel Core M M3-6Y30 0.9GHz        1
AMD A9-Series 9420 2.9GHz          1
Intel Core i3 6006U 2.2GHz         1
AMD A6-Series 7310 2GHz            1
Intel Xeon E3-1535M v6 3.1GHz      1
Name: count, Length: 118, dtype: int64

In [30]:
df['Cpu_name'] = df['Cpu'].apply(lambda x: " ".join(x.split()[0:3]) if isinstance(x, str) else x)

In [31]:
df.head(8)

Unnamed: 0,laptop_ID,Company,TypeName,Inches,Cpu,Ram,Gpu,OpSys,Weight,Price_euros,Touchscreen,IPS,Full HD,Cpu_name
0,1,Apple,Ultrabook,13.3,Intel Core i5 2.3GHz,8,Intel Iris Plus Graphics 640,macOS,1.37,1339.69,0,1,0,Intel Core i5
1,2,Apple,Ultrabook,13.3,Intel Core i5 1.8GHz,8,Intel HD Graphics 6000,macOS,1.34,898.94,0,0,0,Intel Core i5
2,3,HP,Notebook,15.6,Intel Core i5 7200U 2.5GHz,8,Intel HD Graphics 620,No OS,1.86,575.0,0,0,1,Intel Core i5
3,4,Apple,Ultrabook,15.4,Intel Core i7 2.7GHz,16,AMD Radeon Pro 455,macOS,1.83,2537.45,0,1,0,Intel Core i7
4,5,Apple,Ultrabook,13.3,Intel Core i5 3.1GHz,8,Intel Iris Plus Graphics 650,macOS,1.37,1803.6,0,1,0,Intel Core i5
5,6,Acer,Notebook,15.6,AMD A9-Series 9420 3GHz,4,AMD Radeon R5,Windows 10,2.1,400.0,0,0,0,AMD A9-Series 9420
6,7,Apple,Ultrabook,15.4,Intel Core i7 2.2GHz,16,Intel Iris Pro Graphics,Mac OS X,2.04,2139.97,0,1,0,Intel Core i7
7,8,Apple,Ultrabook,13.3,Intel Core i5 1.8GHz,8,Intel HD Graphics 6000,macOS,1.34,1158.7,0,0,0,Intel Core i5


In [32]:
df['Cpu_name'].value_counts()

Cpu_name
Intel Core i7               527
Intel Core i5               423
Intel Core i3               136
Intel Celeron Dual           80
Intel Pentium Quad           27
Intel Core M                 19
AMD A9-Series 9420           12
Intel Celeron Quad            8
AMD A6-Series 9220            8
AMD A12-Series 9720P          7
Intel Atom x5-Z8350           5
AMD A8-Series 7410            4
Intel Atom x5-Z8550           4
Intel Pentium Dual            3
AMD A9-Series 9410            3
AMD Ryzen 1700                3
AMD A9-Series A9-9420         2
AMD A10-Series 9620P          2
Intel Atom X5-Z8350           2
AMD E-Series E2-9000e         2
Intel Xeon E3-1535M           2
Intel Xeon E3-1505M           2
AMD E-Series 7110             2
AMD A10-Series 9600P          2
AMD A6-Series A6-9220         2
AMD A10-Series A10-9620P      2
AMD Ryzen 1600                1
Intel Atom x5-Z8300           1
AMD E-Series E2-6110          1
AMD FX 9830P                  1
AMD E-Series E2-9000          1

In [33]:
def cpu_chise(inputs):
    if inputs == "Intel Core i7" or inputs == 'Intel Core i5' or inputs == "Intel Core i3":
        return inputs
    else:
        if inputs.split()[0] == 'AMD':
            return "AMD"
        else:
            return "Other"

df['Cpu_name'] = df['Cpu_name'].apply(cpu_chise)

In [34]:
df['Cpu_name'].value_counts()

Cpu_name
Intel Core i7    527
Intel Core i5    423
Other            155
Intel Core i3    136
AMD               62
Name: count, dtype: int64

In [35]:
df.head(5)

Unnamed: 0,laptop_ID,Company,TypeName,Inches,Cpu,Ram,Gpu,OpSys,Weight,Price_euros,Touchscreen,IPS,Full HD,Cpu_name
0,1,Apple,Ultrabook,13.3,Intel Core i5 2.3GHz,8,Intel Iris Plus Graphics 640,macOS,1.37,1339.69,0,1,0,Intel Core i5
1,2,Apple,Ultrabook,13.3,Intel Core i5 1.8GHz,8,Intel HD Graphics 6000,macOS,1.34,898.94,0,0,0,Intel Core i5
2,3,HP,Notebook,15.6,Intel Core i5 7200U 2.5GHz,8,Intel HD Graphics 620,No OS,1.86,575.0,0,0,1,Intel Core i5
3,4,Apple,Ultrabook,15.4,Intel Core i7 2.7GHz,16,AMD Radeon Pro 455,macOS,1.83,2537.45,0,1,0,Intel Core i7
4,5,Apple,Ultrabook,13.3,Intel Core i5 3.1GHz,8,Intel Iris Plus Graphics 650,macOS,1.37,1803.6,0,1,0,Intel Core i5


In [36]:
df = df.drop('Cpu',axis=1)

In [37]:
df.head(6)

Unnamed: 0,laptop_ID,Company,TypeName,Inches,Ram,Gpu,OpSys,Weight,Price_euros,Touchscreen,IPS,Full HD,Cpu_name
0,1,Apple,Ultrabook,13.3,8,Intel Iris Plus Graphics 640,macOS,1.37,1339.69,0,1,0,Intel Core i5
1,2,Apple,Ultrabook,13.3,8,Intel HD Graphics 6000,macOS,1.34,898.94,0,0,0,Intel Core i5
2,3,HP,Notebook,15.6,8,Intel HD Graphics 620,No OS,1.86,575.0,0,0,1,Intel Core i5
3,4,Apple,Ultrabook,15.4,16,AMD Radeon Pro 455,macOS,1.83,2537.45,0,1,0,Intel Core i7
4,5,Apple,Ultrabook,13.3,8,Intel Iris Plus Graphics 650,macOS,1.37,1803.6,0,1,0,Intel Core i5
5,6,Acer,Notebook,15.6,4,AMD Radeon R5,Windows 10,2.1,400.0,0,0,0,AMD


In [38]:
df['OpSys'].value_counts()

OpSys
Windows 10      1072
No OS             66
Linux             62
Windows 7         45
Chrome OS         27
macOS             13
Mac OS X           8
Windows 10 S       8
Android            2
Name: count, dtype: int64

In [39]:
def Opsys_chocie(opsy):
   if opsy == 'Windows 10' or opsy == 'Windows 7' or opsy == 'Windows 10 S' :
       return "Windows"
   else:
       if opsy == 'macOS' or opsy == 'Mac OS X':
           return "Mac"
       else:
           return "others"

df["opsyss"] = df['OpSys'].apply(Opsys_chocie)

In [40]:
df.head(5)

Unnamed: 0,laptop_ID,Company,TypeName,Inches,Ram,Gpu,OpSys,Weight,Price_euros,Touchscreen,IPS,Full HD,Cpu_name,opsyss
0,1,Apple,Ultrabook,13.3,8,Intel Iris Plus Graphics 640,macOS,1.37,1339.69,0,1,0,Intel Core i5,Mac
1,2,Apple,Ultrabook,13.3,8,Intel HD Graphics 6000,macOS,1.34,898.94,0,0,0,Intel Core i5,Mac
2,3,HP,Notebook,15.6,8,Intel HD Graphics 620,No OS,1.86,575.0,0,0,1,Intel Core i5,others
3,4,Apple,Ultrabook,15.4,16,AMD Radeon Pro 455,macOS,1.83,2537.45,0,1,0,Intel Core i7,Mac
4,5,Apple,Ultrabook,13.3,8,Intel Iris Plus Graphics 650,macOS,1.37,1803.6,0,1,0,Intel Core i5,Mac


In [41]:
df=df.drop('OpSys',axis=1)

In [42]:
df['Gpu'].value_counts()

Gpu
Intel HD Graphics 620      281
Intel HD Graphics 520      185
Intel UHD Graphics 620      68
Nvidia GeForce GTX 1050     66
Nvidia GeForce GTX 1060     48
                          ... 
AMD Radeon R5 520            1
AMD Radeon R7                1
Intel HD Graphics 540        1
AMD Radeon 540               1
ARM Mali T860 MP4            1
Name: count, Length: 110, dtype: int64

In [43]:
def gpu_counts (gpu_names):
    if gpu_names.split()[0] == 'Intel':
        return "Intel_Graphics"
    else:
        if gpu_names.split()[0] == 'Nvidia':
            return 'Nvidia Graphics'
        else:
            if gpu_names.split()[0] == 'AMD':
                return "AMD Graphics"
            else:
                if gpu_names.split()[0] == 'ARM':
                    return "ARM Graphics"

df['Gpus_counts'] = df['Gpu'].apply(gpu_counts)

In [44]:
df.sample(5)

Unnamed: 0,laptop_ID,Company,TypeName,Inches,Ram,Gpu,Weight,Price_euros,Touchscreen,IPS,Full HD,Cpu_name,opsyss,Gpus_counts
1072,1087,HP,Ultrabook,12.5,8,Intel HD Graphics 520,1.26,1199.0,0,0,1,Intel Core i5,Windows,Intel_Graphics
531,538,Toshiba,Notebook,15.6,8,Intel HD Graphics 620,2.0,860.0,0,1,1,Intel Core i5,Windows,Intel_Graphics
828,837,Acer,Netbook,11.6,4,Intel HD Graphics 400,1.35,355.0,0,1,0,Other,others,Intel_Graphics
621,628,Lenovo,Notebook,14.0,8,Intel HD Graphics 520,1.65,1465.0,0,0,1,Intel Core i5,Windows,Intel_Graphics
299,304,Asus,Ultrabook,15.6,16,Nvidia GeForce 940MX,1.63,1468.0,0,0,1,Intel Core i7,Windows,Nvidia Graphics


In [45]:
df = df.drop('Gpu',axis=1)

In [46]:
df.head(5)

Unnamed: 0,laptop_ID,Company,TypeName,Inches,Ram,Weight,Price_euros,Touchscreen,IPS,Full HD,Cpu_name,opsyss,Gpus_counts
0,1,Apple,Ultrabook,13.3,8,1.37,1339.69,0,1,0,Intel Core i5,Mac,Intel_Graphics
1,2,Apple,Ultrabook,13.3,8,1.34,898.94,0,0,0,Intel Core i5,Mac,Intel_Graphics
2,3,HP,Notebook,15.6,8,1.86,575.0,0,0,1,Intel Core i5,others,Intel_Graphics
3,4,Apple,Ultrabook,15.4,16,1.83,2537.45,0,1,0,Intel Core i7,Mac,AMD Graphics
4,5,Apple,Ultrabook,13.3,8,1.37,1803.6,0,1,0,Intel Core i5,Mac,Intel_Graphics


In [47]:
df = df.drop('laptop_ID',axis=1)

In [48]:
df.head(5)

Unnamed: 0,Company,TypeName,Inches,Ram,Weight,Price_euros,Touchscreen,IPS,Full HD,Cpu_name,opsyss,Gpus_counts
0,Apple,Ultrabook,13.3,8,1.37,1339.69,0,1,0,Intel Core i5,Mac,Intel_Graphics
1,Apple,Ultrabook,13.3,8,1.34,898.94,0,0,0,Intel Core i5,Mac,Intel_Graphics
2,HP,Notebook,15.6,8,1.86,575.0,0,0,1,Intel Core i5,others,Intel_Graphics
3,Apple,Ultrabook,15.4,16,1.83,2537.45,0,1,0,Intel Core i7,Mac,AMD Graphics
4,Apple,Ultrabook,13.3,8,1.37,1803.6,0,1,0,Intel Core i5,Mac,Intel_Graphics


In [49]:
df['Company'].value_counts()

Company
Dell       297
Lenovo     297
HP         274
Asus       158
Acer       103
MSI         54
others      51
Toshiba     48
Apple       21
Name: count, dtype: int64

In [50]:
# Select numeric columns
numeric_columns = df.select_dtypes(include=['float64', 'int64','int32']).columns

# Calculate correlation for numeric columns only
correlation = df[numeric_columns].corr()['Price_euros']


In [51]:
correlation

Inches         0.068197
Ram            0.743007
Weight         0.210370
Price_euros    1.000000
Touchscreen    0.191226
IPS            0.252208
Full HD        0.198612
Name: Price_euros, dtype: float64

In [52]:
df = df.drop('Inches',axis=1)

In [53]:
df.head(5)

Unnamed: 0,Company,TypeName,Ram,Weight,Price_euros,Touchscreen,IPS,Full HD,Cpu_name,opsyss,Gpus_counts
0,Apple,Ultrabook,8,1.37,1339.69,0,1,0,Intel Core i5,Mac,Intel_Graphics
1,Apple,Ultrabook,8,1.34,898.94,0,0,0,Intel Core i5,Mac,Intel_Graphics
2,HP,Notebook,8,1.86,575.0,0,0,1,Intel Core i5,others,Intel_Graphics
3,Apple,Ultrabook,16,1.83,2537.45,0,1,0,Intel Core i7,Mac,AMD Graphics
4,Apple,Ultrabook,8,1.37,1803.6,0,1,0,Intel Core i5,Mac,Intel_Graphics


In [54]:
df['TypeName'].value_counts()

TypeName
Notebook              727
Gaming                205
Ultrabook             196
2 in 1 Convertible    121
Workstation            29
Netbook                25
Name: count, dtype: int64

In [55]:
data = pd.get_dummies(df)

In [56]:
data.head(1)

Unnamed: 0,Ram,Weight,Price_euros,Touchscreen,IPS,Full HD,Company_Acer,Company_Apple,Company_Asus,Company_Dell,...,Cpu_name_Intel Core i5,Cpu_name_Intel Core i7,Cpu_name_Other,opsyss_Mac,opsyss_Windows,opsyss_others,Gpus_counts_AMD Graphics,Gpus_counts_ARM Graphics,Gpus_counts_Intel_Graphics,Gpus_counts_Nvidia Graphics
0,8,1.37,1339.69,0,1,0,False,True,False,False,...,True,False,False,True,False,False,False,False,True,False


In [57]:
df = data.astype(int)

In [58]:
df.head(8)

Unnamed: 0,Ram,Weight,Price_euros,Touchscreen,IPS,Full HD,Company_Acer,Company_Apple,Company_Asus,Company_Dell,...,Cpu_name_Intel Core i5,Cpu_name_Intel Core i7,Cpu_name_Other,opsyss_Mac,opsyss_Windows,opsyss_others,Gpus_counts_AMD Graphics,Gpus_counts_ARM Graphics,Gpus_counts_Intel_Graphics,Gpus_counts_Nvidia Graphics
0,8,1,1339,0,1,0,0,1,0,0,...,1,0,0,1,0,0,0,0,1,0
1,8,1,898,0,0,0,0,1,0,0,...,1,0,0,1,0,0,0,0,1,0
2,8,1,575,0,0,1,0,0,0,0,...,1,0,0,0,0,1,0,0,1,0
3,16,1,2537,0,1,0,0,1,0,0,...,0,1,0,1,0,0,1,0,0,0
4,8,1,1803,0,1,0,0,1,0,0,...,1,0,0,1,0,0,0,0,1,0
5,4,2,400,0,0,0,1,0,0,0,...,0,0,0,0,1,0,1,0,0,0
6,16,2,2139,0,1,0,0,1,0,0,...,0,1,0,1,0,0,0,0,1,0
7,8,1,1158,0,0,0,0,1,0,0,...,1,0,0,1,0,0,0,0,1,0


In [59]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [60]:
x = df.drop('Price_euros',axis=1)
y = df['Price_euros']

In [61]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)


In [62]:
from sklearn.metrics import mean_absolute_error

In [63]:
def model_trains(model):
    model.fit(X_train, y_train)
    acc = model.score(X_test,y_test)
    print(str(model)+"--->" + str(acc))

In [64]:
lr = LinearRegression()
model_trains(lr)

LinearRegression()--->0.6382271182252421


In [65]:
k = [1, 2, 6, 10, 11]
accuracy_values = []  # Corrected the variable name

for i in k:
    KNe = KNeighborsRegressor(n_neighbors=i)  # Corrected the parameter name
    KNe.fit(X_train, y_train)
    acc = KNe.score(X_test, y_test)
    accuracy_values.append(acc)  # Appending accuracy to the list

print(accuracy_values) 

[0.6451256417922904, 0.6464793281962757, 0.7296909216128777, 0.7286846018328641, 0.7177853933181905]


In [66]:
e = [12, 23, 43, 17, 20,40,21,6]
accuracy_values = []  # Corrected the variable name

for i in e:
    rf = RandomForestRegressor(n_estimators=i)  # Corrected the parameter name
    rf.fit(X_train, y_train)
    acc = KNe.score(X_test, y_test)
    accuracy_values.append(acc)  # Appending accuracy to the list

print(accuracy_values) 

[0.7177853933181905, 0.7177853933181905, 0.7177853933181905, 0.7177853933181905, 0.7177853933181905, 0.7177853933181905, 0.7177853933181905, 0.7177853933181905]


In [67]:
lr = SVC()
model_trains(lr)

SVC()--->0.011494252873563218


In [68]:
k_values = [1, 2, 6, 10, 11]
p_values = [1, 2]  # 1: Manhattan distance, 2: Euclidean distance
accuracy_values = []

for k in k_values:
    for p in p_values:
        KNe = KNeighborsRegressor(n_neighbors=k, p=p)
        KNe.fit(X_train, y_train)
        acc = KNe.score(X_test, y_test)
        accuracy_values.append((k, p, acc))

print(accuracy_values)


[(1, 1, 0.6498877648960057), (1, 2, 0.6451256417922904), (2, 1, 0.645235257291542), (2, 2, 0.6464793281962757), (6, 1, 0.6978992008835239), (6, 2, 0.7296909216128777), (10, 1, 0.7141486781197897), (10, 2, 0.7286846018328641), (11, 1, 0.7053662641393159), (11, 2, 0.7177853933181905)]


In [69]:
lap_top_model = KNeighborsRegressor(n_neighbors=2, p=2)
lap_top_model.fit(X_train, y_train)

In [70]:
acc = KNe.score(X_test, y_test)
acc

0.7177853933181905

In [71]:
import joblib

# Assuming best_model is already defined as the model with the best parameter

# Save the model to a file
joblib.dump(lap_top_model, 'knn_regressor_model.pkl')


['knn_regressor_model.pkl']

In [72]:
np.array(X_test)[:1]

array([[8, 2, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 0, 1, 0, 0, 0, 1]])

In [73]:
x_new = ([[6, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
        1, 0, 0, 0, 1, 0, 1, 0, 0, 0]])


In [74]:
x_new = np.array(x_new)

In [75]:
x_new.shape

(1, 32)

In [76]:
lap_top_model.predict(x_new)



array([659.])

In [77]:
np.array(y_test)[:1]

array([1149])

In [78]:
X_train.columns

Index(['Ram', 'Weight', 'Touchscreen', 'IPS', 'Full HD', 'Company_Acer',
       'Company_Apple', 'Company_Asus', 'Company_Dell', 'Company_HP',
       'Company_Lenovo', 'Company_MSI', 'Company_Toshiba', 'Company_others',
       'TypeName_2 in 1 Convertible', 'TypeName_Gaming', 'TypeName_Netbook',
       'TypeName_Notebook', 'TypeName_Ultrabook', 'TypeName_Workstation',
       'Cpu_name_AMD', 'Cpu_name_Intel Core i3', 'Cpu_name_Intel Core i5',
       'Cpu_name_Intel Core i7', 'Cpu_name_Other', 'opsyss_Mac',
       'opsyss_Windows', 'opsyss_others', 'Gpus_counts_AMD Graphics',
       'Gpus_counts_ARM Graphics', 'Gpus_counts_Intel_Graphics',
       'Gpus_counts_Nvidia Graphics'],
      dtype='object')