# IMPORT LIBRARIES

In [146]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics


# OBTAIN & CLEAN DATA

In [147]:
# Load the raw laptop listings; the first CSV column is used as the row index.
df = pd.read_csv('Laptops.csv', index_col=0)
# Preview the first two rows to check the column layout.
df.head(2)

Unnamed: 0,Brand,Model Name,Processor,Operating System,Storage,RAM,Screen Size,Touch_Screen,Price
0,HP,15s-fq5007TU,Core i3,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹38,990"
1,HP,15s-fy5003TU,Core i3,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹37,990"


In [148]:
# Fixed INR -> USD conversion factor applied to every price.
INR_TO_USD = 0.012

# Prices arrive as strings such as "₹38,990". Strip the currency symbol and the
# thousands separator as literal text (regex=False makes this independent of
# the pandas version's default), then parse the remainder as float.
price_inr = (
    df['Price']
    .str.replace('₹', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)

# Convert Price from Indian Rupees to USD. The column is overwritten only once,
# after parsing succeeded, so a failed parse never leaves it half-cleaned.
df['Price'] = price_inr * INR_TO_USD

## Rename columns

In [149]:
# Map the original headers to snake_case names; storage and RAM also carry
# their unit. 'Model Name', 'Screen Size' and 'Price' are left untouched.
column_renames = {
    'Processor': 'processor',
    'Storage': 'storage(GB)',
    'Brand': 'brand',
    'Operating System': 'operating_system',
    'RAM': 'ram(GB)',
    'Touch_Screen': 'touch_screen',
}
df = df.rename(columns=column_renames)
df.columns

Index(['brand', 'Model Name', 'processor', 'operating_system', 'storage(GB)',
       'ram(GB)', 'Screen Size', 'touch_screen', 'Price'],
      dtype='object')

In [150]:
# Inspect dtypes and non-null counts per column before feature engineering.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 837 entries, 0 to 836
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   brand             837 non-null    object 
 1   Model Name        837 non-null    object 
 2   processor         837 non-null    object 
 3   operating_system  837 non-null    object 
 4   storage(GB)       825 non-null    object 
 5   ram(GB)           837 non-null    object 
 6   Screen Size       837 non-null    object 
 7   touch_screen      837 non-null    object 
 8   Price             837 non-null    float64
dtypes: float64(1), object(8)
memory usage: 65.4+ KB


## Remove columns that are not important for the ML model

In [151]:
# 'Model Name' is not used as a model feature. Rebinding `df` (instead of
# inplace=True) together with errors='ignore' lets this cell be re-run without
# raising KeyError once the column is already gone.
df = df.drop(columns=['Model Name'], errors='ignore')

## Data processor class

In [152]:
class DataFrameProcessor:
    """Feature-engineering helper wrapping a laptop DataFrame.

    Every method operates on ``self.df`` and returns the processed frame.
    ``GBTB_to_num`` and ``screen_size`` modify the frame given to the
    constructor in place, while ``create_dummies`` rebinds ``self.df`` to a new
    frame, so callers should always continue with the returned value.
    """

    # Number of gigabytes in one terabyte.
    GB_PER_TB = 1024

    def __init__(self, df):
        """Keep a reference to the DataFrame to process (no copy is made)."""
        self.df = df

    def GBTB_to_num(self, column1, column2):
        """Convert size strings such as ``'512 GB'`` or ``'1 TB'`` to floats in GB.

        Args:
            column1: Name of the storage column; values may end in ' GB' or
                ' TB'. TB values are multiplied by ``GB_PER_TB``.
            column2: Name of the RAM column; only the ' GB' suffix is stripped.

        Returns:
            The processed DataFrame (``self.df``). Missing values stay NaN.
        """
        storage_text = self.df[column1]
        # Record which rows are in TB *before* the unit is stripped, instead of
        # guessing from the magnitude of the number afterwards.
        is_tb = storage_text.str.contains('TB', regex=False, na=False)
        storage_gb = (
            storage_text
            .str.replace(' GB', '', regex=False)
            .str.replace(' TB', '', regex=False)
            .astype(float)
        )
        self.df[column1] = storage_gb.where(~is_tb, storage_gb * self.GB_PER_TB)
        self.df[column2] = (
            self.df[column2].str.replace(' GB', '', regex=False).astype(float)
        )
        return self.df

    def create_dummies(self, columns):
        """One-hot encode the given columns.

        Each column is replaced by 0/1 indicator columns named
        ``<lowercased column name>_<value>``, appended at the end of the frame
        in the order the columns are listed.

        Args:
            columns: Iterable of column names to encode.

        Returns:
            The processed DataFrame (a new object bound to ``self.df``).
        """
        for column in columns:
            dummies = pd.get_dummies(self.df[column], prefix=column.lower()).astype(int)
            self.df = pd.concat([self.df.drop(columns=column), dummies], axis=1)
        return self.df

    def screen_size(self, column):
        """Replace a screen-size text column with four 0/1 size-bucket columns.

        The number before the first space of each value (e.g. ``39.62`` in
        ``'39.62 cm (15.6 Inch)'``) is bucketed as follows:
        ``<30cm`` for values below 30, ``<35cm`` for [30, 35), ``<40cm`` for
        [35, 40) and ``>40cm`` for values of 40 or more.

        Args:
            column: Name of the screen-size column; it is removed afterwards.

        Returns:
            The processed DataFrame (``self.df``).
        """
        # Keep the parsed size in a local Series so no temporary column is
        # added to (and then dropped from) the frame.
        size_cm = self.df[column].str.split(' ', expand=True)[0].astype(float)
        self.df['<30cm'] = (size_cm < 30).astype(int)
        self.df['<35cm'] = ((size_cm >= 30) & (size_cm < 35)).astype(int)
        self.df['<40cm'] = ((size_cm >= 35) & (size_cm < 40)).astype(int)
        self.df['>40cm'] = (size_cm >= 40).astype(int)
        self.df.drop(columns=[column], inplace=True)
        return self.df

In [153]:
# Run the feature-engineering pipeline on the cleaned frame.
processor = DataFrameProcessor(df)
# GBTB_to_num is called for its effect on the frame held by `processor`; its
# result is not assigned so that `df` can never be rebound to None here.
processor.GBTB_to_num('storage(GB)', 'ram(GB)')
# `df` is rebound only from the calls below, which return the processed frame.
df = processor.screen_size('Screen Size')
df = processor.create_dummies(['processor', 'brand', 'operating_system', 'touch_screen'])


### eliminate NaNs

In [154]:
# Drop every row that still has a missing value (the earlier df.info() output
# showed the storage column with 825 non-null entries out of 837 rows).
df = df.dropna()

In [155]:
# Re-inspect the frame after encoding and NaN removal: dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 825 entries, 0 to 836
Data columns (total 71 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   storage(GB)                       825 non-null    float64
 1   ram(GB)                           825 non-null    float64
 2   Price                             825 non-null    float64
 3   <30cm                             825 non-null    int64  
 4   <35cm                             825 non-null    int64  
 5   <40cm                             825 non-null    int64  
 6   >40cm                             825 non-null    int64  
 7   processor_Athlon Dual Core        825 non-null    int64  
 8   processor_Celeron Dual Core       825 non-null    int64  
 9   processor_Celeron Quad Core       825 non-null    int64  
 10  processor_Core i3                 825 non-null    int64  
 11  processor_Core i5                 825 non-null    int64  
 12  processor_Cor

# Create ML Model

In [165]:
# Separate the target (Price) from the feature matrix.
y = df['Price']
X = df.drop(columns='Price')

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit a linear regression on the training rows and predict the held-out ones.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Mean squared error on the held-out rows.
mse = metrics.mean_squared_error(y_test, y_pred)
print(f'Error cuadrático medio: {mse}')

Error cuadrático medio: 56893.63141884551


In [166]:
# Put predictions next to the true prices; the rows take y_test's index labels.
data = dict(pred=y_pred, real=y_test)
df_pred = pd.DataFrame(data)

In [167]:
# Display predicted vs. real prices for the held-out rows.
df_pred

Unnamed: 0,pred,real
621,888.247062,1139.88
180,434.488973,539.88
71,719.181641,839.88
174,351.333746,299.88
283,478.215025,382.68
...,...,...
782,781.856721,803.88
764,562.124243,563.88
150,570.294206,407.88
589,562.124243,647.88


# Create New DataFrame

In [169]:
class Laptop:
    """Holds laptop attribute slots and turns a request mapping into a one-row DataFrame."""

    def __init__(self):
        """Create the ``data`` dict with every attribute slot set to ``None``."""
        attribute_names = (
            'brand',
            'processor',
            'operating_system',
            'storage',
            'ram',
            'screen_size',
            'touch_screen',
        )
        self.data = dict.fromkeys(attribute_names)

    def crear_df(self, request_data):
        """Build a single-row DataFrame from ``request_data``.

        Args:
            request_data: Mapping that provides the keys ``brand``,
                ``processor``, ``operating_system``, ``storage(GB)``,
                ``ram(GB)``, ``screen_size`` and ``touch_screen``. Any other
                keys are ignored.

        Returns:
            A DataFrame with one row and one column per key listed above, in
            that order.

        Raises:
            KeyError: If one of the listed keys is missing from ``request_data``.
        """
        expected_keys = (
            "brand",
            "processor",
            "operating_system",
            "storage(GB)",
            "ram(GB)",
            "screen_size",
            "touch_screen",
        )
        # Wrap each value in a list so pandas treats it as a one-row column.
        single_row = {key: [request_data[key]] for key in expected_keys}
        return pd.DataFrame(single_row)


Unnamed: 0,storage(GB),ram(GB),Price,<30cm,<35cm,<40cm,>40cm,processor_Athlon Dual Core,processor_Celeron Dual Core,processor_Celeron Quad Core,...,operating_system_Ubuntu,operating_system_Windows 10,operating_system_Windows 10 Home,operating_system_Windows 10 Pro,operating_system_Windows 11 Home,operating_system_Windows 11 Pro,operating_system_macOS Sonoma,operating_system_macOS Ventura,touch_screen_No,touch_screen_Yes
0,512.0,8.0,467.88,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
1,512.0,8.0,455.88,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
2,256.0,8.0,851.88,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,256.0,8.0,851.88,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,256.0,8.0,851.88,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
832,512.0,8.0,515.88,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
833,1024.0,8.0,725.88,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,1
834,4096.0,16.0,1067.88,0,0,0,1,0,0,0,...,0,0,0,0,1,0,0,0,1,0
835,512.0,8.0,455.88,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0


# CREATE BASE CSV

In [171]:
# Write the processed frame to 'base.csv'; index=False leaves the row index out of the file.
df.to_csv('base.csv', index=False) 