# IMPORT LIBRARIES

In [139]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics


# OBTAIN & CLEAN DATA

In [167]:
df = pd.read_csv('Laptops.csv', index_col=0)
df.head(2)

Unnamed: 0,Brand,Model Name,Processor,Operating System,Storage,RAM,Screen Size,Touch_Screen,Price
0,HP,15s-fq5007TU,Core i3,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹38,990"
1,HP,15s-fy5003TU,Core i3,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹37,990"


In [168]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 837 entries, 0 to 836
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Brand             837 non-null    object
 1   Model Name        837 non-null    object
 2   Processor         837 non-null    object
 3   Operating System  837 non-null    object
 4   Storage           825 non-null    object
 5   RAM               837 non-null    object
 6   Screen Size       837 non-null    object
 7   Touch_Screen      837 non-null    object
 8   Price             837 non-null    object
dtypes: object(9)
memory usage: 65.4+ KB


## Remove Columns that's not important to the ML

In [169]:
df.drop(columns=['Model Name'], inplace=True)

## CLASS Processor Data

In [170]:
class DataFrameProcessor:
    def __init__(self, df):
        self.df = df

    def create_dummies(self, columns):
        for column in columns:
            dummies = pd.get_dummies(self.df[column], prefix=column).astype(int)
            self.df = pd.concat([self.df, dummies], axis=1)
            self.df = self.df.drop(column, axis=1)
        return self.df

    def screen_size(self, column):
        self.df['screen_size(cm)'] = self.df[column].str.split(' ', expand=True)[0].astype(float)
        self.df['<30cm'] = (self.df['screen_size(cm)'] < 30).astype(int)
        self.df['<35cm'] = ((self.df['screen_size(cm)'] >= 30) & (self.df['screen_size(cm)'] < 35)).astype(int)
        self.df['<40cm'] = ((self.df['screen_size(cm)'] >= 35) & (self.df['screen_size(cm)'] < 40)).astype(int)
        self.df['>40cm'] = (self.df['screen_size(cm)'] >= 40).astype(int)
        self.df.drop(columns=[column,'screen_size(cm)'], inplace=True)
        return self.df

In [172]:
processor = DataFrameProcessor(df)
df = processor.create_dummies(['Processor', 'Storage', 'Brand', 'Operating System', 'RAM', 'Touch_Screen'])
df = processor.screen_size('Screen Size')

In [173]:
df

Unnamed: 0,Price,Processor_Athlon Dual Core,Processor_Celeron Dual Core,Processor_Celeron Quad Core,Processor_Core i3,Processor_Core i5,Processor_Core i7,Processor_Core i9,Processor_M1,Processor_M1 Max,...,RAM_32 GB,RAM_4 GB,RAM_64 GB,RAM_8 GB,Touch_Screen_No,Touch_Screen_Yes,<30cm,<35cm,<40cm,>40cm
0,"₹38,990",0,0,0,1,0,0,0,0,0,...,0,0,0,1,1,0,0,0,1,0
1,"₹37,990",0,0,0,1,0,0,0,0,0,...,0,0,0,1,1,0,0,0,1,0
2,"₹70,990",0,0,0,0,0,0,0,1,0,...,0,0,0,1,1,0,0,1,0,0
3,"₹70,990",0,0,0,0,0,0,0,1,0,...,0,0,0,1,1,0,0,1,0,0
4,"₹70,990",0,0,0,0,0,0,0,1,0,...,0,0,0,1,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
832,"₹42,990",0,0,0,0,0,0,0,0,0,...,0,0,0,1,1,0,0,0,1,0
833,"₹60,490",0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,1,0,0,1,0
834,"₹88,990",0,0,0,0,0,1,0,0,0,...,0,0,0,0,1,0,0,0,0,1
835,"₹37,990",0,0,0,0,1,0,0,0,0,...,0,0,0,1,1,0,0,0,1,0


## Improve Screen Size Data

In [143]:
df['Screen Size'].unique()

array(['39.62 cm (15.6 Inch)', '33.78 cm (13.3 inch)',
       '39.62 cm (15.6 inch)', '35.56 cm (14 inch)', '96.52 cm (38 cm)',
       '100.63 cm (39.62 cm)', '40.89 cm (16.1 Inch)',
       '35.56 cm (14 Inch)', '35.81 cm (14.1 Inch)',
       '35.81 cm (14.1 inch)', '40.64 cm (16 Inch)',
       '33.78 cm (13.3 Inch)', '40.64 cm (16 inch)',
       '39.01 cm (15.36 inch)', '34.54 cm (13.6 Inch)',
       '34.29 cm (13.5 inch)', '38.1 cm (15 inch)',
       '38.0 cm (14.96 Inch)', '29.46 cm (11.6 Inch)',
       '17.78 cm (7 Inch)', '43.94 cm (17.3 Inch)',
       '43.94 cm (17.3 inch)', '26.67 cm (10.5 inch)',
       '34.04 cm (13.4 Inch)', '34.29 cm (13.5 Inch)',
       '33.02 cm (13 inch)', '35.0 cm (13.78 inch)',
       '34.04 cm (13.4 inch)', '41.15 cm (16.2 inch)',
       '90.32 cm (35.56 cm)', '30.48 cm (12 inch)',
       '38.86 cm (15.3 Inch)', '40.89 cm (16.1 inch)',
       '36.07 cm (14.2 inch)', '38.0 cm (14.96 cm)',
       '31.5 cm (12.4 Inch)', '29.46 cm (11.6 inch)'], dtype=obje

In [163]:
# Create new columns with screen size
#def screen_size(df, column):
    #df['<40cm'] = ((df['screen_size(cm)'] >= 35) & (df['screen_size(cm)'] < 40)).astype(int)
    #df['>40cm'] = (df['screen_size(cm)'] >= 40).astype(int)
    #df.drop(columns=[column,'screen_size(cm)'], inplace=True)
    #return df

In [165]:
#df = screen_size(df, 'Screen Size')

Unnamed: 0,Brand,Processor,Operating System,Storage,RAM,Touch_Screen,Price,<30cm,<35cm,<40cm,>40cm
0,HP,Core i3,Windows 11 Home,512 GB,8 GB,No,"₹38,990",0,0,1,0
1,HP,Core i3,Windows 11 Home,512 GB,8 GB,No,"₹37,990",0,0,1,0
2,Apple,M1,Mac OS Big Sur,256 GB,8 GB,No,"₹70,990",0,1,0,0
3,Apple,M1,Mac OS Big Sur,256 GB,8 GB,No,"₹70,990",0,1,0,0
4,Apple,M1,Mac OS Big Sur,256 GB,8 GB,No,"₹70,990",0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
832,HP,Ryzen 5 Hexa Core,Windows 11 Home,512 GB,8 GB,No,"₹42,990",0,0,1,0
833,DELL,Core i3,Windows 11 Home,1 TB,8 GB,Yes,"₹60,490",0,0,1,0
834,MSI,Core i7,Windows 11 Home,4 TB,16 GB,No,"₹88,990",0,0,0,1
835,Infinix,Core i5,Windows 11 Home,512 GB,8 GB,No,"₹37,990",0,0,1,0


## Create dummies

In [None]:
#def create_dummies(df, columns):
    for column in columns:
        dummies = pd.get_dummies(df[column], prefix=column).astype(int)
        df = pd.concat([df, dummies], axis=1)
        df = df.drop(column, axis=1)
    return df

In [None]:
#f = create_dummies(df,['Processor', 'Storage', 'Brand', 'Operating System', 'RAM', 'Touch_Screen'])

## Clean Price Data

In [None]:
df

Unnamed: 0,Price,<30cm,<35cm,<40cm,>40cm,Processor_Athlon Dual Core,Processor_Celeron Dual Core,Processor_Celeron Quad Core,Processor_Core i3,Processor_Core i5,...,Operating System_macOS Ventura,RAM_12 GB,RAM_16 GB,RAM_18 GB,RAM_32 GB,RAM_4 GB,RAM_64 GB,RAM_8 GB,Touch_Screen_No,Touch_Screen_Yes
0,"₹38,990",0,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,1,0
1,"₹37,990",0,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,1,0
2,"₹70,990",0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
3,"₹70,990",0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
4,"₹70,990",0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
832,"₹42,990",0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
833,"₹60,490",0,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,1
834,"₹88,990",0,0,0,1,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
835,"₹37,990",0,0,1,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,1,0


In [174]:
df['Price'] = df['Price'].str.replace('₹', '')
df['Price'] = df['Price'].str.replace(',', '').astype(float)

In [175]:
# convert Price in Indian Rupees to USD 
df.Price = df.Price * 0.012

# Create Model ML

In [None]:
X = df.drop('Price', axis=1)
y = df['Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = metrics.mean_squared_error(y_test, y_pred)

print(f'Error cuadrático medio: {mse}')

Error cuadrático medio: 2.4314594035169313e+29


In [None]:
data = {'pred': y_pred, 'real': y_test}
df_pred = pd.DataFrame(data)

In [None]:
df_pred

Unnamed: 0,pred,real
209,882.0,491.880
823,615.0,718.788
78,427.0,359.760
67,615.0,647.880
409,686.0,551.880
...,...,...
738,488.0,486.000
215,462.0,467.880
824,533.0,804.000
779,915.0,935.880


# Create DataFrame New

In [None]:
data =['brand','processor','operating_system','storage','RAM','screen_size','touch_screen']

Unnamed: 0,<30cm,<35cm,<40cm,>40cm,Processor_Athlon Dual Core,Processor_Celeron Dual Core,Processor_Celeron Quad Core,Processor_Core i3,Processor_Core i5,Processor_Core i7,...,Operating System_macOS Ventura,RAM_12 GB,RAM_16 GB,RAM_18 GB,RAM_32 GB,RAM_4 GB,RAM_64 GB,RAM_8 GB,Touch_Screen_No,Touch_Screen_Yes
209,0,0,1,0,0,0,0,0,0,1,...,0,0,1,0,0,0,0,0,1,0
823,0,0,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,1,0
78,0,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,0
67,0,0,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,1,0
409,0,0,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
738,0,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,0
215,0,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,0
824,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
779,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


In [176]:
class Laptop:
    def __init__(self):
        self.data = {
            'brand': None,
            'processor': None,
            'operating_system': None,
            'storage': None,
            'RAM': None,
            'screen_size': None,
            'touch_screen': None,
        }

    def crear_df(request_data):
        nuevo_personaje = {
        "brand": request_data["brand"],
        "processor": request_data["processor"],
        "operating_system": request_data["operating_system"],
        "storage": request_data["storage"],
        "RAM": request_data["RAM"],
        "screen_size": request_data["screen_size"],
        "touch_screen": request_data["touch_screen"]
        }
        return nuevo_personaje
    def display_data(self):
        """
        Método para mostrar los datos almacenados en la instancia.

        Ejemplo de uso:
        laptop = Laptop()
        laptop.set_data_from_api(api_data)
        laptop.display_data()
        """
        print("Datos del portátil:")
        for key, value in self.data.items():
            print(f"{key}: {value}")
