# IMPORT LIBRARIES

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics


# OBTAIN & CLEAN DATA

In [10]:
df = pd.read_csv('Laptops.csv', index_col=0)
df.head(2)

Unnamed: 0,Brand,Model Name,Processor,Operating System,Storage,RAM,Screen Size,Touch_Screen,Price
0,HP,15s-fq5007TU,Core i3,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹38,990"
1,HP,15s-fy5003TU,Core i3,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹37,990"


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 837 entries, 0 to 836
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Brand             837 non-null    object
 1   Model Name        837 non-null    object
 2   Processor         837 non-null    object
 3   Operating System  837 non-null    object
 4   Storage           825 non-null    object
 5   RAM               837 non-null    object
 6   Screen Size       837 non-null    object
 7   Touch_Screen      837 non-null    object
 8   Price             837 non-null    object
dtypes: object(9)
memory usage: 65.4+ KB


## Remove Columns that's not important to the ML

In [12]:
df.drop(columns=['Model Name'], inplace=True)

## CLASS Processor Data

In [13]:
class DataFrameProcessor:
    def __init__(self, df):
        self.df = df

    def create_dummies(self, columns):
        for column in columns:
            dummies = pd.get_dummies(self.df[column], prefix=column).astype(int)
            self.df = pd.concat([self.df, dummies], axis=1)
            self.df = self.df.drop(column, axis=1)
        return self.df

    def screen_size(self, column):
        self.df['screen_size(cm)'] = self.df[column].str.split(' ', expand=True)[0].astype(float)
        self.df['<30cm'] = (self.df['screen_size(cm)'] < 30).astype(int)
        self.df['<35cm'] = ((self.df['screen_size(cm)'] >= 30) & (self.df['screen_size(cm)'] < 35)).astype(int)
        self.df['<40cm'] = ((self.df['screen_size(cm)'] >= 35) & (self.df['screen_size(cm)'] < 40)).astype(int)
        self.df['>40cm'] = (self.df['screen_size(cm)'] >= 40).astype(int)
        self.df.drop(columns=[column,'screen_size(cm)'], inplace=True)
        return self.df

In [14]:
processor = DataFrameProcessor(df)
df = processor.create_dummies(['Processor', 'Storage', 'Brand', 'Operating System', 'RAM', 'Touch_Screen'])
df = processor.screen_size('Screen Size')

In [15]:
df

Unnamed: 0,Price,Processor_Athlon Dual Core,Processor_Celeron Dual Core,Processor_Celeron Quad Core,Processor_Core i3,Processor_Core i5,Processor_Core i7,Processor_Core i9,Processor_M1,Processor_M1 Max,...,RAM_32 GB,RAM_4 GB,RAM_64 GB,RAM_8 GB,Touch_Screen_No,Touch_Screen_Yes,<30cm,<35cm,<40cm,>40cm
0,"₹38,990",0,0,0,1,0,0,0,0,0,...,0,0,0,1,1,0,0,0,1,0
1,"₹37,990",0,0,0,1,0,0,0,0,0,...,0,0,0,1,1,0,0,0,1,0
2,"₹70,990",0,0,0,0,0,0,0,1,0,...,0,0,0,1,1,0,0,1,0,0
3,"₹70,990",0,0,0,0,0,0,0,1,0,...,0,0,0,1,1,0,0,1,0,0
4,"₹70,990",0,0,0,0,0,0,0,1,0,...,0,0,0,1,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
832,"₹42,990",0,0,0,0,0,0,0,0,0,...,0,0,0,1,1,0,0,0,1,0
833,"₹60,490",0,0,0,1,0,0,0,0,0,...,0,0,0,1,0,1,0,0,1,0
834,"₹88,990",0,0,0,0,0,1,0,0,0,...,0,0,0,0,1,0,0,0,0,1
835,"₹37,990",0,0,0,0,1,0,0,0,0,...,0,0,0,1,1,0,0,0,1,0


In [16]:
df['Price'] = df['Price'].str.replace('₹', '')
df['Price'] = df['Price'].str.replace(',', '').astype(float)

In [17]:
# convert Price in Indian Rupees to USD 
df.Price = df.Price * 0.012

# Create Model ML

In [18]:
X = df.drop('Price', axis=1)
y = df['Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = metrics.mean_squared_error(y_test, y_pred)

print(f'Error cuadrático medio: {mse}')

Error cuadrático medio: 1.6980654470632233e+29


In [19]:
data = {'pred': y_pred, 'real': y_test}
df_pred = pd.DataFrame(data)

In [20]:
df_pred

Unnamed: 0,pred,real
209,878.3125,491.880
823,615.8125,718.788
78,436.3125,359.760
67,615.8125,647.880
409,676.3125,551.880
...,...,...
738,496.9375,486.000
215,452.1875,467.880
824,552.6875,804.000
779,931.5625,935.880


# Create DataFrame New

In [21]:
data =['brand','processor','operating_system','storage','RAM','screen_size','touch_screen']

In [22]:
class Laptop:
    def __init__(self):
        self.data = {
            'brand': None,
            'processor': None,
            'operating_system': None,
            'storage': None,
            'RAM': None,
            'screen_size': None,
            'touch_screen': None,
        }

    def crear_df(request_data):
        nuevo_df = {
        "brand": request_data["brand"],
        "processor": request_data["processor"],
        "operating_system": request_data["operating_system"],
        "storage": request_data["storage"],
        "RAM": request_data["RAM"],
        "screen_size": request_data["screen_size"],
        "touch_screen": request_data["touch_screen"]
        }
        return nuevo_df
    def display_data(self):
        """
        Método para mostrar los datos almacenados en la instancia.

        Ejemplo de uso:
        laptop = Laptop()
        laptop.set_data_from_api(api_data)
        laptop.display_data()
        """
        print("Datos del portátil:")
        for key, value in self.data.items():
            print(f"{key}: {value}")
