# IMPORT LIBRARIES

In [42]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics


# OBTAIN & CLEAN DATA

In [43]:
df = pd.read_csv('Laptops.csv', index_col=0)
df.head(2)

Unnamed: 0,Brand,Model Name,Processor,Operating System,Storage,RAM,Screen Size,Touch_Screen,Price
0,HP,15s-fq5007TU,Core i3,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹38,990"
1,HP,15s-fy5003TU,Core i3,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹37,990"


## rename df

In [44]:
df = df.rename(columns={'Processor':'processor', 'Storage':'storage', 'Brand':'brand', 'Operating System':'operating_system', 'RAM':'ram', 'Touch_Screen':'touch_screen'})
df.columns

Index(['brand', 'Model Name', 'processor', 'operating_system', 'storage',
       'ram', 'Screen Size', 'touch_screen', 'Price'],
      dtype='object')

In [45]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 837 entries, 0 to 836
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   brand             837 non-null    object
 1   Model Name        837 non-null    object
 2   processor         837 non-null    object
 3   operating_system  837 non-null    object
 4   storage           825 non-null    object
 5   ram               837 non-null    object
 6   Screen Size       837 non-null    object
 7   touch_screen      837 non-null    object
 8   Price             837 non-null    object
dtypes: object(9)
memory usage: 65.4+ KB


## Remove Columns that's not important to the ML

In [46]:
df.drop(columns=['Model Name'], inplace=True)

## CLASS Processor Data

In [47]:
class DataFrameProcessor:
    def __init__(self, df):
        self.df = df

    def create_dummies(self, columns):
        for column in columns:
            column_lower = column.lower()
            dummies = pd.get_dummies(self.df[column], prefix=column_lower).astype(int)
            self.df = pd.concat([self.df, dummies], axis=1)
            self.df = self.df.drop(column, axis=1)
        return self.df

    def screen_size(self, column):
        self.df['screen_size(cm)'] = self.df[column].str.split(' ', expand=True)[0].astype(float)
        self.df['<30cm'] = (self.df['screen_size(cm)'] < 30).astype(int)
        self.df['<35cm'] = ((self.df['screen_size(cm)'] >= 30) & (self.df['screen_size(cm)'] < 35)).astype(int)
        self.df['<40cm'] = ((self.df['screen_size(cm)'] >= 35) & (self.df['screen_size(cm)'] < 40)).astype(int)
        self.df['>40cm'] = (self.df['screen_size(cm)'] >= 40).astype(int)
        self.df.drop(columns=[column,'screen_size(cm)'], inplace=True)
        return self.df

In [48]:
processor = DataFrameProcessor(df)
df = processor.screen_size('Screen Size')
df = processor.create_dummies(['processor', 'storage', 'brand', 'operating_system', 'ram', 'touch_screen'])

AttributeError: 'Index' object has no attribute 'apply'

In [50]:
df['Price'] = df['Price'].str.replace('₹', '')
df['Price'] = df['Price'].str.replace(',', '').astype(float)

In [51]:
# convert Price in Indian Rupees to USD 
df.Price = df.Price * 0.012

# Create Model ML

In [52]:
X = df.drop('Price', axis=1)
y = df['Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = metrics.mean_squared_error(y_test, y_pred)

print(f'Error cuadrático medio: {mse}')

Error cuadrático medio: 2.4314594035169313e+29


In [53]:
data = {'pred': y_pred, 'real': y_test}
df_pred = pd.DataFrame(data)

In [54]:
X_test

Unnamed: 0,<30cm,<35cm,<40cm,>40cm,processor_Athlon Dual Core,processor_Celeron Dual Core,processor_Celeron Quad Core,processor_Core i3,processor_Core i5,processor_Core i7,...,operating_system_macOS Ventura,ram_12 GB,ram_16 GB,ram_18 GB,ram_32 GB,ram_4 GB,ram_64 GB,ram_8 GB,touch_screen_No,touch_screen_Yes
209,0,0,1,0,0,0,0,0,0,1,...,0,0,1,0,0,0,0,0,1,0
823,0,0,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,1,0
78,0,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,0
67,0,0,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,1,0
409,0,0,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
738,0,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,0
215,0,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,0
824,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
779,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


# Create DataFrame New

In [55]:
ejemplo_base = df.drop(df.index[0:837])
ejemplo_base = ejemplo_base.drop(columns='Price')
ejemplo_base.fillna(0)

Unnamed: 0,<30cm,<35cm,<40cm,>40cm,processor_Athlon Dual Core,processor_Celeron Dual Core,processor_Celeron Quad Core,processor_Core i3,processor_Core i5,processor_Core i7,...,operating_system_macOS Ventura,ram_12 GB,ram_16 GB,ram_18 GB,ram_32 GB,ram_4 GB,ram_64 GB,ram_8 GB,touch_screen_No,touch_screen_Yes


Unnamed: 0,<30cm,<35cm,<40cm,>40cm,processor_Athlon Dual Core,processor_Celeron Dual Core,processor_Celeron Quad Core,processor_Core i3,processor_Core i5,processor_Core i7,...,operating_system_macOS Ventura,ram_12 GB,ram_16 GB,ram_18 GB,ram_32 GB,ram_4 GB,ram_64 GB,ram_8 GB,touch_screen_No,touch_screen_Yes


In [57]:
class Laptop:
    def __init__(self):
        self.data = {
            'brand': None,
            'processor': None,
            'operating_system': None,
            'storage': None,
            'ram': None,
            'screen_size': None,
            'touch_screen': None,
        }

    def crear_df(self, request_data):
        nuevo_df = {
        "brand": request_data["brand"],
        "processor": request_data["processor"],
        "operating_system": request_data["operating_system"],
        "storage": request_data["storage"],
        "ram": request_data["ram"],
        "screen_size": request_data["screen_size"],
        "touch_screen": request_data["touch_screen"]
        }
        nuevo_df = {k: [v] for k, v in nuevo_df.items()} 

        return pd.DataFrame(nuevo_df)
    
    def display_data(self):
        laptop = Laptop()
        laptop.set_data_from_api(api_data)
        laptop.display_data()
        
        print("Datos del portátil:")
        for key, value in self.data.items():
            print(f"{key}: {value}")
