In [18]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler

In [19]:
df = pd.read_csv("./compra-de-compus/train.csv",index_col=0)
df.index.name = None
df.head()

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_in_euros
755,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i3 6006U 2GHz,8GB,256GB SSD,Intel HD Graphics 520,Windows 10,1.86kg,539.0
618,Dell,Inspiron 7559,Gaming,15.6,Full HD 1920x1080,Intel Core i7 6700HQ 2.6GHz,16GB,1TB HDD,Nvidia GeForce GTX 960<U+039C>,Windows 10,2.59kg,879.01
909,HP,ProBook 450,Notebook,15.6,Full HD 1920x1080,Intel Core i7 7500U 2.7GHz,8GB,1TB HDD,Nvidia GeForce 930MX,Windows 10,2.04kg,900.0
2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
286,Dell,Inspiron 3567,Notebook,15.6,Full HD 1920x1080,Intel Core i3 6006U 2.0GHz,4GB,1TB HDD,AMD Radeon R5 M430,Linux,2.25kg,428.0


In [20]:
df["Weight"].value_counts()

Weight
2.2kg     91
2.1kg     40
2.4kg     31
2.5kg     29
2.3kg     27
          ..
3.31kg     1
0.97kg     1
2.34kg     1
1.11kg     1
4.33kg     1
Name: count, Length: 165, dtype: int64

In [21]:
df["TypeName"].value_counts()

TypeName
Notebook              509
Gaming                143
Ultrabook             141
2 in 1 Convertible     80
Workstation            20
Netbook                19
Name: count, dtype: int64

In [22]:
df["Memory"].value_counts()

Memory
256GB SSD                        282
1TB HDD                          152
500GB HDD                         92
512GB SSD                         83
128GB SSD +  1TB HDD              67
128GB SSD                         54
256GB SSD +  1TB HDD              52
32GB Flash Storage                33
1TB SSD                           12
64GB Flash Storage                11
512GB SSD +  1TB HDD               8
2TB HDD                            8
256GB Flash Storage                7
16GB Flash Storage                 6
256GB SSD +  2TB HDD               6
32GB SSD                           5
1.0TB Hybrid                       5
128GB Flash Storage                4
180GB SSD                          3
16GB SSD                           3
512GB SSD +  2TB HDD               2
1TB SSD +  1TB HDD                 2
1TB HDD +  1TB HDD                 1
512GB Flash Storage                1
1.0TB HDD                          1
256GB SSD +  500GB HDD             1
8GB SSD                        

In [23]:
df["ScreenResolution"].value_counts()

ScreenResolution
Full HD 1920x1080                                349
1366x768                                         211
IPS Panel Full HD 1920x1080                      163
IPS Panel Full HD / Touchscreen 1920x1080         32
Full HD / Touchscreen 1920x1080                   30
1600x900                                          14
Quad HD+ / Touchscreen 3200x1800                  11
Touchscreen 1366x768                              11
IPS Panel 4K Ultra HD / Touchscreen 3840x2160     10
4K Ultra HD / Touchscreen 3840x2160                7
IPS Panel Quad HD+ / Touchscreen 3200x1800         6
Touchscreen 2560x1440                              6
IPS Panel 4K Ultra HD 3840x2160                    5
Touchscreen 2256x1504                              5
IPS Panel Retina Display 2560x1600                 5
1440x900                                           4
IPS Panel 1366x768                                 4
IPS Panel Retina Display 2304x1440                 4
IPS Panel Touchscreen 2560x14

In [24]:
df["Company"].value_counts()

Company
Lenovo       202
Dell         197
HP           194
Asus         121
Acer          74
MSI           37
Toshiba       34
Apple         17
Razer          6
Mediacom       6
Microsoft      5
Samsung        5
Xiaomi         3
Vero           2
Huawei         2
Google         2
Chuwi          2
Fujitsu        2
LG             1
Name: count, dtype: int64

In [25]:
df["Company"].value_counts()

Company
Lenovo       202
Dell         197
HP           194
Asus         121
Acer          74
MSI           37
Toshiba       34
Apple         17
Razer          6
Mediacom       6
Microsoft      5
Samsung        5
Xiaomi         3
Vero           2
Huawei         2
Google         2
Chuwi          2
Fujitsu        2
LG             1
Name: count, dtype: int64

In [26]:
df["OpSys"].value_counts()

OpSys
Windows 10      741
Linux            48
No OS            44
Windows 7        29
Chrome OS        24
macOS            11
Windows 10 S      7
Mac OS X          6
Android           2
Name: count, dtype: int64

In [27]:
# Dividir el DataFrame en conjunto de entrenamiento y conjunto de prueba
X = df.drop(columns=['Price_in_euros'])  # Features
y = df['Price_in_euros']  # Target variable

In [28]:
# Split train-test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [29]:
# Convertir 'OpSys' a valores numéricos
mapeo_opsys = {
    'Windows 10': 1,
    'Linux': 2,
    'No OS': 3,
    'Windows 7': 4,
    'Chrome OS': 5,
    'macOS': 6,
    'Windows 10 S': 7,
    'Mac OS X': 8,
    'Android': 9
}
X_train['OpSys_numerico'] = X_train['OpSys'].map(mapeo_opsys)

# Eliminar la columna original 'OpSys'
X_train.drop(columns=['OpSys'], inplace=True)

# Convertir 'Ram' a valores numéricos
X_train['ram_gb'] = X_train['Ram'].str.replace("GB","").astype(int) 

# Convertir 'Weight' a valores numéricos
X_train['peso'] = X_train['Weight'].str.replace("kg","").astype(float)

# Extraer características relevantes de la resolución de la pantalla
screen_resolution_df = X_train['ScreenResolution'].copy()
screen_resolution_df = screen_resolution_df.str.extract(r'(IPS|Retina|Touchscreen|Full HD|Quad HD|4K Ultra HD|2560x1440|1920x1080|1366x768)', expand=False).fillna('Other')
screen_resolution_df = screen_resolution_df.str.replace(' ', '_')
mapping = {
    'IPS': 1,
    'Retina': 2,
    'Touchscreen': 3,
    'Full_HD': 4,
    'Quad_HD': 5,
    '4K_Ultra_HD': 6,
    '2560x1440': 7,
    '1920x1080': 8,
    '1366x768': 9,
    'Other': 10
}
screen_resolution_df = screen_resolution_df.map(mapping)
screen_resolution_df = pd.get_dummies(screen_resolution_df, prefix='ScreenResolution')
X_train = pd.concat([X_train, screen_resolution_df], axis=1)
X_train = X_train.drop(columns=['ScreenResolution'])

# Convertir 'Memory' a valores numéricos y aplicar variables dummy
memory_df = X_train['Memory'].copy()
memory_df = memory_df.str.replace('GB', '').str.replace('TB', '000').str.extract(r'(\d+)').astype(float)
memory_df = memory_df.fillna(0)
memory_df = memory_df.rename(columns={0: 'Capacity_GB'})
memory_df['Storage_Type'] = X_train['Memory'].str.extract(r'(SSD|HDD|Flash Storage)').fillna('Other')
memory_df = pd.get_dummies(memory_df, columns=['Storage_Type'])
X_train = pd.concat([X_train, memory_df], axis=1)
X_train = X_train.drop(columns=['Memory'])

# Convertir 'Company' a valores numéricos
mapeo_marcas = {
    'Lenovo': 1,
    'Dell': 2,
    'HP': 3,
    'Asus': 4,
    'Acer': 5,
    'MSI': 6,
    'Toshiba': 7,
    'Apple': 8,
    'Razer': 9,
    'Mediacom': 10,
    'Microsoft': 11,
    'Samsung': 12,
    'Xiaomi': 13,
    'Vero': 14,
    'Huawei': 15,
    'Google': 16,
    'Chuwi': 17,
    'Fujitsu': 18,
    'LG': 19
}
X_train['Company_numerico'] = X_train['Company'].map(mapeo_marcas)

# Convertir 'TypeName' a valores numéricos
mapeo_tipos = {
    'Notebook': 1,
    'Gaming': 2,
    'Ultrabook': 3,
    '2 in 1 Convertible': 4,
    'Workstation': 5,
    'Netbook': 6,
    
}
X_train['TypeName_numerico'] = X_train['TypeName'].map(mapeo_tipos)


# Eliminar la columna original 'Cpu'
X_train.drop(columns=['Cpu',"Gpu",'Product', 'Ram', 'Weight', 'Company', 'TypeName',"screen_resolution"], inplace=True)



KeyError: "['screen_resolution'] not found in axis"

In [None]:
X_train.head()

Unnamed: 0,Inches,OpSys_numerico,ram_gb,peso,ScreenResolution_1,ScreenResolution_3,ScreenResolution_4,ScreenResolution_5,ScreenResolution_6,ScreenResolution_7,ScreenResolution_8,ScreenResolution_9,ScreenResolution_10,Capacity_GB,Storage_Type_Flash Storage,Storage_Type_HDD,Storage_Type_Other,Storage_Type_SSD,Company_numerico,TypeName_numerico
1118,17.3,4,8,3.0,True,False,False,False,False,False,False,False,False,1000.0,False,True,False,False,3,5
153,15.6,1,16,2.56,False,False,True,False,False,False,False,False,False,512.0,False,False,False,True,2,2
275,13.3,6,8,1.37,True,False,False,False,False,False,False,False,False,512.0,False,False,False,True,8,3
1100,14.0,4,4,1.54,False,False,True,False,False,False,False,False,False,500.0,False,True,False,False,3,1
131,17.3,1,16,2.8,False,False,True,False,False,False,False,False,False,256.0,False,False,False,True,2,1


In [None]:
# Entrenar modelo RandomForestRegressor
rf_reg = RandomForestRegressor()
rf_reg.fit(X_train, y_train)

In [None]:
# Convertir 'OpSys' a valores numéricos
mapeo_opsys = {
    'Windows 10': 1,
    'Linux': 2,
    'No OS': 3,
    'Windows 7': 4,
    'Chrome OS': 5,
    'macOS': 6,
    'Windows 10 S': 7,
    'Mac OS X': 8,
    'Android': 9
}
X_test['OpSys_numerico'] = X_test['OpSys'].map(mapeo_opsys)

# Eliminar la columna original 'OpSys'
X_test.drop(columns=['OpSys'], inplace=True)

# Convertir 'Ram' a valores numéricos
X_test['ram_gb'] = X_test['Ram'].str.replace("GB","").astype(int) 

# Convertir 'Weight' a valores numéricos
X_test['peso'] = X_test['Weight'].str.replace("kg","").astype(float)

# Extraer características relevantes de la resolución de la pantalla
screen_resolution_df = X_test['ScreenResolution'].copy()
screen_resolution_df = screen_resolution_df.str.extract(r'(IPS|Retina|Touchscreen|Full HD|Quad HD|4K Ultra HD|2560x1440|1920x1080|1366x768)', expand=False).fillna('Other')
screen_resolution_df = screen_resolution_df.str.replace(' ', '_')
mapping = {
    'IPS': 1,
    'Retina': 2,
    'Touchscreen': 3,
    'Full_HD': 4,
    'Quad_HD': 5,
    '4K_Ultra_HD': 6,
    '2560x1440': 7,
    '1920x1080': 8,
    '1366x768': 9,
    'Other': 10
}
screen_resolution_df = screen_resolution_df.map(mapping)
screen_resolution_df = pd.get_dummies(screen_resolution_df, prefix='ScreenResolution')
X_test = pd.concat([X_test, screen_resolution_df], axis=1)
X_test = X_test.drop(columns=['ScreenResolution'])

# Convertir 'Memory' a valores numéricos y aplicar variables dummy
memory_df = X_test['Memory'].copy()
memory_df = memory_df.str.replace('GB', '').str.replace('TB', '000').str.extract(r'(\d+)').astype(float)
memory_df = memory_df.fillna(0)
memory_df = memory_df.rename(columns={0: 'Capacity_GB'})
memory_df['Storage_Type'] = X_test['Memory'].str.extract(r'(SSD|HDD|Flash Storage)').fillna('Other')
memory_df = pd.get_dummies(memory_df, columns=['Storage_Type'])
X_test = pd.concat([X_test, memory_df], axis=1)
X_test = X_test.drop(columns=['Memory'])

# Convertir 'Company' a valores numéricos
mapeo_marcas = {
    'Lenovo': 1,
    'Dell': 2,
    'HP': 3,
    'Asus': 4,
    'Acer': 5,
    'MSI': 6,
    'Toshiba': 7,
    'Apple': 8,
    'Razer': 9,
    'Mediacom': 10,
    'Microsoft': 11,
    'Samsung': 12,
    'Xiaomi': 13,
    'Vero': 14,
    'Huawei': 15,
    'Google': 16,
    'Chuwi': 17,
    'Fujitsu': 18,
    'LG': 19
}
X_test['Company_numerico'] = X_test['Company'].map(mapeo_marcas)

# Convertir 'TypeName' a valores numéricos
mapeo_tipos = {
    'Notebook': 1,
    'Gaming': 2,
    'Ultrabook': 3,
    '2 in 1 Convertible': 4,
    'Workstation': 5,
    'Netbook': 6,
    
}
X_test['TypeName_numerico'] = X_test['TypeName'].map(mapeo_tipos)


# Eliminar la columna original 'Cpu'
X_test.drop(columns=['Cpu',"Gpu",'Product', 'Ram', 'Weight', 'Company', 'TypeName',"screen_resolution"], inplace=True)



In [None]:
# Hacer predicciones
y_pred = rf_reg.predict(X_train)

# Calcular el error cuadrático medio
mse = mean_squared_error(y_train, y_pred)
rmse = np.sqrt(mse)
print(f"RandomForest con el rmse es igual {rmse}")


RandomForest con el rmse es igual 127.4749388101248


In [None]:
# Alinear las columnas de X_test con las de X_train


# Hacer predicciones
y_pred = rf_reg.predict(X_test)

# Calcular el error cuadrático medio
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"RandomForest con el rmse es igual {rmse}")

ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- ScreenResolution_10
- ScreenResolution_7
- ScreenResolution_8


In [None]:
X_test.head()

Unnamed: 0,Inches,OpSys_numerico,ram_gb,peso,ScreenResolution_1,ScreenResolution_3,ScreenResolution_4,ScreenResolution_5,ScreenResolution_6,ScreenResolution_7,...,Cpu_Model_E-Series E2-9000e 1.5GHz,Cpu_Model_FX 9830P 3GHz,Cpu_Model_Pentium Dual Core 4405U 2.1GHz,Cpu_Model_Pentium Dual Core 4405Y 1.5GHz,Cpu_Model_Pentium Dual Core N4200 1.1GHz,Cpu_Model_Pentium Quad Core N3700 1.6GHz,Cpu_Model_Pentium Quad Core N3710 1.6GHz,Cpu_Model_Pentium Quad Core N4200 1.1GHz,Cpu_Model_Ryzen 1700 3GHz,Cpu_Model_Xeon E3-1535M v5 2.9GHz
451,15.6,1,8,2.2,True,False,False,False,False,0,...,0,0,0,0,0,0,False,0,0,0
802,15.6,1,8,2.08,False,False,True,False,False,0,...,0,0,0,0,0,0,False,0,0,0
723,14.0,1,8,1.87,True,False,False,False,False,0,...,0,0,0,0,0,0,False,0,0,0
682,14.0,1,4,1.63,True,False,False,False,False,0,...,0,0,0,0,0,0,False,0,0,0
1306,15.6,1,4,2.2,False,False,False,False,False,0,...,0,0,0,0,0,0,False,0,0,0


In [None]:
df_test = pd.read_csv("./compra-de-compus/test.csv")
df_test.head()

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight
0,209,Lenovo,Legion Y520-15IKBN,Gaming,15.6,Full HD 1920x1080,Intel Core i7 7700HQ 2.8GHz,16GB,512GB SSD,Nvidia GeForce GTX 1060,No OS,2.4kg
1,1281,Acer,Aspire ES1-531,Notebook,15.6,1366x768,Intel Celeron Dual Core N3060 1.6GHz,4GB,500GB HDD,Intel HD Graphics 400,Linux,2.4kg
2,1168,Lenovo,V110-15ISK (i3-6006U/4GB/1TB/No,Notebook,15.6,1366x768,Intel Core i3 6006U 2.0GHz,4GB,1TB HDD,Intel HD Graphics 520,No OS,1.9kg
3,1231,Dell,Inspiron 7579,2 in 1 Convertible,15.6,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,Windows 10,2.191kg
4,1020,HP,ProBook 640,Notebook,14.0,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,4GB,256GB SSD,Intel HD Graphics 620,Windows 10,1.95kg


In [None]:
import pandas as pd

# Función para extraer información de GPU
def extract_gpu_info(gpu_string):
    if 'Intel' in gpu_string:
        return 'Intel', gpu_string.split('Intel')[1].strip()
    elif 'Nvidia' in gpu_string:
        return 'Nvidia', gpu_string.split('Nvidia')[1].strip()
    elif 'AMD' in gpu_string:
        return 'AMD', gpu_string.split('AMD')[1].strip()
    else:
        return 'Other', 'Other'

# Función para extraer información de CPU
def extract_cpu_info(cpu_string):
    if 'Intel' in cpu_string:
        return 'Intel', cpu_string.split('Intel')[1].strip()
    elif 'AMD' in cpu_string:
        return 'AMD', cpu_string.split('AMD')[1].strip()
    else:
        return 'Other', 'Other'

# Convertir 'OpSys' a valores numéricos
mapeo_opsys = {
    'Windows 10': 1,
    'Linux': 2,
    'No OS': 3,
    'Windows 7': 4,
    'Chrome OS': 5,
    'macOS': 6,
    'Windows 10 S': 7,
    'Mac OS X': 8,
    'Android': 9
}
df_test['OpSys_numerico'] = df_test['OpSys'].map(mapeo_opsys)

# Eliminar la columna original 'OpSys'
df_test.drop(columns=['OpSys'], inplace=True)

# Convertir 'Ram' a valores numéricos
df_test['ram_gb'] = df_test['Ram'].str.replace("GB","").astype(int) 

# Convertir 'Weight' a valores numéricos
df_test['peso'] = df_test['Weight'].str.replace("kg","").astype(float)

# Extraer características relevantes de la resolución de la pantalla
screen_resolution_df = df_test['ScreenResolution'].copy()
screen_resolution_df = screen_resolution_df.str.extract(r'(IPS|Retina|Touchscreen|Full HD|Quad HD|4K Ultra HD|2560x1440|1920x1080|1366x768)', expand=False).fillna('Other')
screen_resolution_df = screen_resolution_df.str.replace(' ', '_')
mapping = {
    'IPS': 1,
    'Retina': 2,
    'Touchscreen': 3,
    'Full_HD': 4,
    'Quad_HD': 5,
    '4K_Ultra_HD': 6,
    '2560x1440': 7,
    '1920x1080': 8,
    '1366x768': 9,
    'Other': 10
}
screen_resolution_df = screen_resolution_df.map(mapping)
screen_resolution_df = pd.get_dummies(screen_resolution_df, prefix='ScreenResolution')
df_test = pd.concat([df_test, screen_resolution_df], axis=1)
df_test = df_test.drop(columns=['ScreenResolution'])

# Convertir 'Memory' a valores numéricos y aplicar variables dummy
memory_df = df_test['Memory'].copy()
memory_df = memory_df.str.replace('GB', '').str.replace('TB', '000').str.extract(r'(\d+)').astype(float)
memory_df = memory_df.fillna(0)
memory_df = memory_df.rename(columns={0: 'Capacity_GB'})
memory_df['Storage_Type'] = df_test['Memory'].str.extract(r'(SSD|HDD|Flash Storage)').fillna('Other')
memory_df = pd.get_dummies(memory_df, columns=['Storage_Type'])
df_test = pd.concat([df_test, memory_df], axis=1)
df_test = df_test.drop(columns=['Memory'])

# Convertir 'Company' a valores numéricos
mapeo_marcas = {
    'Lenovo': 1,
    'Dell': 2,
    'HP': 3,
    'Asus': 4,
    'Acer': 5,
    'MSI': 6,
    'Toshiba': 7,
    'Apple': 8,
    'Razer': 9,
    'Mediacom': 10,
    'Microsoft': 11,
    'Samsung': 12,
    'Xiaomi': 13,
    'Vero': 14,
    'Huawei': 15,
    'Google': 16,
    'Chuwi': 17,
    'Fujitsu': 18,
    'LG': 19
}
df_test['Company_numerico'] = df_test['Company'].map(mapeo_marcas)

# Convertir 'TypeName' a valores numéricos
mapeo_tipos = {
    'Notebook': 1,
    'Gaming': 2,
    'Ultrabook': 3,
    '2 in 1 Convertible': 4,
    'Workstation': 5,
    'Netbook': 6,
    
}
df_test['TypeName_numerico'] = df_test['TypeName'].map(mapeo_tipos)

# Convertir 'Gpu' a valores numéricos
df_test['Gpu_Brand'], df_test['Gpu_Model'] = zip(*df_test['Gpu'].apply(lambda x: extract_gpu_info(x)))

# Aplicar one-hot encoding a las columnas 'Gpu_Brand' y 'Gpu_Model'
df_test = pd.get_dummies(df_test, columns=['Gpu_Brand', 'Gpu_Model'])

# Eliminar la columna original 'Gpu'
df_test.drop(columns=['Gpu'], inplace=True)

# Convertir 'Cpu' a valores numéricos
df_test['Cpu_Brand'], df_test['Cpu_Model'] = zip(*df_test['Cpu'].apply(lambda x: extract_cpu_info(x)))

# Aplicar one-hot encoding a las columnas 'Cpu_Brand' y 'Cpu_Model'
df_test = pd.get_dummies(df_test, columns=['Cpu_Brand', 'Cpu_Model'])

# Eliminar la columna original 'Cpu'
df_test.drop(columns=['Cpu','Product', 'Ram', 'Weight', 'Company', 'TypeName'], inplace=True)



In [None]:


# Hacer predicciones
prediccion= rf_reg.predict(df_test)



ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- Cpu_Brand_Other
- Cpu_Model_A10-Series 9620P 2.5GHz
- Cpu_Model_A12-Series 9700P 2.5GHz
- Cpu_Model_A4-Series 7210 2.2GHz
- Cpu_Model_Celeron Dual Core 3205U 1.5GHz
- ...
Feature names seen at fit time, yet now missing:
- Cpu_Model_A10-Series A10-9620P 2.5GHz
- Cpu_Model_A6-Series 7310 2GHz
- Cpu_Model_A6-Series 9220 2.9GHz
- Cpu_Model_A6-Series A6-9220 2.5GHz
- Cpu_Model_A9-Series 9420 2.9GHz
- ...


In [None]:
prediccion.shape

(391,)

In [None]:
sample = pd.read_csv("./compra-de-compus/sample_submission.csv")
sample.head()

Unnamed: 0,laptop_ID,Price_in_euros
0,209,1949.1
1,1281,805.0
2,1168,1101.0
3,1231,1293.8
4,1020,1832.6


In [None]:
submission = pd.DataFrame({"laptop_ID":df_test.index,"Price_in_euros":prediccion})
submission.shape


(391, 2)

In [None]:
submission.head()

Unnamed: 0,laptop_ID,Price_in_euros
0,0,1661.8553
1,1,292.327
2,2,419.1604
3,3,953.5349
4,4,860.1551


In [None]:
ruta_archivo = "./compra-de-compus/submission.csv"

# Guardar el DataFrame en un archivo CSV
submission.to_csv(ruta_archivo, index=False)

In [None]:
def chequeador(df_to_submit):
    """
    Esta función se asegura de que tu submission tenga la forma requerida por Kaggle.
    
    Si es así, se guardará el dataframe en un `csv` y estará listo para subir a Kaggle.
    
    Si no, LEE EL MENSAJE Y HAZLE CASO.
    
    Si aún no:
    - apaga tu ordenador, 
    - date una vuelta, 
    - enciendelo otra vez, 
    - abre este notebook y 
    - leelo todo de nuevo. 
    Todos nos merecemos una segunda oportunidad. También tú.
    """
    if df_to_submit.shape == sample.shape:
        if df_to_submit.columns.all() == sample.columns.all():
            if df_to_submit.laptop_ID.all() == sample.laptop_ID.all():
                print("You're ready to submit!")
                submission.to_csv("submission.csv", index = False) #muy importante el index = False
                urllib.request.urlretrieve("https://www.mihaileric.com/static/evaluation-meme-e0a350f278a36346e6d46b139b1d0da0-ed51e.jpg", "gfg.png")     
                img = Image.open("gfg.png")
                img.show()   
            else:
                print("Check the ids and try again")
        else:
            print("Check the names of the columns and try again")
    else:
        print("Check the number of rows and/or columns and try again")
        print("\nMensaje secreto de Iván y Manuel: No me puedo creer que después de todo este notebook hayas hecho algún cambio en las filas de `laptops_test.csv`. Lloramos.")

In [None]:
chequeador(submission)

Check the ids and try again
