In [19]:
from maze_solver import *
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split


## 2.1) ENTRENAMIENTO DE LA RED NEURONAL

 - Green (grama) -> Weight: 2
 - Yellow (arena) -> Weight: 3
 - Blue (agua) -> Weight: 9
 - Grey (pavimento) -> Weight: 1
 - Pink (chicle) -> Weight: 10

In [11]:
# Load the labelled colour samples (columns: red, green, blue, label).
df = pd.read_csv("data/final_data_colors.csv")

# Quick exploratory summary: first rows, dtypes, label set, class balance, size.
print(f"Primeras 10 filas:\n{df.head(10)}")
print(f"Tipos de datos:\n{df.dtypes}")
print(f"Etiquetas únicas:\n{df['label'].unique()}")
print(f"Conteo por etiqueta:\n{df['label'].value_counts()}")
print(f"Dimensiones:\n{df.shape}")
# DataFrame.info() writes its report straight to stdout and returns None, so
# interpolating it into an f-string emitted the report *before* the heading
# and then printed "None". Print the heading first, then call info() directly.
print("Tipos de datos por columna:")
df.info()


Primeras 10 filas:
   red  green  blue   label
0   20    139   240    Blue
1  174     83    72   Brown
2  144    249   131   Green
3  168     25   156    Pink
4   30    182   136   Green
5  199    150   175    Pink
6  199     93   154    Pink
7  231    243    25  Yellow
8   48    213    76   Green
9   38      3    64    Blue
Tipos de datos:
red       int64
green     int64
blue      int64
label    object
dtype: object
Etiquetas únicas:
['Blue' 'Brown' 'Green' 'Pink' 'Yellow' 'Orange' 'Purple' 'Red' 'Grey'
 'White' 'Black']
Conteo por etiqueta:
label
Green     1457
Blue      1107
Pink       579
Purple     553
Brown      376
Yellow     285
Red        236
Orange     205
Grey       174
Black       51
White       29
Name: count, dtype: int64
Dimensiones:
(5052, 4)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5052 entries, 0 to 5051
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   red     5052 non-null   int64 
 1   green   5052

In [21]:
# Keep only the five colours that have a terrain weight assigned; rows with
# any other label are dropped.
terrain_mask = df["label"].isin(["Green", "Yellow", "Blue", "Grey", "Pink"])
df_filtered = df[terrain_mask]

# One-hot encode the text labels. Column order follows lb.classes_ (sorted
# label names), so with these five labels:
#   Blue -> [1, 0, 0, 0, 0], Green -> [0, 1, 0, 0, 0], ...
lb = LabelBinarizer()
Y = lb.fit_transform(df_filtered["label"])

print(f"Y shape: {Y.shape}")
print(f"Y sample: {Y[0]}") # type: ignore

# Feature matrix: the three colour channels divided by 255 so values lie in [0, 1].
X = df_filtered[["red", "green", "blue"]].to_numpy() / 255.0
print(f"X shape: {X.shape}")
print(f"X min: {X.min()}, X max: {X.max()}")


Y shape: (3602, 5)
Y sample: [1 0 0 0 0]
X shape: (3602, 3)
X min: 0.0, X max: 0.996078431372549


In [20]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split repeatable between runs.
# NOTE(review): the split is not stratified even though the classes are
# imbalanced (Grey has far fewer rows than Green) - consider passing
# `stratify=` so both partitions keep the same class proportions.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

print(f"X_train shape: {X_train.shape}, Y_train shape: {Y_train.shape}")
print(f"X_test shape: {X_test.shape}, Y_test shape: {Y_test.shape}")

print("Distribucion del train:")
# Each column of the one-hot matrix corresponds to one entry of lb.classes_,
# so the column sums are the number of training samples per class.
samples_per_class = Y_train.sum(axis=0)
for idx, class_name in enumerate(lb.classes_):
    print(f"Label {class_name}: {samples_per_class[idx]} samples")

X_train shape: (2881, 3), Y_train shape: (2881, 5)
X_test shape: (721, 3), Y_test shape: (721, 5)
Distribucion del train:
Label Blue: 877 samples
Label Green: 1166 samples
Label Grey: 134 samples
Label Pink: 468 samples
Label Yellow: 236 samples
