# Detector de imagenes (Fumadores)

## Importación del dataset

In [1]:
!apt-get install unrar
!pip install rarfile

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
unrar is already the newest version (1:6.1.5-1ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 29 not upgraded.
Collecting rarfile
  Downloading rarfile-4.2-py3-none-any.whl.metadata (4.4 kB)
Downloading rarfile-4.2-py3-none-any.whl (29 kB)
Installing collected packages: rarfile
Successfully installed rarfile-4.2


In [2]:
import rarfile

rarfile.UNRAR_TOOL = "/usr/bin/unrar"
!wget https://github.com/repositoriosHackaton/SIC25es-Remember-Us-Recuerdanos-/raw/refs/heads/main/recursos/dataset.rar

rar_path = "/content/dataset.rar"  # Ruta del archivo RAR
extract_path = "dataset"  # Carpeta de salida

with rarfile.RarFile(rar_path) as rar_ref:
    rar_ref.extractall(extract_path)

print("Archivo descomprimido correctamente.")

--2025-03-25 03:08:56--  https://github.com/repositoriosHackaton/SIC25es-Remember-Us-Recuerdanos-/raw/refs/heads/main/recursos/dataset.rar
Resolving github.com (github.com)... 140.82.112.3
Connecting to github.com (github.com)|140.82.112.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/repositoriosHackaton/SIC25es-Remember-Us-Recuerdanos-/refs/heads/main/recursos/dataset.rar [following]
--2025-03-25 03:08:57--  https://raw.githubusercontent.com/repositoriosHackaton/SIC25es-Remember-Us-Recuerdanos-/refs/heads/main/recursos/dataset.rar
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 65567508 (63M) [application/octet-stream]
Saving to: ‘dataset.rar’


2025-03-25 03:08:57 (161 MB/s) - ‘dataset.rar

## Entrenamiento de modelos

### SVC - Sklearn

In [None]:
from sklearn.model_selection import StratifiedKFold, GridSearchCV, KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from PIL import Image
import numpy as np
import os

#### Tratamiento de datos

In [None]:
# Función para cargar las imagenes desde el folder y almacenarlas en forma de vector númerico, junto con otro array con su clasificación
def svc_loadImages(folder):
    images = []
    labels = []
    for filename in os.listdir(folder):
        img = Image.open(os.path.join(folder, filename)).convert("L")
        img = img.resize((250,250))
        img_array = np.array(img).flatten()
        images.append(img_array)
        label = 0 if "notsmoking" in filename else 1 # Si el nombre de la imagen es 'notsmoking' colocar 0, caso contrario 1
        labels.append(label)
    return np.array(images), np.array(labels)

In [None]:
#Separacion imagenes train y test
scv_Xtrain, svc_ytrain = svc_loadImages("dataset/dataset/Training")
svc_Xval, svc_yval = svc_loadImages("dataset/dataset/Validation")
svc_Xtest, svc_ytest = svc_loadImages("dataset/dataset/Testing")

print(f"Datos de entrenamiento: {len(scv_Xtrain)}\tPorcentaje: {(len(scv_Xtrain)*100/1120):.2f}")
print(f"Datos de validación: {len(svc_Xval)}\tPorcentaje: {(len(svc_Xval)*100/1120):.2f}")
print(f"Datos de prueba: {len(svc_Xtest)}\t\tPorcentaje: {(len(svc_Xtest)*100/1120):.2f}")


#Escalado de imagenes con StandarScaler
scaler = StandardScaler()

scv_Xtrain_st = scaler.fit_transform(scv_Xtrain)
scv_Xval_st = scaler.transform(svc_Xval)
scv_Xtest_st = scaler.transform(svc_Xtest)

scv_Xtrain[:2], scv_Xtrain_st[:2]


#Reducir dimensionalidad con PCA a 90 componentes (componentes originales = 250)
pca = PCA(n_components=90)

scv_Xtrain_pca = pca.fit_transform(scv_Xtrain_st)
scv_Xval_pca = pca.transform(scv_Xval_st)
scv_Xtest_pca = pca.transform(scv_Xtest_st)

Datos de entrenamiento: 716	Porcentaje: 63.93
Datos de validación: 180	Porcentaje: 16.07
Datos de prueba: 224		Porcentaje: 20.00


#### Entrenamiento del modelo svc

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
import numpy as np

svm = SVC(kernel="linear", random_state=42)

cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

best_score = -np.inf
best_model = None

# Realizar validación cruzada
for train_idx, val_idx in cv.split(scv_Xtrain_pca, svc_ytrain):
    X_train_fold, X_val_fold = scv_Xtrain_pca[train_idx], scv_Xtrain_pca[val_idx]
    y_train_fold, y_val_fold = svc_ytrain[train_idx], svc_ytrain[val_idx]


    svm.fit(X_train_fold, y_train_fold)

    # Evaluar el modelo
    score = svm.score(X_val_fold, y_val_fold)

    if score > best_score:
        best_score = score
        best_model = SVC(kernel="linear", random_state=42)
        best_model.fit(scv_Xtrain_pca, svc_ytrain)

# Porcenaje final
print(f"Puntuación del mejor modelo: {best_score:.2f}")

Puntuación del mejor modelo: 0.77


#### Exportación de recursos (modelo, scaler, etc)

In [None]:
# Código

### MobileNetV2 (CNN) (keras)

In [3]:
#algunas importacione
import os
import shutil
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

Pre-procesamiento de la data

In [4]:
#en construcion#####
# shutil.rmtree("/content/dataset/dataset/training")

ruta_origen = "dataset/dataset/Training"
ruta_smoking = "dataset/dataset/training/smoking"
ruta_notsmoking = "dataset/dataset/training/notsmoking"

os.makedirs(ruta_smoking, exist_ok=True)
os.makedirs(ruta_notsmoking, exist_ok=True)

for archivo in os.listdir(ruta_origen):
  if os.path.isfile(os.path.join(ruta_origen, archivo)):
    if "notsmoking" not in archivo.lower() :
            shutil.move(os.path.join(ruta_origen, archivo), os.path.join(ruta_notsmoking, archivo))
    elif(True):
            shutil.move(os.path.join(ruta_origen, archivo), os.path.join(ruta_smoking, archivo))


datadir = "/content/dataset/dataset/training"
imgsize = (224, 224)
batchsize = (32)

datagen = ImageDataGenerator(rescale= 1/255, validation_split=0.2)

train_data = datagen.flow_from_directory(datadir,target_size=imgsize,
                                         batch_size=batchsize,class_mode='binary',subset='training')
val_data = datagen.flow_from_directory(datadir,target_size=imgsize,
                                       batch_size=batchsize,class_mode='binary',subset='validation')

Found 574 images belonging to 2 classes.
Found 142 images belonging to 2 classes.


Instanciación del modelo (preentrenado)

In [13]:
mnet = keras.applications.MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet',classifier_activation="softmax")
mnet.trainable = False

model = keras.Sequential([
    mnet,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(1, activation='sigmoid')  # Clasificación binaria
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

history = model.fit(train_data, validation_data=val_data, epochs=60)

Epoch 1/60
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 519ms/step - accuracy: 0.6096 - loss: 0.6437 - val_accuracy: 0.8028 - val_loss: 0.4196
Epoch 2/60
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 99ms/step - accuracy: 0.8726 - loss: 0.3021 - val_accuracy: 0.7958 - val_loss: 0.4341
Epoch 3/60
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 98ms/step - accuracy: 0.9167 - loss: 0.2437 - val_accuracy: 0.8239 - val_loss: 0.3773
Epoch 4/60
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 126ms/step - accuracy: 0.9399 - loss: 0.1871 - val_accuracy: 0.8169 - val_loss: 0.4542
Epoch 5/60
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 124ms/step - accuracy: 0.9461 - loss: 0.1592 - val_accuracy: 0.8310 - val_loss: 0.3999
Epoch 6/60
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 98ms/step - accuracy: 0.9539 - loss: 0.1321 - val_accuracy: 0.8099 - val_loss: 0.4985
Epoch 7/60
[1m18/18[0m [32m