In [2]:
import zipfile
import os
import numpy as np

# extracted_dir = '/content/drive/MyDrive/IA/Mariposas_images'
extracted_dir = '/home/squispeb/university/ML-Classification-p/mariposasimg'

if not (os.path.exists(extracted_dir) and os.path.isdir(extracted_dir)):
  zip_file_path = '/home/squispeb/university/ML-Classification-p/Mariposas.zip'
  print(zip_file_path)
  with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
      zip_ref.extractall(extracted_dir)
  print("Everything extracted")
else:
  print("Everything already extracted")

Everything already extracted


In [3]:
import os
import numpy as np
from PIL import Image
import pywt
import pywt.data

images_dir = extracted_dir + "/images"
file_list = os.listdir(images_dir)

newy = 64
newx = 128

new_images_path = extracted_dir + "/images" + str(newx) + "x" + str(newy)
if not os.path.exists(new_images_path):
  os.makedirs(new_images_path)

  for filename in file_list:
    if filename.endswith(('.jpg', '.png', '.jpeg', '.gif', '.bmp')):


        image_path = os.path.join(images_dir, filename)

        image = Image.open(image_path)
        resized_image = image.resize((newx, newy))


        resized_image.save( new_images_path + "/" + filename)
        resized_image.close()
        image.close()

Y = []
X = []
for filename in file_list:
  if filename.endswith(('.jpg', '.png', '.jpeg', '.gif', '.bmp')):
    Y.append([int(filename[0:3])])
    image_path = os.path.join(new_images_path, filename)
    image = Image.open(image_path)
    image = image.convert('L')
    wavelet = 'haar'  # Puedes cambiar la wavelet según tus necesidades
    coeffs = pywt.dwt2(image, wavelet)
    approximation, (horizontal_detail, vertical_detail, diagonal_detail) = coeffs
    vector_caracteristico = approximation.flatten()
    X.append(vector_caracteristico)

X = np.array(X)
print(X.shape)
Y = np.array(Y)
from sklearn.decomposition import PCA
import numpy as np

pca = PCA(n_components=50)

# Ajustar PCA a tus datos.
X = pca.fit_transform(X)

(832, 2048)


In [5]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

class SuperVectorMachine:
    def __init__(self, C=1.0, learning_rate=0.01, max_iter=1000, tol=1e-5):
        self.C = C
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.tol = tol
        self.w = None
        self.b = None

    def train(self, X_train, Y_train):
        # Normalizar los datos
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)

        self.X = X_train
        self.Y = Y_train
        self.lambda_list = np.zeros(len(X_train))
        self.w = np.zeros(X_train.shape[1])  # Inicializar self.w como un vector de ceros
        self.b = 0.0

        for _ in range(self.max_iter):
            alpha_old = np.copy(self.lambda_list)
            for i in range(len(X_train)):
                Ei = self.predict(X_train[i]) - Y_train[i]
                if (Y_train[i] * Ei < -self.tol and self.lambda_list[i] < self.C) or \
                   (Y_train[i] * Ei > self.tol and self.lambda_list[i] > 0):
                    j = self.select_random_j(i, len(X_train))
                    Ej = self.predict(X_train[j]) - Y_train[j]

                    alpha_i_old = self.lambda_list[i]
                    alpha_j_old = self.lambda_list[j]

                    L, H = self.compute_L_H(self.lambda_list[j], self.lambda_list[i], Y_train[j], Y_train[i])

                    if L == H:
                        continue

                    eta = 2.0 * np.dot(X_train[i], X_train[j]) - np.dot(X_train[i], X_train[i]) - np.dot(X_train[j], X_train[j])

                    if eta >= 0:
                        continue

                    self.lambda_list[j] -= (Y_train[j] * (Ei - Ej)) / eta
                    self.lambda_list[j] = self.clip_alpha(self.lambda_list[j], H, L)

                    if abs(self.lambda_list[j] - alpha_j_old) < self.tol:
                        continue

                    self.lambda_list[i] += Y_train[i] * Y_train[j] * (alpha_j_old - self.lambda_list[j])

                    b1 = self.b - Ei - Y_train[i] * (self.lambda_list[i] - alpha_i_old) * np.dot(X_train[i], X_train[i]) \
                         - Y_train[j] * (self.lambda_list[j] - alpha_j_old) * np.dot(X_train[i], X_train[j])
                    b2 = self.b - Ej - Y_train[i] * (self.lambda_list[i] - alpha_i_old) * np.dot(X_train[i], X_train[j]) \
                         - Y_train[j] * (self.lambda_list[j] - alpha_j_old) * np.dot(X_train[j], X_train[j])

                    if 0 < self.lambda_list[i] < self.C:
                        self.b = b1
                    elif 0 < self.lambda_list[j] < self.C:
                        self.b = b2
                    else:
                        self.b = (b1 + b2) / 2.0

            if np.all(np.abs(self.lambda_list - alpha_old) < self.tol):
                break

        # Compute the weight vector w
        self.w = np.dot(self.lambda_list * self.Y, X_train)

    def select_random_j(self, i, m):
        j = i
        while j == i:
            j = np.random.randint(0, m)
        return j

    def compute_L_H(self, alpha_j, alpha_i, Y_j, Y_i):
        if Y_i != Y_j:
            L = max(0, alpha_j - alpha_i)
            H = min(self.C, self.C + alpha_j - alpha_i)
        else:
            L = max(0, alpha_i + alpha_j - self.C)
            H = min(self.C, alpha_i + alpha_j)
        return L, H

    def clip_alpha(self, alpha, H, L):
        if alpha > H:
            alpha = H
        if alpha < L:
            alpha = L
        return alpha

    def predict(self, x):
        return np.dot(self.w, x) + self.b

    def predict_batch(self, X):
        return np.sign(np.dot(self.w, X.T) + self.b)

# Supongamos que tienes las siguientes variables definidas: X, Y (datos y etiquetas)
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=41)

# Entrenar el modelo SVM personalizado
svm = SuperVectorMachine(C=5.0, learning_rate=0.05, max_iter=1000, tol=1e-5)
svm.train(X_train, Y_train)

# Realizar predicciones en el conjunto de prueba
Y_pred = svm.predict_batch(X_test)

# Calcular métricas de evaluación
accuracy = accuracy_score(Y_test, Y_pred)
confusion = confusion_matrix(Y_test, Y_pred)
classification = classification_report(Y_test, Y_pred)

# Visualizar la matriz de confusión
classes = np.unique(Y)  # Clases únicas en tus etiquetas Y

plt.figure()
plt.imshow(confusion, interpolation='nearest', cmap=plt.cm.Blues)
plt.title("Matriz de Confusión")
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)

# Etiqueta los valores en la matriz
thresh = confusion.max() / 2.
for i in range(confusion.shape[0]):
    for j in range(confusion.shape[1]):
        plt.text(j, i, format(confusion[i, j], 'd'),
                 horizontalalignment="center",
                 color="white" if confusion[i, j] > thresh else "black")

plt.ylabel('Clase Real')
plt.xlabel('Clase Predicha')
plt.tight_layout()
plt.show()

print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(confusion)
print("Classification Report:")
print(classification)

  self.lambda_list[j] -= (Y_train[j] * (Ei - Ej)) / eta
  self.lambda_list[i] += Y_train[i] * Y_train[j] * (alpha_j_old - self.lambda_list[j])


ValueError: Found input variables with inconsistent numbers of samples: [167, 665]