# Librerias

In [1]:
# Librerias para crear los vectores de caracteristicas
import os
import numpy as np
from PIL import Image
import pywt
import pickle
# Liberias para crear el modelo de clasificacion
import matplotlib.pyplot as plt
from sklearn.metrics import recall_score, f1_score, precision_score,confusion_matrix, classification_report
from sklearn.preprocessing import MinMaxScaler

In [2]:
def preprocess_images(input_path, output_path, target_size=(256, 256)):
    os.makedirs(output_path, exist_ok=True)
    images = os.listdir(input_path)

    for image_name in images:
        image_path = os.path.join(input_path, image_name)
        imagen = Image.open(image_path)

        imagen = imagen.resize(target_size, Image.LANCZOS)

        output_image_path = os.path.join(output_path, image_name)
        imagen.save(output_image_path)

def load_labels(input_path):
    images = os.listdir(input_path)
    labels = [image_name[:3] for image_name in images]
    return labels

def extract_features(image, num_levels=1):
    LL = image.copy()
    for _ in range(num_levels):
        LL, (LH, HL, HH) = pywt.dwt2(LL, 'haar')
        LL = np.max(LL, axis=0)
        LH = np.max(LH, axis=0)
        HL = np.max(HL, axis=0)
        HH = np.max(HH, axis=0)
        LL = np.concatenate((LL, LH, HL, HH))

    return LL.flatten()

def preprocess_and_extract_features(input_path, output_path, num_levels=1):
    preprocess_images(input_path, output_path)
    images = os.listdir(output_path)
    features_list = []

    for image_name in images:
        image_path = os.path.join(output_path, image_name)
        imagen = Image.open(image_path)
        imagen = imagen.convert('L')
        imagen = np.array(imagen)
        features = extract_features(imagen, num_levels)
        features_list.append(features)

    return features_list

def normalize_features(features):
    scaler = MinMaxScaler()
    features_normalized = [scaler.fit_transform(feature.reshape(-1, 1)).flatten() for feature in features]
    return features_normalized



# Base de datos
1. Descarge en su disco esta Base de Datos:
[Download](https://docs.google.com/spreadsheets/d/15-E3kiLJ6bCyXuJvSmxYAp2QYMkPX2QlQ597fAsPYy8/edit?usp=sharing).
2. Para cargar la base de datos de su disco duro utilize el siguiente comando.
from google.colab import files
uploaded = files.upload()
3. Cargamos 70% de los datos para entrenamiento y el resto para testing
4. Solo estamos tomando 2 características, pero usted debe usar métodos de reducción de la dimensionalidad como PCA.


In [3]:
# Base de datos
#from google.colab import files
#uploaded = files.upload()

# Cargando datos
#Nombre_Data_Set = 'iris - iris.csv'
#iris = pd.read_csv(Nombre_Data_Set)

# Directorios de entrada y salida
from google.colab import drive
drive.mount('/content/drive')

path_images = '/content/drive/MyDrive/ML-1/images/'
path_save = '/content/drive/MyDrive/ML-1/p_images/'

# Obtener etiquetas
labels = load_labels(path_images)

# Obtener características independientes
indep = preprocess_and_extract_features(path_images, path_save, num_levels=1)


# Normalizar las características independientes
indep_normalized = normalize_features(indep)

# Combinar etiquetas y características independientes en un diccionario
dataset = {'labels': labels, 'indep': indep_normalized}

# Nombre del archivo para guardar el dataset
output_file = 'dataset.pkl'

# Guardar el dataset en un archivo utilizando pickle
with open(output_file, 'wb') as file:
    pickle.dump(dataset, file)



Mounted at /content/drive


# **Soft SVM**

- Implementar todas las funciones para que este código sea funcional

In [31]:
import numpy as np

class MulticlassSVM:
    def __init__(self):
        self.features = None
        self.targets = None
        self.bias = None
        self.weight = None
        self.val=None
        self.c = 10
        self.alpha = 0.00001


    def h(self,features,weight,bias):
        return np.dot(features,weight.transpose())+bias

    def loss(self,targets,features,weight,bias,c):
        reg = weight**2/2
        suma=0
        for i in range(len(targets)):
          sum=max(0,1-(targets[i]*self.h(features,weight,bias)[i]))
          suma+=sum
        return reg + c*suma

    def grad(self,targets,features,weight,bias,c):
        if targets*self.h(features,weight,bias)>=1:
          dw = weight
          db = 0
        else:
          dw = weight -c*targets*features
          db = -c*targets
        return dw, db

    def update(self,weight,bias,grad,alpha):
        weight = weight -alpha*grad[0]
        bias = bias -alpha*grad[1]
        return weight,bias


    def train_one_class(self, features, binary_targets):
        c = 1
        alpha = 0.001
        num_epochs = 1000
        weight = np.random.rand(features.shape[1])
        bias = np.random.random()
        loss_function = []
        step = num_epochs // 10

        for epoch in range(num_epochs):
          loss_value = self.loss(binary_targets, features, weight, bias, c)
          loss_function.append(loss_value)
          for idx, x_i in enumerate(features):
            grad_values = self.grad(binary_targets[idx], x_i, weight, bias, c)
            w, b = self.update(weight, bias, grad_values, alpha)
        return w,b

    def precision(self, true_labels, predicted_labels):
            true_positives = np.sum((true_labels == 1) & (predicted_labels == 1))
            false_positives = np.sum((true_labels == -1) & (predicted_labels == 1))
            return true_positives/(true_positives+false_positives)

    def recall(self, true_labels, predicted_labels):
            true_positives = np.sum((true_labels == 1) & (predicted_labels == 1))
            false_negatives = np.sum((true_labels == 1) & (predicted_labels == -1))
            return true_positives/(true_positives+false_negatives)

    def f1_score(self, true_labels, predicted_labels):
            precision_value = self.precision(true_labels, predicted_labels)
            recall_value = self.recall(true_labels, predicted_labels)
            return 2*(precision_value*recall_value)/(precision_value+recall_value)


    def test(self, features, targets, val, weight, bias):
        pred = self.h(features, weight, bias)

        print(targets)
        suma = 0
        #prec = self.precision(val, pred)
        #rec = self.recall(val, pred)
        #f1 = self.f1_score(val,pred)
        for i in range(len(pred)):
          if pred[i]>=1:
            pred[i]=1
          else:
            pred[i]=-1
        print(pred)
        for j in range(len(pred)):
          if pred[i]==targets[i]:
            suma+=1
        return suma/len(pred)*100





In [5]:
with open('dataset.pkl', 'rb') as file:
    dataset = pickle.load(file)
index = np.arange(len(dataset['labels']))
rnd = np.random.RandomState(123)
suffle_index = rnd.permutation(index)

x_shuffle, y_shuffle = [], []
for i in range(len(suffle_index)):
    x_shuffle.append(dataset['indep'][suffle_index[i]])
    y_shuffle.append(dataset['labels'][suffle_index[i]])
x_shuffle = np.array(x_shuffle)
y_shuffle = np.array(y_shuffle)



In [42]:
train_size = int(len(x_shuffle) * 0.70)
test_size = int(len(x_shuffle) * 0.15)
val_size = int(len(x_shuffle) * 0.15)

# Dividimos el dataset en train, test y validation
x_train = x_shuffle[:train_size]
y_train = y_shuffle[:train_size]

x_test = x_shuffle[train_size:train_size + test_size]
y_test = y_shuffle[train_size:train_size + test_size]

x_val = x_shuffle[train_size + test_size:]
y_val = y_shuffle[train_size + test_size:]
x_val = x_val[:124]
y_val = y_val[:124]


In [43]:
partitions = 3
accuracy_models=[]
recall_models=[]
f1_models=[]
partitions_size = len(x_train)//partitions

#resultados = []
temp = ['001','002','003','004','005','006','007','008','009','010']

for k in range(10):

  multiclass_svm = MulticlassSVM()
  accuracy_temp=[]
  recall_temp=[]
  f1_temp=[]
  x_testf=x_test.copy()
  y_testf=y_test.copy()
  x_valf=x_val.copy()
  y_valf=y_val.copy()
  #cambio los valores de 1 y -1 para cada modelo
  #aqui radica el problema solo quiero que se reestablezca el vector en cada iteracion
  for m in range(len(y_valf)):
          if y_valf[m]==temp[k]:
            y_valf[m]=1
          else:
            y_valf[m]=-1
  y_valf=y_valf.astype(int)
  for n in range(len(y_testf)):
          if y_testf[n]==temp[k]:
            y_testf[n]=1
          else:
            y_testf[n]=-1
  y_testf=y_testf.astype(int)
  for i in range(partitions):
      x_train_part =  x_train[i * partitions_size:(i + 1) * partitions_size].copy()
      y_train_part =  y_train[i * partitions_size: (i + 1) * partitions_size:].copy()
      for j in range(len(y_train_part)):
          if y_train_part[j]==temp[k]:
            y_train_part[j]=1
          else:
            y_train_part[j]=-1
      binary_targets=y_train_part.astype(int)
      weight, bias = multiclass_svm.train_one_class(x_train_part, binary_targets)
      print(k,i)

      tes=multiclass_svm.test(x_testf,y_testf,y_valf,weight,bias)
      accuracy_temp.append(tes)
      #recall_temp.append(tes[1])
      #f1_temp.append(tes[2])

  p1=sum(accuracy_temp)/len(accuracy_temp)
  accuracy_models.append(p1)
  #p2=sum(recall_temp)/len(recall_temp)
  #recall_models.append(p2)
  #p3=sum(f1_temp)/len(f1_temp)
  #f1_models.append(p3)




0 0
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1.]
0 1
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1

In [44]:
print(accuracy_models)
#print(recall_models)
#print(f1_models)

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
