# Imports

In [None]:
%load_ext autoreload
%autoreload 2

import os
import errno
import shutil
import zipfile

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# import pycuda.driver as cuda
# import pycuda.autoinit
# from pycuda.compiler import SourceModule

import tensorflow as tf
import cv2

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

from sklearn.metrics import confusion_matrix

from google.colab import drive
# Mount Google Drive at /content/drive; the dataset archives used below are read
# from (and written back to) paths under this mount point.
drive.mount("/content/drive", force_remount=True)


# Show the name of the GPU device TensorFlow can see
# (an empty string means no GPU is available to this runtime).
tf.test.gpu_device_name()

Mounted at /content/drive


''

# Manage Files

In [None]:
def create_folders():
  """Create the working folders and unpack the raw datasets from Google Drive.

  Creates ``data/``, ``train`` and ``test`` relative to the current working
  directory, then copies ``metal.zip`` and ``cardboard.zip`` from the mounted
  Drive into ``/content/data``, extracts them there and deletes the local zip
  copies.

  Note: the folders are created relative to the working directory while the
  archives are copied to the absolute ``/content/data``; the two only agree
  when the working directory is ``/content``.

  Raises:
    OSError: if a folder cannot be created for a reason other than already
      existing, or if an archive cannot be copied, opened or removed.
  """
  drive_dir = '/content/drive/MyDrive/sdw_classification/datasets/not_processed'
  local_dir = '/content/data'

  # Creating directories. Each folder is handled on its own so that one
  # already-existing folder does not stop the remaining ones from being created.
  for folder in ('data/', 'train', 'test'):
    try:
      os.mkdir(folder)
    except FileExistsError:
      print('Directory already exist')

  for archive in ('metal.zip', 'cardboard.zip'):
    local_zip = os.path.join(local_dir, archive)

    # Copying the zip file from Drive
    shutil.copy(os.path.join(drive_dir, archive), local_zip)

    # Unzipping the file next to the other class folders
    with zipfile.ZipFile(local_zip, 'r') as zip_ref:
      zip_ref.extractall(local_dir)

    # Removing the local zip copy once extracted
    os.remove(local_zip)

In [None]:
# Build the working folders and unpack the raw archives from Drive.
# Note: experiments() also calls create_folders() as its first step.
create_folders()

# Image Processing

In [None]:
def _flatten_first_channel(images):
  """Return channel 0 of an (n, height, width, channels) batch as an (n, height*width) array."""
  n_images, height, width = images.shape[0], images.shape[1], images.shape[2]
  return images[:, :, :, 0].reshape(n_images, height * width)


def processing(paths, dims, batch_size=32, seed=None):
  """Load one batch of grayscale images per split and flatten it for scikit-learn.

  Images are read from ``paths[0]``, rescaled by 1/255, resized to
  ``(dims, dims)`` and split 80/20 into training and validation subsets.
  The generated images are also written to ``paths[1]`` (training, prefix
  ``tr``) and ``paths[2]`` (validation, prefix ``ts``) in ``webp`` format.

  Args:
    paths: sequence of three directories: the dataset root, the folder that
      receives the generated training images and the folder that receives the
      generated validation images.
    dims: side length in pixels of the square images fed to the models.
    batch_size: number of images per batch. Only ONE batch is drawn from each
      generator, so this value caps the size of the returned train and test
      sets. The default (32) matches the Keras default used previously.
    seed: optional random seed forwarded to the generators' shuffling so that
      the drawn batch is reproducible.

  Returns:
    Tuple ``(x_train, x_test, y_train, y_test)``. The ``x_*`` values are
    DataFrames with one row per image and ``dims * dims`` columns; the ``y_*``
    values are column 0 of the label batches (with the generator's default
    categorical labels this is the indicator of the first class).
  """
  # Instancing Image Generator
  image_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255,
                                                                  validation_split=0.2)

  # Options shared by the training and validation flows
  shared_options = dict(directory=paths[0],
                        target_size=(dims, dims),
                        color_mode="grayscale",
                        save_format='webp',
                        batch_size=batch_size,
                        seed=seed)

  train_data_gen = image_generator.flow_from_directory(save_to_dir=paths[1],
                                                       save_prefix='tr',
                                                       subset='training',
                                                       **shared_options)

  val_data_gen = image_generator.flow_from_directory(save_to_dir=paths[2],
                                                     save_prefix='ts',
                                                     subset='validation',
                                                     **shared_options)

  # Extracting train and test images and labels.
  # The built-in next() is used because the iterator's .next() method is not
  # available on every Keras version, while __next__ is.
  images_tr, label_tr = next(train_data_gen)
  images_ts, label_ts = next(val_data_gen)

  # Flattening: one row of dims*dims pixel values per image
  x_train = pd.DataFrame(_flatten_first_channel(images_tr))
  x_test = pd.DataFrame(_flatten_first_channel(images_ts))

  # Data separation
  y_train = label_tr[:, 0]
  y_test = label_ts[:, 0]

  return (x_train, x_test, y_train, y_test)

# Models

In [None]:
def models(x_train, x_test, y_train, y_test, n_neighbors=2):
  """Fit a k-nearest-neighbours classifier and evaluate it on the test split.

  Args:
    x_train: training features, one row per sample.
    x_test: test features, one row per sample.
    y_train: training labels aligned with ``x_train``.
    y_test: true test labels aligned with ``x_test``.
    n_neighbors: number of neighbours used by the classifier (default 2).

  Returns:
    Tuple ``(cm, score)``: the confusion matrix of the test predictions
    (rows are true classes, columns are predicted classes) and the value
    returned by the classifier's ``score`` on the test split.
  """
  neigh = KNeighborsClassifier(n_neighbors=n_neighbors)
  neigh.fit(x_train, y_train)

  predict_knn = neigh.predict(x_test)

  # confusion_matrix takes (y_true, y_pred); passing them the other way round
  # yields the transposed matrix.
  cm = confusion_matrix(y_test, predict_knn)
  score = neigh.score(x_test, y_test)

  return (cm, score)

# Manage Files

In [None]:
def remove_folders():
  """Archive the class folders to Google Drive and delete the working folders.

  For each class (``cardboard``, ``metal``) the folder ``data/<class>`` is
  renamed to ``data/<class>processed``, zipped, and the zip is copied to the
  ``compressed`` dataset folder on the mounted Drive. The local zip files and
  the ``data``, ``train`` and ``test`` folders are then deleted.

  NOTE(review): only the contents of ``data/<class>`` are archived; the
  ``train`` and ``test`` folders are deleted without being uploaded.

  Raises:
    OSError: if a folder to rename, archive or remove does not exist, or the
      copy to Drive fails.
  """
  drive_dir = '/content/drive/MyDrive/sdw_classification/datasets/compressed'

  for label in ('cardboard', 'metal'):
    archive_name = label + 'processed'
    processed_dir = 'data/' + archive_name

    # Rename directory
    os.rename('data/' + label, processed_dir)

    # Zipping the folder; the archive must be built from the renamed directory
    shutil.make_archive(archive_name, 'zip', processed_dir)

    # Copying the zip to Drive, then removing the local copy
    shutil.copy(archive_name + '.zip', drive_dir)
    os.remove(archive_name + '.zip')

  # Removing folders
  shutil.rmtree('data')
  shutil.rmtree('train')
  shutil.rmtree('test')

# Experiments

In [None]:
def experiments(dims):
  """Run one end-to-end experiment for square images of side ``dims``.

  Steps, in order: create the working folders, build the flattened train and
  test sets, fit and evaluate the model, then archive and remove the folders.

  Args:
    dims: image side length forwarded to ``processing``.

  Returns:
    Tuple ``(cm, score)`` as produced by ``models``.
  """
  working_dirs = ['/content/data', '/content/train', '/content/test']

  # Set up the folders and raw data
  create_folders()

  # Build the feature tables and label vectors
  train_features, test_features, train_labels, test_labels = processing(working_dirs, dims)

  # Train and evaluate
  confusion, accuracy = models(train_features, test_features, train_labels, test_labels)

  # Archive and clean up
  remove_folders()

  return (confusion, accuracy)