## 1. Computer Vision Traditional Methods

#### Course: Computer Vision - Master of Artificial Intelligence
Explore traditional computer vision methods, with the parallel technical goal of learning how to use OpenCV.

<p align="center">
<img src="./Project1.gif" alt="drawing" width="700"/>
</p>  

In [None]:
import cv2
import numpy as np

# ##############################################################################
# INITIALIZE VIDEO PARAMETERS
# ##############################################################################

# PROJECT: ASSIGNMENT 1 - FROM BASIC IMAGE PROCESSING TOWARDS OBJECT DETECTION
# HECTOR MANUEL ARTEAGA AMATE --> r0819325

source = 'Video_CV.mp4'
# VIDEO CAPTURE OBJECT
video_cap = cv2.VideoCapture(source)
if not video_cap.isOpened():
    # Fail here with a clear message instead of configuring the writer from
    # the properties of a capture that never opened.
    raise IOError('Could not open video source: {}'.format(source))

height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
# NOTE(review): int() truncates fractional frame rates (e.g. 29.97 -> 29);
# kept as-is because other cells may rely on an integer `fps`.
fps = int(video_cap.get(cv2.CAP_PROP_FPS))

# RESIZE THE OUTPUT FRAME (80 % of the source resolution)
w_out = int(width * 0.8)
h_out = int(height * 0.8)

# CONFIG FOR OUTPUT VIDEO
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('Assignment1_Arteaga_Hector.mp4', fourcc, fps, (w_out, h_out))
win_name = 'Video Preview'
cv2.namedWindow(win_name)

# IMAGE TO BE USED IN TEMPLATE MATCHING
# TEMPLATE MATCHING IS MAINLY USED IN GRAY SCALE (flag 0 loads as grayscale)
template = cv2.imread('Wax_template.png', 0)
if template is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise IOError('Could not read template image: Wax_template.png')
h, w = template.shape

# IMAGE TO BE USED IN CARTE BLANCHE
# IMREAD_UNCHANGED loads the file as stored instead of converting it to 3-channel BGR
ball = cv2.imread('Volleyball.png', cv2.IMREAD_UNCHANGED)
if ball is None:
    raise IOError('Could not read overlay image: Volleyball.png')

In [3]:
'''
##############################################################################
DEFINITION OF FUNCTIONS
1. Print text in certain frame.
2. Gaussian Blur
3. Bilateral Filter
4. Morphological Transformations: Dilation and Erosion.
5. Sobel Operator 
##############################################################################
'''

# SUBTITLES
def subtitle(frame, text):
    """Write ``text`` as a caption on ``frame`` and return the result.

    The caption is drawn with ``cv2.putText`` at pixel position (340, 40)
    using the Hershey Simplex font, scale 1, colour (255, 0, 0) and
    thickness 3.  The value returned is whatever ``cv2.putText`` returns.
    """
    origin = (340, 40)
    colour = (255, 0, 0)
    return cv2.putText(frame, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 1, colour, 3)

# FILTERING
def gaussian_blur(frame, kernel, sigmaX):
    """Return ``frame`` smoothed by ``cv2.GaussianBlur``.

    ``kernel`` is forwarded as the kernel size and ``sigmaX`` as the third
    positional argument of ``cv2.GaussianBlur``.
    """
    return cv2.GaussianBlur(frame, kernel, sigmaX)

def bilateral_blur(frame, sigma, filter_size):
    """Return ``frame`` filtered by ``cv2.bilateralFilter``.

    ``filter_size`` is passed as the neighbourhood diameter, and the single
    ``sigma`` value is used for both the colour sigma and the space sigma.
    """
    return cv2.bilateralFilter(frame, filter_size, sigma, sigma)

# DILATION - MORPHOLOGICAL TRANSFORMATIONS
def dilation(img, selector):
    """Segment one HSV colour range in ``img`` and visualise a dilation of its mask.

    Args:
        img: image converted internally with ``cv2.COLOR_BGR2HSV``
            (assumes an 8-bit, 3-channel BGR frame -- TODO confirm with caller).
        selector: which visualisation to return:
            0 -- the raw colour mask as a 3-channel image;
            1 -- ``img`` restricted to the dilated mask;
            2 -- ``img`` restricted to the raw mask, with the pixels that the
                 dilation added painted in (255, 0, 0).

    Returns:
        A 3-channel image with the same height and width as ``img``.

    Raises:
        ValueError: if ``selector`` is not 0, 1 or 2 (previously this ended in
            an ``UnboundLocalError``).
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Lower and upper HSV boundaries of the detected colour (red)
    lower_red = np.array([170, 130, 185])
    upper_red = np.array([180, 255, 255])
    mask = cv2.inRange(hsv, lower_red, upper_red)

    if selector == 0:
        return cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

    # Morphological transformation: one dilation pass with a 3x3 kernel
    kernel = np.ones((3, 3), np.uint8)
    dilated = cv2.dilate(mask, kernel, iterations=1)

    if selector == 1:
        return cv2.bitwise_and(img, img, mask=dilated)

    if selector == 2:
        # Pixels present after the dilation but not before it.  cv2.subtract
        # saturates at 0, so it cannot wrap around like uint8 `dil - mask`.
        added = cv2.subtract(dilated, mask)
        # Paint only those pixels in (255, 0, 0); everything else stays black.
        highlight = np.zeros((img.shape[0], img.shape[1], 3), np.uint8)
        highlight[added > 0] = (255, 0, 0)
        segmented = cv2.bitwise_and(img, img, mask=mask)
        # `segmented` and `highlight` are non-zero on disjoint pixels (mask vs.
        # dilated-minus-mask), so the sum never exceeds 255.
        return (segmented + highlight).astype(np.uint8)

    raise ValueError('selector must be 0, 1 or 2, got {!r}'.format(selector))

def sobel_operator(img, dx, dy):
    """Return a green-on-black map of the strong Sobel responses in ``img``.

    ``img`` is converted to grayscale (replicated over three channels), the
    Sobel derivative of order (``dx``, ``dy``) is computed with a 5x5 kernel,
    min-max normalised to [0, 1] and scaled to 8-bit.  Pixels whose scaled
    response lies in [160, 255] or in [0, 90] on every channel are painted
    (0, 255, 0); all other pixels are black.

    Returns:
        uint8 image of shape (height, width, 3).
    """
    # Grayscale copy expanded back to three identical channels
    grey_bgr = cv2.cvtColor(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), cv2.COLOR_GRAY2BGR)

    # Sobel is computed in floating point, normalised in place, and only then
    # converted to integers between 0 and 255
    response = cv2.Sobel(grey_bgr, cv2.CV_64F, dx, dy, ksize=5)
    cv2.normalize(response, dst=response, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_64F)
    response_u8 = (255 * response).astype(np.uint8)

    # VISUALIZE IN COLOR
    # Same idea as for the morphological transformation: the detected edges
    # end up mainly near white or near black, so both ends are turned into
    # one mask that is then shown in green.
    bright_edges = cv2.inRange(response_u8, np.array([160, 160, 160]), np.array([255, 255, 255]))
    dark_edges = cv2.inRange(response_u8, np.array([0, 0, 0]), np.array([90, 90, 90]))
    edge_mask = bright_edges + dark_edges

    # Solid (0, 255, 0) canvas, kept only where the mask is set
    green = np.zeros((img.shape[0], img.shape[1], 3))
    green[..., 1] = 255
    painted = cv2.bitwise_and(green, green, mask=edge_mask)
    return painted.astype(np.uint8)

---
## 2. Applied Computer Vision

#### Research project: Increase autonomy of AMR through vision
An AMR already exists for navigating complex warehouses under varying dynamic constraints. The goal now is to add visual perception to the vehicle so that it can detect parking slots and then automatically perform a parking maneuver.

<p align="center">
<img src="./AMR.png" alt="drawing" width="500"/>
</p> 

In [None]:
def detectLines(picture, parameters):
    """Detect line segments in ``picture`` and draw them on a preview image.

    Pipeline:
        1. Gaussian blur (3x3) to reduce noise.
        2. Binary threshold or Canny edge detection, chosen by
           ``parameters.thresholdOrCanny``.
        3. Optional dilation or erosion.
        4. Probabilistic Hough transform.

    Two debug windows ('Threshold or Canny' and 'Morphological') are shown
    with ``cv2.imshow`` as a side effect.

    Args:
        picture: input image.  The intermediate result is converted with
            ``cv2.COLOR_GRAY2RGB``, so this presumably has to be a
            single-channel 8-bit image -- TODO confirm with caller.
        parameters: object whose attributes each expose ``.value``:
            ``cannyTresh``, ``edgeTreshold``, ``thresholdOrCanny``,
            ``kernelmorph``, ``morphIterations``, ``dilationOrErosion``
            (0 = dilate, 1 = erode), ``houghthresh``, ``minLineLength``,
            ``maxLineGap`` and ``maxNbLines``.

    Returns:
        ``(preview, hough_lines)`` where ``preview`` is the 3-channel image
        with at most ``maxNbLines`` segments drawn on it and ``hough_lines``
        is the unmodified result of ``cv2.HoughLinesP`` (``None`` when no
        line was found).

    Raises:
        ValueError: if morphology is enabled and ``dilationOrErosion`` is
            neither 0 nor 1 (previously an ``UnboundLocalError``).
    """
    # ---------- FILTERING ----------
    # Gaussian blurring; plain box blur and bilateral filtering were tried as
    # alternatives during development.
    gaussian_blurred_image = cv2.GaussianBlur(picture, (3, 3), 0)

    # ---------- EDGE ENHANCEMENT / THRESHOLDING ----------
    # Only the selected detector is evaluated.
    if parameters.thresholdOrCanny.value:
        maxValue = 255
        _, useMe = cv2.threshold(gaussian_blurred_image,
                                 parameters.edgeTreshold.value,
                                 maxValue,
                                 cv2.THRESH_BINARY)
    else:
        useMe = cv2.Canny(gaussian_blurred_image, parameters.cannyTresh.value, 255)
    cv2.imshow('Threshold or Canny', useMe)

    # ---------- MORPHOLOGICAL TRANSFORMATIONS ----------
    iterations = parameters.morphIterations.value
    if iterations != 0:
        kernel_size = parameters.kernelmorph.value
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
        mode = parameters.dilationOrErosion.value
        if mode == 0:
            morph = cv2.dilate(useMe, kernel, iterations=iterations)
        elif mode == 1:
            morph = cv2.erode(useMe, kernel, iterations=iterations)
        else:
            raise ValueError(
                'dilationOrErosion must be 0 (dilate) or 1 (erode), got {!r}'.format(mode))
    else:
        morph = useMe
    cv2.imshow('Morphological', morph)

    # ---------- HOUGH TRANSFORM ----------
    dilate_color = cv2.cvtColor(morph, cv2.COLOR_GRAY2RGB)
    hough_lines = cv2.HoughLinesP(morph,
                                  rho=1,
                                  theta=np.pi / 180,
                                  threshold=parameters.houghthresh.value,
                                  minLineLength=parameters.minLineLength.value,
                                  maxLineGap=parameters.maxLineGap.value)

    if hough_lines is not None:
        max_lines = parameters.maxNbLines.value
        for drawn, line in enumerate(hough_lines):
            # Check the cap BEFORE drawing so that exactly `maxNbLines`
            # segments are drawn (the old order drew one segment too many).
            if drawn == max_lines:
                break
            x1, y1, x2, y2 = (int(v) for v in line[0])
            cv2.line(dilate_color, (x1, y1), (x2, y2), (255, 255, 0), 2)

    return dilate_color, hough_lines

&nbsp;
<p align="center">
<img src="./Line_detection.png" alt="drawing" width="1000"/>
</p>

---
## 3. Deep Learning Workflow

#### 3.1. Loading data (Datasets)

Datasets: VGG Face dataset, Pascal VOC 2009, nuScenes
&nbsp;
<p align="center">
<img src="./Face.png" alt="drawing" width="400"/>
</p>

In [None]:
# Input data files are available in the read-only "../input/" directory
DATA_DIR = '/kaggle/input/kul-h02a5a-computer-vision-ga1-2022'


def _load_rgb_images(ids, split):
    """Load ``<DATA_DIR>/<split>/<split>_<id>.npy`` for every id, channel-swapped.

    Each array is passed through ``cv2.COLOR_BGR2RGB`` (assumes the arrays
    are stored in BGR channel order -- TODO confirm against the dataset).
    """
    return [
        cv2.cvtColor(
            np.load('{}/{}/{}_{}.npy'.format(DATA_DIR, split, split, image_id),
                    allow_pickle=False),
            cv2.COLOR_BGR2RGB,
        )
        for image_id in ids
    ]


train = pd.read_csv(DATA_DIR + '/train_set.csv', index_col=0)
train.index = train.index.rename('id')

test = pd.read_csv(DATA_DIR + '/test_set.csv', index_col=0)
test.index = test.index.rename('id')

# read the images as numpy arrays and store in "img" column
# (iterating the index directly avoids building a Series per row with iterrows)
train['img'] = _load_rgb_images(train.index, 'train')
test['img'] = _load_rgb_images(test.index, 'test')

train_size, test_size = len(train), len(test)

"The training set contains {} examples, the test set contains {} examples.".format(train_size, test_size)

### 3.2. Data Augmentations

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentation: rotation, shifts, zoom, channel shift and horizontal flip
gen = ImageDataGenerator(rotation_range=45,
                         width_shift_range=0.1,
                         height_shift_range=0.1,
                         shear_range=0,
                         zoom_range=0.1,
                         channel_shift_range=30.,
                         horizontal_flip=True)

# batch_size --> how many augmented images are generated per original image
batch_size = 5
train_X_aug = []
train_y2 = np.empty((batch_size * train_X.shape[0],), int)

for i in range(len(train_X)):
    x = train_X[i]
    # INPUT SIZE expected by flow(): (IMAGES, SIZE[0], SIZE[1], CHANNELS),
    # so a leading axis of length 1 is added to the single image.
    x = x.reshape((1, ) + x.shape)
    aug_iter = gen.flow(x, batch_size=batch_size)
    # `_` instead of `i`: the counter must not read like the outer image index
    aug_images = [next(aug_iter)[0].astype(np.uint8) for _ in range(batch_size)]

    for j in range(batch_size):
        train_X_aug.append(aug_images[j])
        # Every augmented copy keeps the label of its source image.  The
        # offset uses batch_size (it was a hard-coded 5), so changing
        # batch_size no longer misaligns images and labels.
        train_y2[(i * batch_size) + j] = train_y[i]

train_X_aug = np.array(train_X_aug)

### 3.3. Feature Representations
PCA --> Feature Extractor for dimensionality reduction 

In [None]:
class PCAFeatureExtractor(IdentityFeatureExtractor):
    """PCA ("eigenfaces") feature extractor based on an economy-size SVD.

    After ``fit`` the instance exposes:
        mean            -- per-feature mean of the training data
        mean_face       -- ``mean`` reshaped to ``FACE_SIZE``
        components      -- first ``n_components`` rows of ``Vt``
        singular_values -- first ``n_components`` singular values
        S_vector        -- same values as ``singular_values``
        U_vector        -- first ``n_components`` columns of ``U``
        eigenFaces      -- list of ``components`` rows reshaped to ``FACE_SIZE``
    """

    def __init__(self, n_components, face_size, whiten=False):
        self.n_components = n_components
        self.FACE_SIZE = face_size
        # NOTE(review): stored but never read by this class -- no whitening
        # is applied anywhere below.
        self.whiten = whiten

    def fit(self, X):
        """Compute the principal components of ``X``.

        Args:
            X: 2-D array of shape (n_samples, n_features).

        Returns:
            The full (sign-corrected) ``U, S, Vt`` of the centred data.

        Raises:
            ValueError: if ``X`` is not 2-D.
        """
        if np.ndim(X) != 2:
            raise ValueError('X must be 2-D (n_samples, n_features)')

        self.mean = np.mean(X, axis=0, dtype=np.float64)
        self.mean_face = self.mean.reshape(self.FACE_SIZE)

        centered_data = X - self.mean
        # ECONOMIC SVD
        U, S, Vt = np.linalg.svd(centered_data, full_matrices=False)

        # CHANGING SIGNS: make the largest-magnitude entry of every column of
        # U positive so the decomposition has a deterministic orientation.
        # Flipping U and Vt together leaves U @ diag(S) @ Vt unchanged.
        max_abs_rows = np.argmax(np.abs(U), axis=0)
        signs = np.sign(U[max_abs_rows, range(U.shape[1])])
        U *= signs
        Vt *= signs[:, np.newaxis]

        k = self.n_components
        # Components are the COLUMNS of U (the old row slice `U[0:k]` selected
        # the first k samples instead).
        self.U_vector = U[:, :k]
        self.S_vector = S[:k]

        self.components = Vt[:k]
        # Was `singular_values[:k]`, an undefined name that made every call
        # to fit() raise NameError.
        self.singular_values = S[:k]

        self.eigenFaces = [np.reshape(component, self.FACE_SIZE)
                           for component in self.components]
        return U, S, Vt

    def transform(self, X):
        """Project ``X`` onto the fitted principal components.

        For the data used in ``fit`` this equals ``U[:, :k] * S[:k]``
        (X_proj = Xc @ V = U @ S @ Vt @ V = U @ S).  Other data is projected
        onto the same basis; previously ``transform`` refitted on whatever it
        was given, so train and test features lived in different spaces.

        If the extractor has not been fitted yet it is fitted on ``X`` first,
        which keeps the old "transform without fit" usage working.

        Returns:
            Array of shape (n_samples, n_components).
        """
        if not hasattr(self, 'components'):
            self.fit(X)
        return np.dot(X - self.mean, self.components.T)

    def inverse_transform(self, X):
        """Map projected data back to the original feature space."""
        return np.dot(X, self.components) + self.mean

### 3.4. Classifier
MLP, Convolutional Neural Networks, SVM

In [None]:
class ConvolutionalNeuralNetwork:
    """Small Keras CNN classifier with a ``fit`` / ``predict`` interface.

    The network is two Conv2D(36, 3x3) + MaxPool(2x2) stages followed by
    Dense(128) and a softmax layer with ``NUM_CLASSES`` units.
    """

    # Width of the softmax output and of the one-hot encoded labels
    NUM_CLASSES = 3

    def fit(self, X, y):
        """Build, compile and train the network.

        Args:
            X: training images, passed unchanged to ``Model.fit``.
            y: integer class labels; they are one-hot encoded here.
        """
        self.cnn_model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(filters=36, kernel_size=(3, 3), activation=tf.nn.relu),
            tf.keras.layers.MaxPool2D(pool_size=(2, 2)),

            tf.keras.layers.Conv2D(filters=36, kernel_size=(3, 3), activation=tf.nn.relu),
            tf.keras.layers.MaxPool2D(pool_size=(2, 2)),

            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(128, activation=tf.nn.relu),
            tf.keras.layers.Dense(self.NUM_CLASSES, activation=tf.nn.softmax)
        ])

        # MODEL PARAMETERS
        # NOTE(review): learning_rate=2 is unusually large for SGD and is kept
        # unchanged here -- confirm it is intended.
        self.cnn_model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=2),
                               loss='categorical_crossentropy',
                               metrics=['accuracy'])
        # Fix the one-hot width to the output layer; without num_classes the
        # width is inferred from max(y) and breaks when the highest class is
        # absent from y.
        self.Y_train = tf.keras.utils.to_categorical(y, num_classes=self.NUM_CLASSES)

        # TRAIN
        self.cnn_model.fit(X, self.Y_train, epochs=20, batch_size=32)

    def predict(self, X):
        """Return the index of the highest-scoring class for every sample in ``X``."""
        # fit() stores the network as `cnn_model`; the old `self.model` lookup
        # always raised AttributeError.
        predict_y = self.cnn_model.predict(X)
        return np.argmax(predict_y, axis=1)

    def __call__(self, X):
        """Alias for ``predict`` so the instance can be used as a function."""
        return self.predict(X)

### 3.5. CNN Architectures 
Transfer Learning, Fine-Tuning.  
Note: This model was built to train for Pascal VOC

In [None]:
def classifier_inception(num_classes=20, input_shape=(224, 224, 3)):
    """Build a transfer-learning classifier on top of a frozen InceptionV3.

    The ImageNet-pretrained convolutional base (``include_top=False``) is
    frozen, and a new head is attached: Flatten -> Dense(2048, relu) ->
    Dropout(0.2) -> Dense(2048, relu) -> Dropout(0.2) -> Dense(sigmoid).

    Args:
        num_classes: number of sigmoid output units.  The default of 20
            matches the original hard-coded value.
        input_shape: shape of the input images.  The default (224, 224, 3)
            matches the original hard-coded value.

    Returns:
        An uncompiled Keras ``Model`` mapping the base model's input to the
        per-class sigmoid scores.
    """
    base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)
    # Freeze the pretrained feature extractor; only the new head is trained
    for layer in base_model.layers:
        layer.trainable = False
    # base_model.summary()
    x = layers.Flatten()(base_model.output)
    x = layers.Dense(2048, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(2048, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    # Independent sigmoid per class rather than a softmax over all classes
    predictions = layers.Dense(num_classes, activation='sigmoid')(x)
    head_model = Model(inputs=base_model.input, outputs=predictions)
    return head_model

# Build the InceptionV3-based classifier and compile it with Adam and
# binary cross-entropy, tracking accuracy.
head_model = classifier_inception()
head_model.compile(
    optimizer=optimizers.Adam(),
    loss=losses.binary_crossentropy,
    metrics=['accuracy'],
)
# head_model.summary()

---
## 4. Thesis Research

<p align="center">
<img src="./Synthetic_Real.png" alt="drawing" width="800"/>
</p>

Visual patches based on GPS information. 
<p align="center">
<img src="./scene-0048.png" alt="drawing" width="300"/>
</p>

### 4.1. PyTorch

In [None]:
class CNN(nn.Module):
    """Two-layer convolutional classifier.

    Architecture: Conv(3x3, 8) -> ReLU -> MaxPool(2) -> Conv(3x3, 16) ->
    ReLU -> MaxPool(2) -> Linear(num_classes).  The convolutions use
    padding 1 and stride 1, so only the two poolings change the spatial
    size (each halves it, rounding down).

    Args:
        in_channels: number of channels of the input images.
        num_classes: number of output logits.
        input_size: spatial size of the input images, either an int (square
            images) or a ``(height, width)`` pair.  The default (28, 28)
            reproduces the original hard-coded ``16 * 7 * 7`` classifier
            input.

    Raises:
        ValueError: if ``input_size`` is too small to survive both poolings.
    """

    def __init__(self, in_channels=1, num_classes=3, input_size=(28, 28)):
        super().__init__()
        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size
        # Spatial size after the two 2x2 / stride-2 poolings
        pooled_h, pooled_w = height // 4, width // 4
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                'input_size {!r} is too small for two 2x2 poolings'.format(input_size))

        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=8,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(
            in_channels=8,
            out_channels=16,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.fc1 = nn.Linear(16 * pooled_h * pooled_w, num_classes)

    def forward(self, x):
        """Return the class logits for a batch ``x`` of shape (N, C, H, W)."""
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # Flatten everything except the batch dimension
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        return x
    
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hyperparameters
in_channels = 3
num_classes = 3
learning_rate = 3e-4  # karpathy's constant
batch_size = BATCH_SIZE
num_epochs = 250

# Build the network and move its parameters to the selected device
model = CNN(in_channels=in_channels, num_classes=num_classes)
model = model.to(device)

# Cross-entropy loss, optimised with Adam over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

### 4.2. Post-processing
Metrics: Accuracy, Confusion Matrix, Triplet loss, Dice Loss.

<p align="center">
<img src="./Accuracy.png" alt="drawing" width="700"/>
</p>