# Mini projects

In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Level 1

### Photo art app

1. Create a function that receives an image and converts it to a sketch so it looks similar to this:
![Example 1](img/sketch.png)

In [3]:
window_name = "filter"
cap = cv2.VideoCapture(0)
key = 0 
cv2.namedWindow(window_name,cv2.WINDOW_AUTOSIZE)
while (True):
    ret, frame = cap.read()
    gray_frame = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray_frame, (5, 5), 0)
    edges = cv2.Canny(blurred, 30, 150)
    
    cv2.imshow(window_name,edges)

    if key == 27:
        break
    
    key = cv2.waitKey(1)



cap.release()

cv2.destroyAllWindows()
cv2.waitKey(1)




-1

### Instagram filters app

1. Create a function that given a photo it applies a black and white filter
1. Create a filter that will change color of the image 

In [4]:


def black_image(image_path):
    img = cv2.imread(image_path)
    image=img.copy()
    im_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


    thresh, _ = cv2.threshold(im_gray, 128, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    image = cv2.threshold(im_gray, thresh, 255, cv2.THRESH_BINARY)[1]

    cv2.imshow("",image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    cv2.waitKey(1)

black_image("img/noisy.png")


## Level 2

### Photo art app

1. Change the app so it can now do the same but using your webcam to make it in real time  
**Hint:** you can use ```cv2.VideoCapture(0)```

In [5]:

window_name = "filter"
cap = cv2.VideoCapture(0)
key = 0 
cv2.namedWindow(window_name,cv2.WINDOW_AUTOSIZE)
while (True):
    ret, frame = cap.read()
    gray_frame = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)

    thresh, _ = cv2.threshold(gray_frame, 128, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    frame = cv2.threshold(gray_frame, thresh, 255, cv2.THRESH_BINARY)[1]
    

    
    cv2.imshow(window_name,frame)

    if key == 27:
        break
    
    key = cv2.waitKey(1)



cap.release()

cv2.destroyAllWindows()
cv2.waitKey(1)



-1

### Photo correction app

1. Create a funtion to clean *noise* from images


![Noisy image](img/n.png)

In [6]:

def clean_noise(image_path):

    img = cv2.imread(image_path)
    image=img.copy()

    cleaned_image = cv2.fastNlMeansDenoisingColored(image,None,10,10,7,21)

    cv2.imshow("",cleaned_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    cv2.waitKey(1)
clean_noise('img/noisy.png')

In [7]:
kernel = np.array([[0.272, 0.534, 0.131],
        [0.349, 0.686, 0.168],
        [0.393, 0.769, 0.189]])

img = cv2.imread("img/noisy.png")
image=img.copy()


R,G,B = image.shape

tb = 0.272*R + 0.534*G + 0.131*B
tg = 0.349/R + 0.686*G + 0.168/B
tr = 0.393/R + 0.769*G + 0.189*B


if tb > 255: b = 255 
else: b = tb
if tg > 255: g = 255 
else: g = tg
if tr > 255:r = 255 
else: r = tr




### Instagram filters app

1. Create a function that applies a sepia filter to the image, for a BGR image apply this kernel:
        [0.272, 0.534, 0.131],
        [0.349, 0.686, 0.168],
        [0.393, 0.769, 0.189]
        
Get the BGR value of the pixel.
Calculate tr, tg and tb using the formula

tb = 0.272R + 0.534G + 0.131B
tg = 0.349R + 0.686G + 0.168B
tr = 0.393R + 0.769G + 0.189B

Take the integer value.

Set the new RGB value of the pixel as per the following condition:

If tb > 255 then b = 255 else b = tb
If tg > 255 then g = 255 else g = tg
If tr > 255 then r = 255 else r = tr


Example:

Consider a color pixel with the following values

A = 255

R = 100

G = 150

B = 200

Where A, R, G and B represents the Alpha, Red, Green and Blue value of the pixel.

Remember! ARGB will have an integer value in the range 0 to 255.

So, to convert the color pixel into sepia pixel we have to first calculate tr, tg and tb.

tr = 0.393(100) + 0.769(150) + 0.189(200)

tr = 192.45

tr = 192 (taking integer value)

Similarly,

tg = 0.349(100) + 0.686(150) + 0.168(200) = 171 (taking integer value)

and tb = 0.272(100) + 0.534(150) + 0.131(200) = 133 (taking integer value)

## Level 3

### Instagram filters app

1. Apply the sepia filter by using the cv2.transfor function
1. Create different filters from the sepia filter so it renders different images

# More projects

## Handwriting recognition

1. Write a one digit number (from 0 to 9) on a paper and using your mobile phone take a picture of it
1. Crop the image manually and pass it to a a function that will process it so it looks like a char in the MNIST dataset, make sure it is the correct size as well
![4](img/4.png)

1. Pass it through a MNIST classifier and print the prediction on screen

1. Modify the image so now it also contains the predicted result on the image
1. Try it with all digits

In [3]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F

from torchvision import datasets, transforms
print("done")

done


In [4]:
def view_classify(img, ps):

    ps = ps.data.numpy().squeeze()

    fig, (ax1, ax2) = plt.subplots(figsize=(6,9), ncols=2)
    ax1.imshow(img.resize_(1, 28, 28).numpy().squeeze())
    ax1.axis('off')
    ax2.barh(np.arange(10), ps)
    ax2.set_aspect(0.1)
    ax2.set_yticks(np.arange(10))
    ax2.set_yticklabels(np.arange(10))
    ax2.set_title('Class Probability')
    ax2.set_xlim(0, 1.1)

In [5]:
# Define a transform to normalize the data (Preprocessing)
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5), (0.5)) ])

# Download and load the training data
trainset    = datasets.MNIST('MNIST_data/', download=False, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16, shuffle=True)

# Download and load the test data
testset    = datasets.MNIST('MNIST_data/', download=False, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=16, shuffle=True)

In [6]:
class Network(nn.Module):
    
    # Defining the layers, 128, 64, 10 units each
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, 10)
        
    # Forward pass through the network, returns the output logits
    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.log_softmax(x, dim=1)
        return x

model = Network()
model

Network(
  (fc1): Linear(in_features=784, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=10, bias=True)
)

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

print('Initial weights - ', model.fc1.weight)

images, labels = next(iter(trainloader))
images.resize_(16, 784)

# Clear the gradients, do this because gradients are accumulated
optimizer.zero_grad()

# Forward pass, then backward pass, then update weights
output = model.forward(images)
loss = criterion(output, labels)
loss.backward()
print('Gradient -', model.fc1.weight.grad)
optimizer.step()

Initial weights -  Parameter containing:
tensor([[ 0.0170, -0.0125,  0.0222,  ...,  0.0077, -0.0168,  0.0189],
        [-0.0286,  0.0292, -0.0131,  ..., -0.0336, -0.0067, -0.0139],
        [-0.0338, -0.0334, -0.0129,  ..., -0.0001,  0.0305, -0.0059],
        ...,
        [ 0.0141,  0.0349, -0.0058,  ..., -0.0314,  0.0040,  0.0253],
        [ 0.0329,  0.0112, -0.0192,  ...,  0.0193,  0.0236,  0.0235],
        [ 0.0086, -0.0254,  0.0135,  ...,  0.0073, -0.0185, -0.0259]],
       requires_grad=True)
Gradient - tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0005, -0.0005, -0.0005,  ..., -0.0005, -0.0005, -0.0005],
        [-0.0012, -0.0012, -0.0012,  ..., -0.0012, -0.0012, -0.0012],
        ...,
        [-0.0208, -0.0208, -0.0208,  ..., -0.0208, -0.0208, -0.0208],
        [ 0.0081,  0.0081,  0.0081,  ...,  0.0081,  0.0081,  0.0081],
        [-0.0060, -0.0060, -0.0060,  ..., -0.0060, -0.0060, -0.0060]])
  Variable._execution_engine.run_backward(


In [8]:
epochs = 5
print_every = 1860

for e in range(epochs):
    running_loss = 0
    print(f"Epoch: {e+1}/{epochs}")

    for i, (images, labels) in enumerate(iter(trainloader)):

        # Flatten MNIST images into a 784 long vector
        images.resize_(images.size()[0], 784)
        
        optimizer.zero_grad()
        
        output = model.forward(images)   # 1) Forward pass
        loss = criterion(output, labels) # 2) Compute loss
        loss.backward()                  # 3) Backward pass
        optimizer.step()                 # 4) Update model
        
        running_loss += loss.item()
        
        if i % print_every == 0:
            print(f"\tIteration: {i}\t Loss: {running_loss/print_every:.4f}")
            running_loss = 0

Epoch: 1/5
	Iteration: 0	 Loss: 0.0013
	Iteration: 1860	 Loss: 0.8647
	Iteration: 3720	 Loss: 0.3960
Epoch: 2/5
	Iteration: 0	 Loss: 0.0002
	Iteration: 1860	 Loss: 0.3033
	Iteration: 3720	 Loss: 0.2767
Epoch: 3/5
	Iteration: 0	 Loss: 0.0001
	Iteration: 1860	 Loss: 0.2404
	Iteration: 3720	 Loss: 0.2088
Epoch: 4/5
	Iteration: 0	 Loss: 0.0000
	Iteration: 1860	 Loss: 0.1934
	Iteration: 3720	 Loss: 0.1859
Epoch: 5/5
	Iteration: 0	 Loss: 0.0000
	Iteration: 1860	 Loss: 0.1687
	Iteration: 3720	 Loss: 0.1657


In [34]:
correct, total = 0,0
predictions = []
model.eval()

for i,data in enumerate(testloader,0):
    inputs, labels = data
    inputs.resize_(inputs.size()[0], 784)
    #print(inputs.shape)
    outputs = model(inputs)
    _, predicted = torch.max(outputs.data,1)
    predictions.append(outputs)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
print("testing set accuracy: %d %%" % (100*correct/total))

testing set accuracy: 95 %


In [35]:
def to_mnist(img):
    img = cv2.blur(img, (11, 11))
    _, img = cv2.threshold(img, 150, 255, cv2.THRESH_TRUNC)
    img = cv2.bitwise_not(img)
    kernel = np.ones((11, 11), np.uint8)
    # img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel, iterations=1)
    img = cv2.dilate(img, kernel, iterations=1)
    _, img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)
    # kernel_er = np.ones((7, 7), np.uint8)
    # img = cv2.erode(img, kernel_er, iterations=1)
    img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_LANCZOS4)

    return img

def to_tensor(arr: np.ndarray):
    tens = torch.from_numpy(arr).float()
    tens = F.normalize(tens)
    tens = tens.reshape(-1 ,784)
    return tens

In [41]:
three = cv2.imread("img/3.jpg")
three=to_mnist(three)

three=to_tensor(three)
with torch.no_grad():
    logit = model.forward(three)
ps = F.softmax(logit, dim=1)
ps

RuntimeError: Tensor for argument #2 'mat1' is on CPU, but expected it to be on GPU (while checking arguments for addmm)

In [44]:
three = cv2.imread("img/3.jpg")
outputs=to_mnist(three)

_, predicted = torch.max(outputs.data,1)
predicted

<memory at 0x7fbfe0b8d9a0>


TypeError: max() received an invalid combination of arguments - got (memoryview, int), but expected one of:
 * (Tensor input)
 * (Tensor input, name dim, bool keepdim, *, tuple of Tensors out)
 * (Tensor input, Tensor other, *, Tensor out)
 * (Tensor input, int dim, bool keepdim, *, tuple of Tensors out)


In [38]:
with torch.no_grad():
    model = model.cuda()
    three_mnist = three.cuda()

    ps = model(three_mnist)
    pred = torch.exp(ps)
torch.argmax(pred).item()

5

RuntimeError: Tensor for argument #2 'mat1' is on CPU, but expected it to be on GPU (while checking arguments for addmm)

'4.5.2'