# Preprocess Analog Data Inputs

The model requires grayscale 224x224 images as input. This notebook converts all the image files in a folder into this format.

### Set Up connection with Google Drive

In [1]:
from google.colab import drive

# Make Google Drive visible under /content/drive so that the dataset paths used
# later in the notebook resolve. force_remount=True is passed so a re-run of
# this cell mounts again instead of reusing an existing mount.
drive.mount(mountpoint='/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


### Import Dependencies





In [0]:
import numpy as np
import pandas as pd
import os
from random import shuffle
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import tensorflow as tf
import cv2
import shutil

### Create Function that Resizes Image to 224x224

In [0]:
def image_resizer(input_file,c1,c2):
    """
    Pad a narrow image out to the target width, then resize it to c1 x c2.

    Inputs -
    input_file - Decoded image as a NumPy array of shape (height, width) or
                 (height, width, channels). Despite the name this is the pixel
                 array, not a path on disk
    c1 - Target width in pixels
    c2 - Target height in pixels

    Process -
    When the image is narrower than c1 (typical for the digit '1'), columns are
    added on the left and on the right by replicating the leftmost and the
    rightmost column, so that the width becomes exactly c1. When the number of
    missing columns is odd, the extra column goes on the left.

    The padding is done before resizing because a plain resize of a very
    narrow image would stretch it horizontally and distort the digit.

    The (possibly padded) image is then resized to width c1 and height c2 with
    area interpolation. Images already at least c1 wide are resized directly.

    Output -
    Processed image as a NumPy array with c2 rows and c1 columns
    """
    adj = input_file

    missing_cols = c1 - adj.shape[1]
    if missing_cols > 0:
        # Split the missing width between both sides (floor right, ceil left)
        pad_right = missing_cols // 2
        pad_left = missing_cols - pad_right

        # Pad only the width axis; mode='edge' repeats the border column, and
        # building the spec from ndim keeps colour (H, W, C) inputs working too
        pad_spec = [(0, 0)] * adj.ndim
        pad_spec[1] = (pad_left, pad_right)
        adj = np.pad(adj, pad_spec, mode='edge')

    # cv2.resize takes the target size as (width, height)
    adj = cv2.resize(adj,(c1,c2),interpolation=cv2.INTER_AREA)

    return adj

### Processing the Analog Data

In [0]:
# Root folder on the mounted Drive that holds the train/test split of the
# analog dataset. Defined once so that moving the dataset needs a single edit.
analog_split_root = "/content/drive/My Drive/Team Bergere/Optimizer/Data/optimizer/HQ_analog/Split/"

# Input locations for the training and test data (trailing '/' is required:
# the class sub-folder name is appended directly to these strings)
analog_dir_tr = analog_split_root + "Train/"
analog_dir_ts = analog_split_root + "Test/"

# Output locations for the processed training and test data
analog_dir_trou = analog_split_root + "Train_proc/"
analog_dir_tsou = analog_split_root + "Test_proc/"


In [0]:
# Parameters shared by the processing cells below

# Dictionary mapping each class index (0 to 10) to the name of its
# sub-folder, written with a trailing slash: 0 -> '0/', ..., 10 -> '10/'
classrange = {class_id: str(class_id)+'/' for class_id in range(11)}

# Target image size required by the model: 224x224, read as (width, height)
target_size = (224, 224)


### Define a function that reads, processes and writes the images from an input directory to an output directory

In [0]:

def proc_image(loc,outp,max_images=1000):

  """
  Convert the images of one folder to grayscale, resize them and save them.

  Inputs -
  loc        - Input folder location
  outp       - Output folder location (created if it does not exist)
  max_images - Maximum number of images to process from the folder
               (default 1000)

  Process -
  Files are visited in sorted name order so that the subset picked under the
  max_images limit is the same on every run.

  A file is processed when its name contains "png" or "jpg" and does not
  contain "augmented".

  Each selected file is loaded with OpenCV and converted to grayscale.

  It is then resized with image_resizer to the module-level target_size.

  It is then written into the output folder under the same file name.

  Files that OpenCV cannot decode are reported and skipped.

  Output -
  Always returns 1

  """

  # cv2.imwrite returns False instead of raising when the folder is missing,
  # which would silently drop every output image
  os.makedirs(outp, exist_ok=True)

  counter = 0
  for file in sorted(os.listdir(loc)):
    if counter >= max_images:
      break

    # The original one-line test mixed `or` and `and` without parentheses, so
    # PNG files bypassed both the 'augmented' filter and the image limit
    is_image = "png" in file or 'jpg' in file
    if not is_image or 'augmented' in file:
      continue

    img_tc1 = cv2.imread(os.path.join(loc, file))
    if img_tc1 is None:
      # cv2.imread signals an unreadable or corrupt file by returning None
      print('Skipping unreadable image: ' + file)
      continue

    img_tc1 = cv2.cvtColor(img_tc1, cv2.COLOR_BGR2GRAY)
    temp = image_resizer(img_tc1,target_size[0],target_size[1])
    cv2.imwrite(os.path.join(outp, file),temp)
    counter = counter+1
  return 1


### Loop through the class-wise directories and process images

In [10]:
# Process the analog training images, one class sub-folder at a time.
# NOTE(review): only the training split is handled here; analog_dir_ts and
# analog_dir_tsou are defined earlier but are not used in this cell.

for i in range(11):
    class_subdir = classrange[i]
    source_dir = analog_dir_tr + class_subdir
    output_dir = analog_dir_trou + class_subdir
    proc_image(source_dir, output_dir)
    # Progress marker: class index followed by a fixed ' 1'
    print(str(i)+' 1')
    

0 1
1 1
2 1
3 1
4 1
5 1
6 1
7 1
8 1
9 1
10 1


### Result - The images from the input location are processed and written to the output location.