In [1]:
%pip install opencv-python

Collecting opencv-python
  Downloading opencv_python-4.6.0.66-cp36-abi3-win_amd64.whl (35.6 MB)
     ---------------------------------------- 35.6/35.6 MB 1.0 MB/s eta 0:00:00
Installing collected packages: opencv-python
Successfully installed opencv-python-4.6.0.66

[notice] A new release of pip available: 22.1.2 -> 22.2.2
[notice] To update, run: python.exe -m pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.


# Images Data Augmentation
## opencv important functions
<b>get image size: </b>dimensions = img.shape <br>
<b>image resizing: </b>new_img = cv2.resize(img, (new_width, new_height))<br>
<b>convert to grayscale: </b>gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)<br>

## Examples for Data Augmentation
### Affine transformation
In geometry, a transformation is applied by multiplying with a transformation matrix
<ul>
<li>For translation this matrix is used</li>
<img src="images/5_translation.png"></img>
<li>For rotation this matrix is used</li>
<img src="images/6_rotation.png"></img>
</ul>
<b>opencv automates this process on the following two steps</b>
<ol>
<li>
<b>Get transformation matrix</b>
<ul>
<li><b>For translation:</b> M = np.float32([[1,0,horizontal_shift],[0,1,vertical_shift]])</li>
<li><b>For rotation:</b> M = cv2.getRotationMatrix2D(center, rotation_angle, scale)</li>
</ul>
</li>
<li>
<b>Apply the transformation matrix to the image</b><br>
transformed_img = cv2.warpAffine(img,M,(width,height))
</li>
</ol>

### Image flipping
flip_type = 0 # 0: vertical, 1: horizontal, -1: both vertical and horizontal
flipped_img = cv2.flip(img, flip_type)

### Adding Random Exposure (In case that the model should tolerate all lighting conditions)
Edit brightness and contrast:
transformed_img = cv2.convertScaleAbs(img, alpha=alpha, beta=beta)
<ul> <li>alpha = 1.5 # Contrast control (1.0-3.0)</li>
<li>beta = 0 # Brightness control (0-100)</li></ul>

### Normalizing channels
<ul>
<li>Transform the color range from [0, 255] to [0, 1]</li>
<li>Subtract the mean value of each channel</li>
</ul>

## opencv transformation


In [1]:
import cv2
import random
import pandas as pd
import numpy as np
import math
import scipy.ndimage
import tensorflow as tf
img = cv2.imread('bird.jpg')
# cv2.imread reports a missing or unreadable file by returning None instead of
# raising, so fail here with a clear message rather than in a later cv2 call.
if img is None:
    raise FileNotFoundError("Could not read image 'bird.jpg'")

In [2]:
# Convert the BGR image to a single-channel grayscale image.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# print() abbreviates large arrays, so only the corner pixels are shown.
print(gray)


[[146 147 148 ... 109 109 108]
 [146 147 147 ... 108 108 108]
 [146 147 147 ... 108 109 109]
 ...
 [ 64  62  61 ...  17  24  34]
 [ 68  65  63 ...  22  28  38]
 [ 71  68  65 ...  24  26  24]]


In [3]:
# Resize the grayscale image to a fixed 128x128 pixels; cv2.resize takes the
# target size as (width, height) and does not preserve the aspect ratio.
resized_image = cv2.resize(gray, (128, 128))
print(resized_image)


[[147 147 146 ... 109 109 108]
 [147 147 147 ... 109 110 111]
 [146 148 147 ... 109 110 112]
 ...
 [ 72  75  97 ...   5  26  12]
 [ 67  68  90 ...  25  33  25]
 [ 64  64  79 ...  31  13  25]]


In [4]:
# Scale the pixel intensities by 0.5 (a brightness change, not a geometric
# resize); the augmentation function further down draws this factor from (0.5, 2).
# Multiplying by a float yields a floating-point array.
scaled_image = resized_image * 0.5
print(scaled_image)

[[73.5 73.5 73.  ... 54.5 54.5 54. ]
 [73.5 73.5 73.5 ... 54.5 55.  55.5]
 [73.  74.  73.5 ... 54.5 55.  56. ]
 ...
 [36.  37.5 48.5 ...  2.5 13.   6. ]
 [33.5 34.  45.  ... 12.5 16.5 12.5]
 [32.  32.  39.5 ... 15.5  6.5 12.5]]


In [5]:
# Rotate by a random whole angle in [0, 360] degrees (randint includes both ends).
# NOTE(review): scipy.ndimage.rotate defaults to reshape=True, which enlarges the
# output so the whole rotated image fits; the added corners are filled with 0,
# which is presumably why the printed corners are all zero - confirm.
rotated_image = scipy.ndimage.rotate(scaled_image, random.randint(0, 360))
print(rotated_image)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [6]:
# Apply a random translation of 0-10 pixels along each axis (rows, then columns).
# NOTE(review): the tiny non-zero values (~1e-52) in the printed output are
# presumably artifacts of shift's default spline interpolation - confirm.
translated_image = scipy.ndimage.shift(rotated_image, (random.randint(0, 10), random.randint(0, 10)))
print(translated_image)

[[ 0.00000000e+00  0.00000000e+00  0.00000000e+00 ...  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00 ...  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00 ...  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 ...
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00 ... -8.30570866e-52
  -2.98429895e-52  2.14170985e-52]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00 ...  9.41589021e-52
   7.58717033e-53  5.20323442e-53]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00 ...  1.86870424e-52
   1.13297500e-53 -5.76135852e-54]]


In [7]:
## Create a data augmentation function that exposes random variations of the input image
def data_augmentation(img, n_rounds=10):
    """Build a list of randomly augmented variants of ``img``.

    Every round contributes four entries, in this order:

    1. an independent copy of the unmodified input,
    2. the input rotated by a random whole angle in [0, 360] degrees,
    3. the input translated by a random whole offset in [0, 10] along each
       of its two axes,
    4. the input with its values multiplied by a random factor in [0.5, 2].

    Args:
        img: 2-D NumPy array holding the image (the translation is given as
            a two-element offset, so exactly two axes are expected).
        n_rounds: Number of rounds to generate. The default of 10 reproduces
            the original 40-entry output.

    Returns:
        A list of ``4 * n_rounds`` arrays. Rotated entries can be larger than
        ``img`` because ``scipy.ndimage.rotate`` grows the output by default
        so the whole rotated image fits.
    """
    images = []
    for _ in range(n_rounds):
        # Store a copy rather than the caller's array: otherwise an in-place
        # edit of one "original" entry would also change the input and every
        # other unmodified entry in the list.
        images.append(img.copy())

        # Random rotation.
        angle = random.randint(0, 360)
        images.append(scipy.ndimage.rotate(img, angle))

        # Random translation (offset per axis).
        offset = (random.randint(0, 10), random.randint(0, 10))
        images.append(scipy.ndimage.shift(img, offset))

        # Random intensity scaling.
        images.append(img * random.uniform(0.5, 2))
    return images
        

In [8]:
# Build the augmented set from the intensity-scaled image; as the last
# expression of the cell, the returned list is displayed in full.
data_augmentation(scaled_image)

[array([[73.5, 73.5, 73. , ..., 54.5, 54.5, 54. ],
        [73.5, 73.5, 73.5, ..., 54.5, 55. , 55.5],
        [73. , 74. , 73.5, ..., 54.5, 55. , 56. ],
        ...,
        [36. , 37.5, 48.5, ...,  2.5, 13. ,  6. ],
        [33.5, 34. , 45. , ..., 12.5, 16.5, 12.5],
        [32. , 32. , 39.5, ..., 15.5,  6.5, 12.5]]),
 array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 array([[ 0. ,  0. ,  0. , ...,  0. ,  0. ,  0. ],
        [ 0. ,  0. ,  0. , ...,  0. ,  0. ,  0. ],
        [ 0. ,  0. , 73.5, ..., 54.5, 54.5, 54.5],
        ...,
        [ 0. ,  0. , 44. , ...,  5.5,  9. , 24.5],
        [ 0. ,  0. , 45. , ..., 18. ,  9. , 10. ],
        [ 0. ,  0. , 36. , ...,  2.5,  4. ,  2.5]]),
 array([[57.19501722, 57.19501722, 56.80593547, ..., 42.40991072,
         42.40991072, 42.02082898],
       

# Exploring data in csv file

In [20]:
# Ref: https://github.com/random-forests/tensorflow-workshop/blob/master/archive/examples/07_structured_data.ipynb

# Load each census split and immediately discard every row that contains a
# missing value. index_col=False stops pandas from using the first column as
# the index; the first line of the test file is skipped (skiprows=1).
census_train = pd.read_csv(
    './data/census/train.csv',
    index_col=False,
).dropna(axis=0, how="any")
census_test = pd.read_csv(
    './data/census/test.csv',
    index_col=False,
    skiprows=1,
).dropna(axis=0, how="any")

# Your turn! Analyse the data.
# Distinct values of the income label.
# NOTE(review): the printed labels carry a leading space (' <=50K'), so the
# string columns probably need stripping before exact-match lookups - confirm.
print(census_train.income.unique())
# Distinct values of the age column.
print(census_train.age.unique())

[' <=50K' ' >50K']
[39 50 38 53 28 37 49 52 31 42 30 23 32 40 34 25 43 54 35 59 56 19 20 45
 22 48 21 24 57 44 41 29 18 47 46 36 79 27 67 33 76 17 55 61 70 64 71 68
 66 51 58 26 60 90 75 65 77 62 63 80 72 74 69 73 81 78 88 82 83 84 85 86
 87]


In [24]:
# List the column names of the cleaned training frame.
census_train.columns

Index(['age', 'workclass', 'fnlwgt', 'education', 'education-num',
       'marital-status', 'occupation', 'relationship', 'race', 'gender',
       'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
       'income'],
      dtype='object')

In [25]:
# Names of the input columns used to train the linear model: every column
# of the census frame except the 'income' label.
feature_columns = [
    'age',
    'workclass',
    'fnlwgt',
    'education',
    'education-num',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'gender',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
    'native-country',
]


In [26]:
#1- Numerical attributes
##1.1- Bucketizing
# bucketized_column turns a numeric column into a categorical one by
# assigning each value to the range (bucket) it falls into.
age = tf.feature_column.numeric_column('age')

# Coarse age ranges: <31, 31-45, 46-59, 60-74, 75-89, >=90.
age_buckets_1 = tf.feature_column.bucketized_column(
    age,
    boundaries=[31, 46, 60, 75, 90]  # specify the ranges
)

# NOTE(review): boundaries 0..9 all lie below the smallest age printed from
# census_train (17), so every row falls into the last bucket and this column
# carries no information - confirm the intended boundaries.
age_buckets_2 = tf.feature_column.bucketized_column(
    age,
    list(range(10))
)

feature_columns.append(age_buckets_2)

In [27]:
# Demo of one-hot encoding: each index becomes a row of length `depth`
# with a 1 at that index and 0 elsewhere.
indices = [0, 1, 2]
depth = 3
tf.one_hot(indices, depth)  # output: [3 x 3]
# [[1., 0., 0.],
#  [0., 1., 0.],
#  [0., 0., 1.]]


<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]], dtype=float32)>

In [28]:
#1.2 Categorical column with an explicit vocabulary list
# Only the education strings listed here are recognised as categories.
# NOTE(review): the income labels printed earlier carry a leading space
# (' <=50K'); if the education strings do too, none of these entries will
# match - confirm and strip whitespace if needed.
education = tf.feature_column.categorical_column_with_vocabulary_list(
    "education", [
        "Bachelors", "HS-grad", "11th", "Masters", "9th",
        "Some-college", "Assoc-acdm", "Assoc-voc", "7th-8th",
        "Doctorate", "Prof-school", "5th-6th", "10th", "1st-4th",
        "Preschool", "12th"
    ])

feature_columns.append(education)

In [29]:
#1.3 hashed bucket
# Hash each native-country string into one of 1000 buckets, so no explicit
# vocabulary is needed (distinct values may collide in a bucket).
native_country = tf.feature_column.categorical_column_with_hash_bucket('native-country', 1000)
feature_columns.append(native_country)

In [30]:
#1.4 cross column; combine two features
# Cross the coarse age buckets with education and hash each combination
# into one of 10,000 buckets.
age_cross_education = tf.feature_column.crossed_column(
    [age_buckets_1, education],
    hash_bucket_size=int(1e4) # Using a hash is handy here
)
feature_columns.append(age_cross_education)