In [None]:
def test_gen(input_df: pd.DataFrame,
             batch_size: int = 100,
             map_size: Tuple[int,int] = (64,64),
             random_state=None):
  """Yield a single randomly sampled test batch of encoded maps and labels.

  Rows are drawn from ``input_df`` without replacement, their ``'wafermap'``
  entries are passed through ``resize_images`` and ``one_hot_encode`` (both
  defined elsewhere in this notebook), and the ``'failurenum'`` column is
  turned into one-hot label rows.

  Args:
    input_df: Frame holding at least the ``'wafermap'`` and ``'failurenum'``
      columns.
    batch_size: Number of rows to sample. pandas raises ``ValueError`` when
      this exceeds the number of rows, since sampling is without replacement.
    map_size: Target size handed unchanged to ``resize_images`` and
      ``one_hot_encode``.
    random_state: Optional seed forwarded to ``DataFrame.sample``. The default
      ``None`` keeps the previous behaviour (a different sample on each call);
      pass an int to make the batch reproducible.

  Yields:
    One ``(x_test, y_test)`` tuple, after which the generator is exhausted.
    ``x_test`` is the array returned by ``one_hot_encode``; ``y_test`` is a
    float array of shape ``(n_maps, max_failurenum + 1)`` with a single 1 per
    row.

  Raises:
    TypeError: If an argument has the wrong type. Because this is a generator
      function, the check only runs on the first ``next()`` call, not when
      ``test_gen(...)`` itself is called.
  """
  if not isinstance(input_df, pd.DataFrame):
    raise TypeError("input_df must be a pandas DataFrame")
  if not isinstance(batch_size, int):
    raise TypeError("batch_size must be an int")
  if not isinstance(map_size, tuple):
    raise TypeError("map_size must be a tuple")
  # Label width comes from the whole frame, not the sample, so every batch
  # has the same number of label columns (failure numbers 0..max).
  num_classes = int(input_df['failurenum'].max()) + 1
  # Sample without replacement and re-index from 0 so rows line up
  # positionally with the encoded maps.
  sample = input_df.sample(n=batch_size, replace=False,
                           random_state=random_state).reset_index(drop=True)
  # Resize maps
  resized_maps = resize_images(sample['wafermap'], map_size)
  # One-hot encode the maps (expected to return an np.array -- its .shape is
  # used below).
  encoded_maps = one_hot_encode(resized_maps, map_size)
  num_maps = encoded_maps.shape[0]
  # Cast to int so an object- or float-typed label column can still be used
  # as an index; pandas raises if the column holds NaN.
  fail_nums = sample['failurenum'].astype(int).to_numpy()[:num_maps]
  # [num_maps] x [num_classes] array of zeros, then a single vectorised
  # assignment puts a 1 in the column matching each row's failure number.
  labels = np.zeros((num_maps, num_classes))
  labels[np.arange(num_maps), fail_nums] = 1
  yield (np.asarray(encoded_maps), labels)

In [None]:
################################################################################
#----------------------Exploring methods to Resize Maps-------------------------
################################################################################
x_test, y_test = [], []
dsize = (64, 64)
# Draw one random 'Donut' wafer map. The result stays a pandas Series and is
# re-indexed from 0 so the map can be addressed as extracted[0] below.
extracted = label_pattern[label_pattern['failureType'] == 'Donut']\
            .sample(1, replace=True)['waferMap'].reset_index(drop = True)
# Baseline: no interpolation flag given, so cv2.resize uses its default
# (INTER_LINEAR according to the OpenCV documentation).
resized = extracted.apply(lambda m: cv2.resize(m, dsize=dsize))
# When resizing the maps we apply 5 different interpolation methods to see
# which works best: INTER_LINEAR, INTER_AREA, INTER_NEAREST, INTER_CUBIC,
# INTER_LANCZOS4
resized_interLinear = extracted.apply(lambda m: cv2.resize(m, dsize=dsize, interpolation=cv2.INTER_LINEAR))
resized_interArea = extracted.apply(lambda m: cv2.resize(m, dsize=dsize, interpolation=cv2.INTER_AREA))
resized_interNN = extracted.apply(lambda m: cv2.resize(m, dsize=dsize, interpolation=cv2.INTER_NEAREST))
resized_interCubic = extracted.apply(lambda m: cv2.resize(m, dsize=dsize, interpolation=cv2.INTER_CUBIC))
resized_interLANCZ = extracted.apply(lambda m: cv2.resize(m, dsize=dsize, interpolation=cv2.INTER_LANCZOS4))
# augmented = np.array(augment_images(resized))
# reshaped = reshape_images(augmented, dsize[1], dsize[0])

# ------------------------------Plotting Resize maps----------------------------
x = ['Original','resized','resized Linear', 'resized Area', 'Resized Nearest Neigh.',
     'resized Cubic','resized Lancz']
img = [extracted[0], resized[0], resized_interLinear[0], resized_interArea[0],
       resized_interNN[0], resized_interCubic[0], resized_interLANCZ[0]]
# One column per image. The grid used to be hard-coded to 6 columns, which
# silently dropped the 7th (Lanczos) panel.
fig, ax = plt.subplots(nrows = 1, ncols = len(img), figsize=(10,10))
# ravel flattens the Axes handle to 1-D so a single loop can walk the panels
# whatever the grid shape is.
ax = ax.ravel(order='C')
for axis, label, image in zip(ax, x, img):
    axis.imshow(image)
    axis.set_title(label+"\n"+str(image.shape),fontsize=10)
    axis.set_xticks([])
    axis.set_yticks([])
plt.tight_layout()
plt.show()
# --------------------------------Conclusion------------------------------------
# From the interpolated results the best methods are either resized Area or
# resized Nearest Neigh. Nearest Neigh. was chosen over resized Area, with no
# strong reason to prefer one over the other.

In [None]:
################################################################################
#-------------------------Augmenting Maps Examples------------------------------
################################################################################
# Pick 3 random 'Donut' wafer maps (re-indexed from 0), resize them with the
# notebook's resize_images helper, then build the augmented versions.
test_maps = label_pattern[label_pattern['failureType'] == 'Donut']['waferMap']\
            .sample(3, replace=True).reset_index(drop = True)
test_maps = resize_images(test_maps)
augmented = np.array(augment_images(test_maps))
# -----------------------------Plotting Results--------------------------------
# Top row shows the resized originals, bottom row the augmented maps.
fig, ax = plt.subplots(nrows = 2, ncols = 3, figsize=(5,5))
row_contents = (("Original Map", test_maps), ("Augmented Map", augmented))
for row_axes, (label, maps) in zip(ax, row_contents):
  for j, axis in enumerate(row_axes):
    axis.imshow(maps[j])
    # The title reports the shape of the map actually drawn in this panel
    # (index j); it used to be indexed by the row number instead.
    axis.set_title(label+"\n"+str(maps[j].shape),
                   fontsize=10)
    axis.set_xticks([])
    axis.set_yticks([])
plt.tight_layout()
plt.show()

In [None]:
################################################################################
#----------------------How to: One Hot encode Maps------------------------------
################################################################################
# Saving a donut map as an example. The map is selected by column name, like
# in the other cells, rather than by assuming it sits in positional column 0.
map_exm = label_pattern[label_pattern['failureType'] == 'Donut']['waferMap'].iloc[0]
# The goal is a 4-dim array of shape (n_images, height, width, 3) that holds
# "RGB-like" maps, with the three components kept in the last dimension.
# Conceptually you start from an all-zero array, e.g.
#   np.zeros((1, map_exm.shape[0], map_exm.shape[1], 3))
# and then set one component of every pixel to 1 according to whether it is a
# Good Die, a Bad Die or Padding. We take advantage of how those are stored in
# the map to do this:
# 1 = Good die
# 2 = Bad Die
# 0 = Padding
# So we save the padding in the first component
# (i.e. [0,:,:,0]), we then save the good die in the next component
# (i.e. [0,:,:,1]) and finally save the bad die in the last component
# (i.e. [0,:,:,2]). Here's an example:

# Original Map (8 rows x 10 columns):
# 0 0 0 1 1 1 0 0 0 0
# 0 0 1 1 1 1 1 0 0 0
# 0 1 1 2 2 2 1 1 0 0
# 1 1 1 2 2 2 1 1 1 0
# 0 1 1 2 2 2 1 1 0 0
# 0 0 1 1 1 1 1 0 0 0
# 0 0 0 1 1 1 0 0 0 0
# 0 0 0 0 0 0 0 0 0 0
# First component[0,:,:,0]   Second component[0,:,:,1] Last component[0,:,:,2]
# (Padding)                   (Good Die)                (Bad Die)

# 1 1 1 0 0 0 1 1 1 1        # 0 0 0 1 1 1 0 0 0 0    # 0 0 0 0 0 0 0 0 0 0
# 1 1 0 0 0 0 0 1 1 1        # 0 0 1 1 1 1 1 0 0 0    # 0 0 0 0 0 0 0 0 0 0
# 1 0 0 0 0 0 0 0 1 1        # 0 1 1 0 0 0 1 1 0 0    # 0 0 0 1 1 1 0 0 0 0
# 0 0 0 0 0 0 0 0 0 1        # 1 1 1 0 0 0 1 1 1 0    # 0 0 0 1 1 1 0 0 0 0
# 1 0 0 0 0 0 0 0 1 1        # 0 1 1 0 0 0 1 1 0 0    # 0 0 0 1 1 1 0 0 0 0
# 1 1 0 0 0 0 0 1 1 1        # 0 0 1 1 1 1 1 0 0 0    # 0 0 0 0 0 0 0 0 0 0
# 1 1 1 0 0 0 1 1 1 1        # 0 0 0 1 1 1 0 0 0 0    # 0 0 0 0 0 0 0 0 0 0
# 1 1 1 1 1 1 1 1 1 1        # 0 0 0 0 0 0 0 0 0 0    # 0 0 0 0 0 0 0 0 0 0

# The key idea, written as an explicit (but inefficient) double loop over a
# zero-initialised array `encoded`:
# for h in range(map_exm.shape[0]):
#     for w in range(map_exm.shape[1]):
#         encoded[0, h, w, map_exm[h][w]] = 1
# For each (h, w) of the zeroed map it assigns a 1 on the component selected
# by the pixel value, i.e. depending on whether that coordinate was padding,
# good die or bad die. For h=w=0 you get a "1" on the padding component and
# everything else stays zero.

# The better way is this one-liner (found on StackExchange):
# - the list comprehension builds one 0/1 mask per pixel value 0, 1, 2,
#   giving an array of shape (3, X_dim, Y_dim);
# - .transpose((1,2,0)) reorders the axes from (3, X_dim, Y_dim) to
#   (X_dim, Y_dim, 3), where 3 is either the Padding, Good Die or Bad Die;
# - [None] adds a leading dimension, making it (nth_image, x_dim, y_dim, 3).
one_hot_test = np.array([(map_exm == i).astype(int) for i in range(3)])\
                              .transpose((1,2,0))[None]

# --------------------------Plotting Results------------------------------------
fig, ax = plt.subplots(nrows = 1, ncols = 4, figsize=(5,5))
ax = ax.ravel(order='C')
# Panels: the original map followed by its three one-hot components.
x = np.array([map_exm,one_hot_test[0,...,0],
              one_hot_test[0,...,1],one_hot_test[0,...,2]])
title = ['Original Map','Padding','Good Die','Bad Die']
for axis, panel, panel_title in zip(ax, x, title):
    axis.imshow(panel)
    axis.set_title(panel_title,fontsize=10)
    axis.set_xticks([])
    axis.set_yticks([])
plt.tight_layout()
plt.show()