#**Reading Text Files**

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import csv
import os
from PIL import Image
import numpy as np

# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')

#colab can't access shared folders so I created a folder in my drive and to add individual folders from the data

with open('drive/MyDrive/AgriLifeMicrohistologicalProject/161HD/002/161HD_002.txt', newline='') as f: #reads txt file and turns into 2d list
    reader = csv.reader(f, delimiter=' ')
    data = list(reader)

grass = []
holder = []
count = 0

for x in data: #turns all string in 2d list to float
  for y in x:
    holder.append(float(y))
    count += 1

  if count == 5:
    count = 0
    grass.append(holder)
    holder = []

print(grass)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[[3.0, 0.229004, 0.103788, 0.02373, 0.027424], [3.0, 0.414893, 0.113337, 0.020508, 0.022168], [1.0, 0.699707, 0.051751, 0.071973, 0.056565], [1.0, 0.625317, 0.192772, 0.029932, 0.084665], [4.0, 0.538086, 0.198002, 0.027344, 0.035385], [1.0, 0.633301, 0.512983, 0.062207, 0.073581], [3.0, 0.408374, 0.705339, 0.01792, 0.018369], [2.0, 0.347656, 0.810975, 0.05791, 0.032627], [1.0, 0.120752, 0.719207, 0.048828, 0.063485], [1.0, 0.111646, 0.873445, 0.057959, 0.043711], [1.0, 0.236328, 0.829006, 0.044922, 0.047874]]


#**Feature Snipping**

In [2]:
def upload_cropped_image(img_cropped, folder_name, img_name):
  folder_path = f'/content/drive/My Drive/AgriLifeMicrohistologicalProject/FeatureImages/{folder_name}'
  if not os.path.exists(folder_path):
    os.makedirs(folder_path)

  fig, ax = plt.subplots()  # Create a figure and axes
  ax.imshow(img_cropped)  # Display the image on the axes
  ax.axis('off')  # Turn off axis markers

  # Save the figure, if there's a duplicate save with the same name savefig will auto overwrite the previous save so no duplicates
  fig.savefig(f'{folder_path}/{img_name}.png', bbox_inches='tight', pad_inches = 0)
  plt.close(fig)

In [3]:
img_path = 'drive/MyDrive/AgriLifeMicrohistologicalProject/161HD/002/161HD_002.png'

#creates concatenates to name the files based on the given unique image name
filename = os.path.basename(img_path).split('.')[0].replace(" ", "")

img = plt.imread(img_path) #gets image data

nale_count = 0
qufu_count = 0
erci_count = 0
bubble_count = 0
qufu_stem_count = 0

#crops the image based on every line in the text file and uploads them to a given folder using each picture's unique name
for ind in range(len(grass)):
  height, width = img.shape[0:2] #stores dimensions of image

  #snipping math using txt file numbers
  x_min = round((width * grass[ind][1]) - (width * grass[ind][3])/2)
  x_max = round((width * grass[ind][1]) + (width * grass[ind][3])/2)
  y_min = round((height * grass[ind][2]) - (height * grass[ind][4])/2)
  y_max = round((height * grass[ind][2]) + (height * grass[ind][4])/2)

  img_cropped = img[y_min:y_max, x_min:x_max]
  if(grass[ind][0] == 0):
    nale_count += 1 #labels each unique feature snipped from the image
    upload_cropped_image(img_cropped, '0 - nale', f"{filename}_{nale_count}")

  elif(grass[ind][0] == 1):
    qufu_count += 1
    upload_cropped_image(img_cropped, '1 - qufu', f"{filename}_{qufu_count}")

  elif(grass[ind][0] == 2):
    erci_count += 1
    upload_cropped_image(img_cropped, '2 - erci', f"{filename}_{erci_count}")

  elif(grass[ind][0] == 3):
    bubble_count += 1
    upload_cropped_image(img_cropped, '3 - bubble', f"{filename}_{bubble_count}")

  elif(grass[ind][0] == 4):
    qufu_stem_count += 1
    upload_cropped_image(img_cropped, '4 - qufu stem', f"{filename}_{qufu_stem_count}")

#**Sub Feature Snipping**

In [44]:
def splice_featimg(featimg_path, output_folder, sub_image_size=100):
    featimg = plt.imread(featimg_path)

    # Get image dimensions
    height, width, channels = featimg.shape
    #print(featimg.shape)

    # Iterate over the image in sub-image-sized blocks
    sub_image_count = 0  # Initialize sub-image counter
    for y in range(0, height, sub_image_size):
        for x in range(0, width, sub_image_size):
            # Extract the sub-image
            sub_img = featimg[y:y + sub_image_size, x:x + sub_image_size]

            #checks if sub image is the expected dimensions and checks to make sure it's not all whitespace
            if sub_img.shape[0] == sub_image_size and sub_img.shape[1] == sub_image_size:

                # Separate the RGB and alpha channels
                rgb = sub_img[..., :3]
                alpha = sub_img[..., 3]

                # Convert the RGB part to grayscale
                grayscale = np.mean(rgb, axis=-1)

                # Apply the threshold to detect white pixels
                white_pixels = np.sum((grayscale >= (240/255)) & (alpha > 0))

                # Total number of non-transparent pixels
                total_pixels = np.sum(alpha > 0)

                # Calculate the percentage of white pixels
                whitespace_percentage = (white_pixels / total_pixels) * 100

                print(f"{os.path.basename(featimg_path).split('.')[0]}_sub_{sub_image_count} whitespace percent: {whitespace_percentage}")

                # Ignore sub-images with more than 95% whitespace
                if whitespace_percentage < 95:

                  # Create a figure and axes for the sub-image
                  fig, ax = plt.subplots()

                  # Display the sub-image on the axes
                  ax.imshow(sub_img)
                  ax.axis('off')  # Turn off axis markers

                  # Save the sub-image using plt.savefig and names it
                  sub_image_filename = f"{os.path.basename(featimg_path).split('.')[0]}_sub_{sub_image_count}.png"
                  sub_image_path = os.path.join(output_folder, sub_image_filename)
                  fig.savefig(sub_image_path, bbox_inches='tight', pad_inches=0)

                  # Close the figure to release resources
                  plt.close(fig)

                  sub_image_count += 1  # Increment sub-image counter

In [48]:
import os

# Get a list of all featimg files in the '4 - qufu stem' folder
featfolder = '4 - qufu stem'
featimg_folder = f'drive/MyDrive/AgriLifeMicrohistologicalProject/FeatureImages/{featfolder}'
featimg_files = [f for f in os.listdir(featimg_folder) if f.endswith('.png')]

# Splice each featimg
output_subimg_folder = f'drive/MyDrive/AgriLifeMicrohistologicalProject/SubImages/{featfolder}'
for featimg_file in featimg_files:
    featimg_path = os.path.join(featimg_folder, featimg_file)
    splice_featimg(featimg_path, output_subimg_folder)

(369, 106, 4)
174Export001_1_sub_0 whitespace percent: 0.22999999999999998
174Export001_1_sub_1 whitespace percent: 0.0
174Export001_1_sub_2 whitespace percent: 0.0
(369, 178, 4)
182HD_002_1_sub_0 whitespace percent: 8.99
182HD_002_1_sub_1 whitespace percent: 14.000000000000002
182HD_002_1_sub_2 whitespace percent: 15.68
(369, 112, 4)
180HD_003_1_sub_0 whitespace percent: 3.93
180HD_003_1_sub_1 whitespace percent: 0.51
180HD_003_1_sub_2 whitespace percent: 13.850000000000001
(369, 304, 4)
161HD_002_1_sub_0 whitespace percent: 21.12
161HD_002_1_sub_1 whitespace percent: 37.16
161HD_002_1_sub_2 whitespace percent: 99.91
161HD_002_1_sub_2 whitespace percent: 20.669999999999998
161HD_002_1_sub_3 whitespace percent: 47.089999999999996
161HD_002_1_sub_4 whitespace percent: 67.73
161HD_002_1_sub_5 whitespace percent: 20.25
161HD_002_1_sub_6 whitespace percent: 35.78
161HD_002_1_sub_7 whitespace percent: 26.87
