In [1]:
import os
import argparse
import math
import time

import ast
from copy import copy

import pandas as pd
import numpy as np
from glob import glob
from pyproj import Proj
from shapely.ops import transform
import rasterio

import pyproj
#ultralytics.checks()
import rioxarray
from shapely.geometry import Point
import geopandas as gpd
import concurrent
import concurrent.futures
from concurrent.futures.thread import ThreadPoolExecutor
import cv2

In [2]:
def tile_to_chip_array(tile, x, y, item_dim):
    """Crop one ``item_dim`` x ``item_dim`` chip out of a tile, zero-padding at the edges.

    Cropping approach:
    https://stackoverflow.com/questions/15589517/how-to-crop-an-image-in-opencv-using-python

    Parameters
    ----------
    tile : numpy.ndarray
        Image array indexed as ``(row, col)`` or ``(row, col, channel)``.
    x : int
        Column index of the chip within the chip grid.
    y : int
        Row index of the chip within the chip grid.
    item_dim : int
        Side length of the (square) chip in pixels.

    Returns
    -------
    numpy.ndarray
        Array whose first two dimensions are ``(item_dim, item_dim)``. When the
        requested window lies fully inside the tile this is a slice (view) of
        ``tile``; otherwise it is a new zero-filled array of ``tile``'s dtype
        with the available pixels copied into its top-left corner.
    """
    row_start = y * item_dim
    col_start = x * item_dim
    chip_img = tile[row_start:row_start + item_dim, col_start:col_start + item_dim]

    # Interior chips need no padding.
    if chip_img.shape[:2] == (item_dim, item_dim):
        return chip_img

    # Edge/corner chips: pad with zeros (black) on the bottom and/or right.
    # Using tile.dtype and tile.shape[2:] keeps the dtype and any channel axis
    # intact instead of assuming a 3-D uint8 image.
    chip = np.zeros((item_dim, item_dim) + tile.shape[2:], dtype=tile.dtype)
    chip[:chip_img.shape[0], :chip_img.shape[1]] = chip_img
    return chip


def chip_tiles(tile_path, chips_dir, item_dim):
    """Segment a tile into item_dim x item_dim pixel chips, preserving resolution.

    Each chip is written to ``chips_dir`` as ``<tile name>_<row>_<col>.jpg``
    (row and column zero-padded to two digits). Chips on the bottom/right edge
    are padded by ``tile_to_chip_array``.

    Parameters
    ----------
    tile_path : str
        Path of the tile image to read with ``cv2.imread``.
    chips_dir : str
        Directory the chip images are written to.
    item_dim : int
        Side length of each square chip in pixels.

    Returns
    -------
    tuple
        ``(tile_name_wo_ext, count)`` where ``count`` is the number of chips
        produced, or ``(None, None)`` when the tile could not be read or an
        error occurred while chipping (the failure is printed).
    """
    tile_name_wo_ext, _ = os.path.splitext(os.path.basename(tile_path))  # File name
    try:
        tile = cv2.imread(tile_path)
        # cv2.imread reports an unreadable file by returning None, not by raising.
        if tile is None:
            print(tile_path)
            return None, None

        tile_height, tile_width = tile.shape[:2]  # the size of the tile
        # divide the tile into item_dim by item_dim chips (rounding up)
        row_index = math.ceil(tile_height / item_dim)
        col_index = math.ceil(tile_width / item_dim)
        count = 0
        for y in range(row_index):
            for x in range(col_index):
                chip_img = tile_to_chip_array(tile, x, y, item_dim)  # chip tile
                chip_name = f"{tile_name_wo_ext}_{y:02}_{x:02}.jpg"  # name_row_col.jpg
                cv2.imwrite(os.path.join(chips_dir, chip_name), chip_img)  # save image
                count += 1
        return tile_name_wo_ext, count
    except Exception as exc:
        # Report which tile failed and why, and keep the two-value return shape
        # so callers that unpack the result do not fail on a bare None.
        print(f"{tile_path}: {exc!r}")
        return None, None


            


def chip_tiles_concurrent(tile_paths, chips_dir, item_dim=640, connections=6):
    """Chip many tiles in parallel using a pool of worker threads.

    Parameters
    ----------
    tile_paths : iterable of str
        Paths of the tiles to chip; each is passed to ``chip_tiles``.
    chips_dir : str
        Directory the chips are written to.
    item_dim : int, default 640
        Side length of each square chip in pixels.
    connections : int, default 6
        The pool uses ``connections - 4`` worker threads, but never fewer
        than one.

    Returns
    -------
    list of tuple
        ``(tile_name_wo_ext, count)`` for every tile that was chipped
        successfully, in completion order. Failed tiles are omitted.
    """
    # ThreadPoolExecutor raises ValueError for max_workers <= 0, so clamp to 1.
    max_workers = max(1, connections - 4)
    results = []
    # create the pool of worker threads
    with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
        # dispatch all chipping tasks to worker threads
        futures = [executor.submit(chip_tiles, tile_path, chips_dir, item_dim=item_dim)
                   for tile_path in tile_paths]
        # collect results as they become available
        for future in concurrent.futures.as_completed(futures):
            try:
                result = future.result()
            except Exception as exc:
                print(exc)
                continue
            # chip_tiles signals a failed tile with None / (None, None).
            if result is None or result[0] is None:
                continue
            results.append(tuple(result))
    return results

In [3]:
def get_args_parse(argv=None):
    """Build the command-line parser for the chipping job and parse arguments.

    Parameters
    ----------
    argv : list of str, optional
        Argument strings to parse (without the program name). When ``None``
        (the default) argparse reads ``sys.argv[1:]``, as before. Passing
        ``[]`` yields all defaults without touching ``sys.argv``.

    Returns
    -------
    argparse.Namespace
        With attributes ``tile_dir``, ``img_dir``, ``connections``, ``imgsz``,
        ``chunk_id`` (``None`` unless given) and ``tilename_chunks_path``.
    """
    parser = argparse.ArgumentParser("")
    parser.add_argument("--tile_dir", default="/work/csr33/images_for_predictions/naip_tiles", type=str)
    parser.add_argument('--img_dir', type=str, default="/work/csr33/images_for_predictions/naip_imgs")
    parser.add_argument("--connections", default=6, type=int)
    parser.add_argument("--imgsz", default=640, type=int)
    parser.add_argument("--chunk_id", type=int)
    parser.add_argument("--tilename_chunks_path", default='/hpc/home/csr33/ast_object_detection/images_for_prediction/tilename_chunks.npz', type=str)
    args = parser.parse_args(argv)
    return args
import sys
# parse_args() inside get_args_parse reads sys.argv, so replace it with a bare
# program name: no flags are seen and every option takes its default value.
# NOTE(review): presumably done to keep the notebook kernel's own command-line
# arguments away from argparse — confirm.
sys.argv = ['my_notebook']
args = get_args_parse()

0                 0.79
1                 1.05
2                 1.08
3                 0.79
4                 0.93
            ...       
646                NaN
647    0 mmHg (approx)
648               5.61
649               1.10
650       6.51 (Metal)
Name: Specific gravity, Length: 651, dtype: object

In [19]:
chemical_data

Unnamed: 0.1,Unnamed: 0,Chemical Name,Formula,CAS#,RTECS#,IDLH,Conversion,DOT,Synonyms/Trade Names,Physical Description,...,Specific gravity- Additional Info,Flash point- Additional Info,Melting point for solids,Relative density of gases,Solubility- Additional Info,Minimum explosive concentration,Vapor pressure- Additional Info,Lower explosive limit at room temp- Additional Info,Boiling point- Additional Info,Melting point for solids- Additional Info
0,0,Acetaldehyde,CH3CHO,75-07-0,AB1925000,Ca [2000 ppm],1 ppm = 1.80 mg/m3,1089 129,"Acetic aldehyde, Ethanal, Ethyl aldehyde",Colorless liquid or gas (above 69°F) with a pu...,...,,,,,,,,,,
1,0,Acetic acid,CH3COOH,64-19-7,AF1225000,50 ppm,1 ppm = 2.46 mg/m3,2790 153 (10-80% acid); 2789 132 (>80% acid),"Acetic acid (aqueous), Ethanoic acid, Glacial ...","Colorless liquid or crystals with a sour, vine...",...,,,,,,,,,,
2,0,Acetic anhydride,(CH3CO)2O,108-24-7,AK1925000,200 ppm,1 ppm = 4.18 mg/m3,1715 137,"Acetic acid anhydride, Acetic oxide, Acetyl ox...","Colorless liquid with a strong, pungent, vineg...",...,,,,,,,,,,
3,0,Acetone,(CH3)2CO,67-64-1,AL3150000,2500 ppm [10%LEL],1 ppm = 2.38 mg/m3,1090 127,"Dimethyl ketone, Ketone propane, 2-Propanone","Colorless liquid with a fragrant, mint-like odor.",...,,,,,,,,,,
4,0,Acetone cyanohydrin,CH3C(OH)CNCH3,75-86-5,OD9275000,N.D.,1 ppm = 3.48 mg/m3,1541 155 (stabilized),"Cyanohydrin-2-propanone, 2-Cyano-2-propanol, α...",Colorless liquid with a faint odor of bitter a...,...,(77°F),,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
646,0,Zinc chloride fume Y,,,,,,,,,...,,,,,,,,,,
647,Z,ZnCl2,7646-85-7,ZH1400000,50 mg/m3,,2331 154,Zinc dichloride fume,White particulate dispersed in air.,Potassium,...,,554°F,,(70°F),,,,,,
648,0,Zinc oxide,ZnO,1314-13-2,ZH4810000,500 mg/m3,,1516 143,Zinc peroxide,"White, odorless solid.",...,,,3587°F,,(64°F),,,,,
649,0,Zinc stearate,Zn(C18H35O2)2,557-05-1,ZH5200000,N.D.,,,"Dibasic zinc stearate, Zinc salt of stearic ac...","Soft, white powder with a slight, characterist...",...,,(oc),266°F,,,20 g/m3,,,,


In [4]:
# List the chip files already on disk. Read the directory from args so this
# stays in sync with the directory the chipping cell writes to (args.img_dir).
imgs = os.listdir(args.img_dir)

In [5]:
# Chips are named <tile>_<row>_<col>.jpg, so dropping the last two "_"-separated
# fields recovers the tile name; np.unique de-duplicates (and sorts) them.
img_tile_names = np.unique(
    [chip_file.rsplit("_", maxsplit=2)[0] for chip_file in imgs]
)

In [11]:
# Number of distinct tile-name prefixes found among the existing chip files.
len(img_tile_names)

13966

In [6]:
# List the tile files and strip their extensions. Read the directory from args
# (args.tile_dir) rather than repeating the path as a literal.
tiles = os.listdir(args.tile_dir)
tile_names = [os.path.splitext(tile)[0] for tile in tiles]

In [7]:
# Tiles with no chips yet. sorted() makes the order reproducible between runs;
# a bare list(set) order can change from one kernel session to the next.
remaining_tiles = sorted(set(tile_names) - set(img_tile_names))

In [8]:
tile_dir = args.tile_dir
# Map each extension-less tile name back to its real filename so the path uses
# the file's actual extension; fall back to ".tif" if a name is not listed.
tile_file_by_name = {os.path.splitext(tile)[0]: tile for tile in tiles}
tile_paths = [
    os.path.join(tile_dir, tile_file_by_name.get(remaining_tile, remaining_tile + ".tif"))
    for remaining_tile in remaining_tiles
]

In [10]:
# Number of tiles that still need to be chipped.
len(tile_paths)

46

In [13]:
#yolov8/bash/error/.ipynb_checkpoints/tune-checkpoint.err

In [12]:
# Chip every remaining tile in a thread pool, recording per-tile chip counts
# and the paths of tiles that failed, then print the elapsed wall-clock seconds.
tile_name_wo_ext_list = []
count_list = []
failed_tile_paths = []
start_time = time.time()
with concurrent.futures.ThreadPoolExecutor(26) as executor:
    # dispatch all chipping tasks to worker threads, remembering each task's tile
    future_to_tile_path = {
        executor.submit(chip_tiles, tile_path, args.img_dir, item_dim=args.imgsz): tile_path
        for tile_path in tile_paths
    }
    # report results as they become available
    for future in concurrent.futures.as_completed(future_to_tile_path):
        failed_path = future_to_tile_path[future]
        try:
            # retrieve result
            result = future.result()
        except Exception as exc:
            # say which tile failed and why instead of printing a blank line
            print(f"{failed_path}: {exc!r}")
            failed_tile_paths.append(failed_path)
            continue
        # chip_tiles signals a failed tile with None / (None, None)
        if result is None or result[0] is None:
            failed_tile_paths.append(failed_path)
            continue
        tile_name_wo_ext, count = result
        tile_name_wo_ext_list.append(tile_name_wo_ext)
        count_list.append(count)
print(time.time() - start_time)

246.74082851409912
