In [2]:
import os
import cv2
import pprint
import rasterio
import numpy as np
from PIL import Image
from pathlib import Path
from rasterio.mask import mask
from rasterio.plot import reshape_as_image
from shapely.geometry import Polygon, mapping

In [133]:
def getDataByDataKeyDate(dataFilePath, dateList):
    '''
    Load the label file and keep only the records whose date key is selected.

    Every non-blank line of the file is split on commas. The date key of a
    record is the text before the first underscore in its second field
    (for example "202404251118" in "202404251118_...").

    Parameters
    ----------
    dataFilePath : str or path-like
        Text file holding one comma-separated record per line.
    dateList : container of str
        Date keys to keep; a record is kept when its date key is ``in``
        this container.

    Returns
    -------
    list of list of str
        The comma-split fields of every matching record, in file order.

    Raises
    ------
    IndexError
        If a non-blank line has no second comma-separated field.
    '''
    returnData = []

    # read all lines up front so the file is closed before filtering starts
    with open(dataFilePath, "r") as file:
        allData = file.read().splitlines()

    # loop into each line (one record per line)
    for lineNumber, dataLine in enumerate(allData, start=1):

        # blank / whitespace-only lines carry no record; indexing field 1
        # on them would otherwise abort the whole load with an IndexError
        if not dataLine.strip():
            continue

        # split the record into its comma-separated fields
        splittedDataLine = dataLine.split(',')

        # fail with a message that points at the offending line instead of
        # a bare "list index out of range"
        if len(splittedDataLine) < 2:
            raise IndexError(
                "line {} of {} has no second comma-separated field: {!r}".format(
                    lineNumber, dataFilePath, dataLine
                )
            )

        # date key = text before the first underscore of the second field
        date = splittedDataLine[1].split('_')[0]

        # keep the record only when its date key was requested
        if date in dateList:
            returnData.append(splittedDataLine)

    return returnData

def selectDataByRawImgType(allData, selectedRawImgKey):
    '''
    Keep only the records whose raw-image type is one of the selected keys.

    The raw-image type of a record is the third-from-last underscore-separated
    token of its first field (e.g. "original" or "raw1" ... "raw10").

    Parameters
    ----------
    allData : iterable of sequence of str
        Records whose first element is the image path / file name.
    selectedRawImgKey : container of str
        Raw-image types to keep (tested with ``in``).

    Returns
    -------
    list
        The matching records, in their original order.
    '''
    def _rawImgTypeOf(record):
        # third token counted from the end of the underscore-split first field
        return record[0].split("_")[-3]

    return [
        record
        for record in allData
        if _rawImgTypeOf(record) in selectedRawImgKey
    ]

def selectDataByAugmentMethod(allData, selectedAugmentedMethod):
    '''
    Keep only the records produced by one of the selected augmentation methods.

    The augmentation method of a record is read from its first field: the text
    after the last underscore, cut at the first dot (so "..._flipped.png"
    yields "flipped").

    Parameters
    ----------
    allData : iterable of sequence of str
        Records whose first element is the image path / file name.
    selectedAugmentedMethod : container of str
        Augmentation method names to keep (tested with ``in``).

    Returns
    -------
    list
        The matching records, in their original order.
    '''
    keptRecords = []

    for record in allData:
        # last underscore-separated token, e.g. "flipped.png"
        trailingToken = record[0].split("_")[-1]
        # drop everything from the first dot onwards (the file extension)
        methodName = trailingToken.split('.')[0]

        # skip records whose method was not requested
        if methodName not in selectedAugmentedMethod:
            continue

        keptRecords.append(record)

    return keptRecords

def selectDataByDate(allData, selectedDate):
    '''
    Keep only the records whose path date is one of the selected dates.

    The date of a record is the fifth-from-last underscore-separated token of
    its first field.

    Parameters
    ----------
    allData : iterable of sequence of str
        Records whose first element is the image path / file name.
    selectedDate : container of str
        Dates to keep (tested with ``in``).

    Returns
    -------
    list
        The matching records, in their original order.
    '''
    def _hasSelectedDate(record):
        # fifth token counted from the end of the underscore-split first field
        recordDate = record[0].split("_")[-5]
        return recordDate in selectedDate

    return list(filter(_hasSelectedDate, allData))

In [173]:
# --- configuration -----------------------------------------------------------

# label file: one comma-separated record per line
dataFilePath = "D:/ice-wheat/data/dataForProcess/mainData/completeLabelData.txt"

# date keys accepted by getDataByDataKeyDate
dataKeyDateList = [
    "202404251118", "202404301146", "202405071327", "202405131248",
    "202405171307", "202405221319", "202405271230", "202405311536",
    "202406041351", "202406071509", "202406111255", "202406141237",
    "202406171112", "202406241205",
    "999999999999",
]

# raw-image types that can appear in an image path
rawImgKey = [
    "original",
    "raw1", "raw2", "raw3", "raw4", "raw5",
    "raw6", "raw7", "raw8", "raw9", "raw10",
]

# augmentation method names that can appear in an image path
augmentMethod = [
    'original', 'flipped', 'rotated', 'zoomed',
    'brightenOriginal', 'darkenOriginal',
    'brightenFlipped', 'darkenFlipped',
    'jittered', 'noisy',
]

# every date that can appear in an image path
dateList = [
    '202401181250', '202401221100', '202401291321', '202402071317',
    '202402081107', '202402131116', '202402191131', '202402261154',
    '202403041133', '202403111217', '202403191047', '202403251407',
    '202404011045', '202404101010', '202404151134', '202404171400',
    '202404221142', '202404251118', '202404301146', '202405071327',
    '202405131248', '202405171307', '202405221319', '202405271230',
    '202405311536', '202406041351', '202406071509', '202406111255',
    '202406141237', '202406171112', '202406241205',
]

# --- load and filter ---------------------------------------------------------

# records whose date key is listed in dataKeyDateList
databyDataDateKey = getDataByDataKeyDate(dataFilePath, dataKeyDateList)

# NOTE: the three filters below are each applied to databyDataDateKey
# independently; they are not chained into one another.

# first four raw-image types only: "original", "raw1", "raw2", "raw3"
selectedDataImgType = selectDataByRawImgType(databyDataDateKey, rawImgKey[:4])

# every augmentation method listed above
selectedDataAugMethod = selectDataByAugmentMethod(databyDataDateKey, augmentMethod)

# dates from index 17 of dateList ('202404251118') to the end
dataByDate = selectDataByDate(databyDataDateKey, dateList[17:])

In [175]:
# number of records kept by the selectDataByDate filter
len(dataByDate)

165600