In [77]:
import os
import cv2
import random
import rasterio
import numpy as np
import pandas as pd
import geopandas as gpd 

In [78]:
def writeFile(pathList, fileName, mode="a"):
    """Write every entry of ``pathList`` to ``fileName``, one entry per line.

    Args:
        pathList: iterable of strings; a newline is added after each entry.
        fileName: path of the text file to write to.
        mode: mode handed to ``open``. Defaults to ``"a"`` (append), so
            calling this twice with the same target stores the lines twice.
            Pass ``"w"`` to replace the existing content instead.
    """
    # The context manager closes the handle even when a write raises.
    with open(fileName, mode) as file:
        file.writelines(path + "\n" for path in pathList)

def getMultipliedDataPath():
    """Build ``[dataKey, completePath]`` pairs for every base path and suffix.

    Base paths are read from ``augmentedDataPath.txt`` and suffixes from
    ``dataPathMultiplier.txt``; both files are looked up in the current
    working directory and hold one entry per line.

    The data key is made of the 2nd and 3rd ``_``-separated fields of the
    last path component. Entries whose last path component is empty (blank
    line, trailing ``/``) or starts with ``.`` are skipped.

    Returns:
        list of two-element lists ``[dataKey, "/RGB" + path + suffix]``,
        ordered by base path first and suffix second.
    """
    with open("augmentedDataPath.txt", "r") as file:
        pathLines = file.read().splitlines()

    with open("dataPathMultiplier.txt", "r") as file:
        multiplierList = file.read().splitlines()

    newDataPath = []

    for path in pathLines:
        baseName = path.split('/')[-1]

        # Filter first: an empty name has no first character to inspect and
        # a hidden file never contributes a data key.
        if not baseName or baseName.startswith('.'):
            continue

        dataKey = "_".join(baseName.split("_")[1:3])

        # One complete path per suffix, all sharing the same data key
        for multiplier in multiplierList:
            newDataPath.append([dataKey, "/RGB" + path + multiplier])

    return newDataPath

def getWheatData():
    """Load ``2025WheatData.csv`` into a dict keyed by its first column.

    The file is read from the current working directory. The first line is
    dropped, and every following line is split on ``,`` without any quote
    handling.

    Returns:
        dict mapping the first field of each line to the list of the
        remaining fields (all strings). A key that appears several times
        keeps the fields of its last line.
    """
    with open("2025WheatData.csv", "r") as csvFile:
        rows = csvFile.read().splitlines()

    lookup = {}
    # rows[0] is the line that gets dropped
    for row in rows[1:]:
        key, *values = row.split(',')
        lookup[key] = values

    return lookup

def linkData(newDataPath, wheatDataDict):
    """Join every data path with the wheat data stored under its data key.

    Args:
        newDataPath: iterable of ``[dataKey, dataPath]`` pairs.
        wheatDataDict: dict mapping a data key to a list of string fields.

    Returns:
        list of comma-separated strings laid out as
        ``dataPath,dataKey,field1,field2,...``, in input order.

    Raises:
        KeyError: when a data key is missing from ``wheatDataDict``. No
            placeholder key is substituted; the message names the key and
            the path that could not be linked.
    """
    completeLabelDataList = []

    for dataPathKey in newDataPath:
        dataKey = dataPathKey[0]
        dataPath = dataPathKey[1]

        try:
            thisPathDictData = wheatDataDict[dataKey]
        except KeyError:
            # Same exception type as a plain lookup, but with enough context
            # to find the offending entry among many paths.
            raise KeyError(
                f"no wheat data for key {dataKey!r} (path {dataPath!r})"
            ) from None

        # path, key, then every wheat data field on a single line
        completeLabelDataList.append(
            ','.join([dataPath, dataKey, *thisPathDictData])
        )

    return completeLabelDataList

def getAllDSMPath(DSMPath):
    """Collect the DSM ``.tif`` files found below ``DSMPath``.

    The tree is walked as ``DSMPath/<day folder>/<label folder>/``. Inside a
    label folder two kinds of files are collected:

    * files whose name ends with ``correctedTilt.tif``
    * ``.tif`` files located in a sub-folder named ``Augmented``

    Entries that are not directories (stray files at the day or label level,
    or an ``Augmented`` entry that is a file) are skipped instead of being
    listed, and names starting with ``.`` are ignored for both kinds of file.

    Args:
        DSMPath: root folder of the DSM data.

    Returns:
        dict mapping a file name to its path, built as ``DSMPath`` followed
        by ``/``-joined components. When the same file name occurs more than
        once, the last one visited wins.
    """
    returnData = {}

    # Paths are joined with '/' on purpose: the stored values stay
    # '/'-separated whatever the platform.
    for mainFolder in os.listdir(DSMPath):
        mainFolderPath = DSMPath + '/' + mainFolder
        if not os.path.isdir(mainFolderPath):
            continue

        for labelFolder in os.listdir(mainFolderPath):
            labelFolderPath = mainFolderPath + '/' + labelFolder
            if not os.path.isdir(labelFolderPath):
                continue

            for dataFile in os.listdir(labelFolderPath):
                dataFilePath = labelFolderPath + '/' + dataFile

                if dataFile.endswith("correctedTilt.tif") and not dataFile.startswith('.'):
                    returnData[dataFile] = dataFilePath

                # Only a real "Augmented" directory can be listed
                if dataFile == "Augmented" and os.path.isdir(dataFilePath):
                    for dsmFile in os.listdir(dataFilePath):
                        if dsmFile.endswith(".tif") and not dsmFile.startswith('.'):
                            returnData[dsmFile] = dataFilePath + '/' + dsmFile

    return returnData

def linkDSMData(allDataList, DSMPathDict, pathStartIndex=7):
    """Append the matching DSM path to every data line.

    The DSM file name is derived from the image name, which is the last
    ``/`` component of the first comma-separated field of the data line:
    ``DSM_<field 1>_<field 2>_original_<kind>.tif``, where the fields are the
    ``_``-separated parts of the image name and ``<kind>`` depends on how the
    image name ends:

    * ``Flipped.jpg`` / ``flipped.jpg`` -> ``flipped``
    * ``rotated.jpg``                   -> ``rotated``
    * ``zoomed.jpg``                    -> ``zoomed``
    * anything else                     -> ``correctedTilt``

    Args:
        allDataList: iterable of comma-separated data lines whose first field
            is the image path.
        DSMPathDict: dict mapping a DSM file name to its ``/``-separated path.
        pathStartIndex: index of the first ``/``-separated component of the
            stored DSM path that is kept in the output. The default ``7``
            is the value that was previously hard-coded; it depends on how
            deep the DSM root sits in the file system, so pass another value
            when the root is at a different depth.

    Returns:
        list with one string per input line: the line followed by ``,`` and
        ``/DSM/`` plus the kept path components.

    Raises:
        KeyError: when the derived DSM file name is not in ``DSMPathDict``.
        IndexError: when the image name has fewer than three ``_``-separated
            fields.
    """
    returnData = []

    for dataLine in allDataList:
        imgPath = dataLine.split(",")[0]
        imgName = imgPath.split("/")[-1]
        nameParts = imgName.split("_")
        KeyFrontPart = "DSM_" + nameParts[1] + "_" + nameParts[2] + "_original_"

        # Pick the DSM variant that matches the image augmentation
        if imgName.endswith(("Flipped.jpg", "flipped.jpg")):
            DSMPathKey = KeyFrontPart + "flipped.tif"
        elif imgName.endswith("rotated.jpg"):
            DSMPathKey = KeyFrontPart + "rotated.tif"
        elif imgName.endswith("zoomed.jpg"):
            DSMPathKey = KeyFrontPart + "zoomed.tif"
        else:
            DSMPathKey = KeyFrontPart + "correctedTilt.tif"

        assignedDSMPath = DSMPathDict[DSMPathKey]

        # Drop the leading, machine-specific components of the stored path
        essentialPath = assignedDSMPath.split("/")[pathStartIndex:]
        finalDSMPath = '/DSM/' + '/'.join(essentialPath)

        returnData.append(dataLine + "," + finalDSMPath)

    return returnData


In [79]:
# Build the label lines: one per image path, joined with its wheat data
newDataPath = getMultipliedDataPath()
wheatDataDict = getWheatData()
completeLabelDataList = linkData(newDataPath, wheatDataDict)

# Root folder of the DSM tree. An alternative root used to be assigned here
# and was overwritten right away by the line below; it is kept for reference:
#   "D:/ice-wheat/data/dataForProcess/mainData/DSM"
DSMMainPath = "/Volumes/PortableSSD/dataForProcess/2025MainData/DSM"

# Index the DSM files by name, then attach the matching one to every line
DSMDataPathDict = getAllDSMPath(DSMMainPath)
completeLinkedDSMDataList = linkDSMData(completeLabelDataList, DSMDataPathDict)

# dataPathPandas = pd.DataFrame(newDataPath)
# dataPathPandas.columns = ['DataKey', 'DataPath']
# wheatData = pd.read_csv("2024allLabelData.csv")

In [80]:
# NOTE: writeFile opens its target in append mode, so running this cell again
# adds a second copy of every line to files that already exist.
for outputLines, outputPath in (
    (completeLabelDataList, "/Volumes/PortableSSD/dataForProcess/2025MainData/completeLabelData.txt"),
    (completeLinkedDSMDataList, "/Volumes/PortableSSD/dataForProcess/2025MainData/completeLabelDataLinkedDSM.txt"),
):
    writeFile(outputLines, outputPath)

In [81]:
len(completeLabelDataList)


105600

In [82]:
completeLinkedDSMDataList

['/RGB/RGB_202503010913_1/Augmented/RGB_202503010913_1_original_enhanced_brightenFlipped.jpg,202503010913_1,1,0,1/3/2025,,,594,/DSM/DSM_202503010913_1/Augmented/DSM_202503010913_1_original_flipped.tif',
 '/RGB/RGB_202503010913_1/Augmented/RGB_202503010913_1_original_enhanced_brightenOriginal.jpg,202503010913_1,1,0,1/3/2025,,,594,/DSM/DSM_202503010913_1/DSM_202503010913_1_original_correctedTilt.tif',
 '/RGB/RGB_202503010913_1/Augmented/RGB_202503010913_1_original_enhanced_darkenFlipped.jpg,202503010913_1,1,0,1/3/2025,,,594,/DSM/DSM_202503010913_1/Augmented/DSM_202503010913_1_original_flipped.tif',
 '/RGB/RGB_202503010913_1/Augmented/RGB_202503010913_1_original_enhanced_darkenOriginal.jpg,202503010913_1,1,0,1/3/2025,,,594,/DSM/DSM_202503010913_1/DSM_202503010913_1_original_correctedTilt.tif',
 '/RGB/RGB_202503010913_1/Augmented/RGB_202503010913_1_original_enhanced_flipped.jpg,202503010913_1,1,0,1/3/2025,,,594,/DSM/DSM_202503010913_1/Augmented/DSM_202503010913_1_original_flipped.tif',
 '/