In [1]:
import pandas as pd
import re
import os
import numpy as np
import glob
from datetime import datetime
import geopandas as gpd
import json
from shapely.geometry import shape, GeometryCollection
from rasterio.plot import show
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib import colors
from utils import FeatureExtraction, feature_matching, decimal_coords
from exif import Image as exifimg
from PIL import Image

import shutil
import rasterio
import cv2
%matplotlib inline

In [2]:
# 가능한 font list 확인
import matplotlib.font_manager as fm
f = [f.name for f in fm.fontManager.ttflist]
# print(f)

plt.rc('font', family='Malgun Gothic')

In [3]:
def getInfo(filesGrp, img_path):

  dslabel = {
    "id":"",
    "originalFileJPG":"",
    "originalFileGrp":[],
    "destFolder":"",
    "GeoTagInfo":{
      "coords":[],
      "datetime_original":"",
      "gps_altitude":0,
      "gps_altitude_ref":""    
    },
    "dest":{
      "rgb":"",
      "ndvi":"",
      "thumb":"",
    },
    "info":{
      "course":"",
      "area":"",
      "desc":"",
      "alt":""
    },
    "label":[],
    "annotation":[]
  }

# Remove Drive letter with root path
  dslabel['originalFileJPG'] = img_path[3:]
  dslabel['originalFileGrp'] = [x[3:] for x in filesGrp  if ('.JPG' not in x)]

  with open(img_path, 'rb') as src:
    img = exifimg(src)

  coords = [
    decimal_coords(img.gps_longitude,
    img.gps_longitude_ref),
      decimal_coords(img.gps_latitude,
    img.gps_latitude_ref)
  ]

  dslabel['id'] = img.datetime_original.replace(" ","").replace(":","") + "_{:.10f}".format(coords[0]).replace(".","") + "_{:.10f}".format(coords[1]).replace(".","") + "_{:f}".format(img.gps_altitude).replace(".","")
  dslabel['GeoTagInfo']['coords'] = coords
  dslabel['GeoTagInfo']['datetime_original'] = img.datetime_original
  dslabel['GeoTagInfo']['gps_altitude'] = img.gps_altitude
  dslabel['GeoTagInfo']['gps_altitude_ref'] = img.gps_altitude_ref
  
  return dslabel

In [4]:
def getNDVIimg(filesGrp):


  with rasterio.open([ x for x in filesGrp if ('_MS_R' in x) ][0]) as src:
      band_red = src.read(1).astype(float)/65536.

  with rasterio.open([ x for x in filesGrp if ('_MS_NIR' in x) ][0]) as src:
      band_nir = src.read(1).astype(float)/65536.

  np.seterr(divide='ignore', invalid='ignore')

  # Calculate NDVI
  ndvi = (band_nir.astype(float) - band_red.astype(float)) / (band_nir.astype(float) + band_red.astype(float))

  ndvi -= ndvi.min() # ensure the minimal value is 0.0
  ndvi /= ndvi.max() # maximum value in image is now 1.0

  cm = plt.cm.get_cmap('RdYlGn')
  ndvi_cm = cm(ndvi)

  ndvi_img = cv2.normalize(ndvi_cm[:,:,:3], None, alpha = 0, beta = 255, norm_type = cv2.NORM_MINMAX, dtype = cv2.CV_32F).astype(np.uint8)

  return ndvi_img

In [5]:
def getalignedRGB(filesGrp_, ndvi_img):
  RGBimg = np.asarray(Image.open([x for x in filesGrp_ if 'JPG' in x][0]))

  # RGBimg = np.asarray(Image.open(filesGrp_[0]).resize((ndvi_img.shape[1],ndvi_img.shape[0])))
  # features0 = FeatureExtraction(RGBimg)
  # features1 = FeatureExtraction(ndvi_img)

  # matches = feature_matching(features0, features1)

  # print('{} number of matched has been found! in {}'.format(len(matches), filesGrp_[0]))

  # if len(matches) < 10:
  #    raise Exception("Not Enough Matches")

  # H, _ = cv2.findHomography( features0.matched_pts, features1.matched_pts, cv2.RANSAC, 5.0)

  H = np.array([[ 5.93209530e-01, -8.35074813e-03, -2.21474434e+02],
       [ 1.32198817e-02,  5.96460559e-01, -2.01042419e+02],
       [ 1.58520522e-06,  1.69424781e-06,  1.00000000e+00]])

  h, w, c = ndvi_img.shape
  RGBimg_tuned = cv2.warpPerspective(RGBimg, H, (w, h), borderMode=cv2.BORDER_CONSTANT, borderValue=(0, 0, 0, 0))

  return RGBimg_tuned 

In [6]:
img_folder = r'N:\NIA_2023_52_잔디\10_원시데이터(3사_분류대상)'
out_folder = r'D:\ToAWS'

course_list = ['청도','포항','중문','시흥']

course_ids = {
  '포항':'MGC001',
  '청도':'MGC002',
  '중문':'MGC003',
  '시흥':'MGC004'
}

folder_type = ['rgb','ndvi','thumb']

In [7]:
course_id = course_ids.get('시흥')
course_id

'MGC004'

In [8]:
img_folders_candidate =glob.glob(os.path.join(img_folder,'*'))
img_folders_candidate

['N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0807_시흥 아세코밸리_맑음_데이터 1차 분류본_완',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0809_클럽디 거창_구름많은날_데이터 1차 분류_완',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0811_포항CC_비온다음날_데이터 1차 분류',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0815_시흥 아세코밸리_맑음_데이터 1차 분류본_완',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0816_포항CC_오전맑음오후구름가득_데이터 1차 분류본',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0817_거창클럽디_오전구름많이오후구름반햇빛반_데이터 1차 분류본_완',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0822_시흥아세코밸리_비온직후맑은날_데이터 1차 분류본_완',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0823_청도그레이스_맑고뭉게구름_데이터 1차 분류본_완',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0825_클럽디속리산_맑고뭉게구름_데이터1차분류_M3M_완',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0826_춘천오너스_맑고구름조금미세먼지_데이터 1차 분류_완',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0902_시흥아세코밸리_맑은날_구름조금_데이터 1차 분류_완',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0911_클럽디속리산-데이터1차분류_완',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_09

In [9]:
img_folder = img_folders_candidate[0]
img_folder

'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0807_시흥 아세코밸리_맑음_데이터 1차 분류본_완'

In [10]:
files = glob.glob(os.path.join(img_folder,'**/DJI_*.JPG'), recursive=True)

len(files)

4054

In [11]:
original_folder = []

for file_ in files:
  path_ = os.path.normpath(file_)
  paths_ = path_.split(os.sep)
  path_ = (os.sep).join(paths_[0:5])

  if path_ not in original_folder:
    original_folder.append(path_)

In [12]:
original_folder

['N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0807_시흥 아세코밸리_맑음_데이터 1차 분류본_완\\아세코밸리_1H',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0807_시흥 아세코밸리_맑음_데이터 1차 분류본_완\\아세코밸리_5H',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0807_시흥 아세코밸리_맑음_데이터 1차 분류본_완\\아세코밸리_8H',
 'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0807_시흥 아세코밸리_맑음_데이터 1차 분류본_완\\아세코밸리_9H']

In [13]:
target_files = glob.glob(os.path.join(original_folder[0],'**/DJI_*.JPG'), recursive=True)
len(target_files)

473

In [14]:
file_

'N:\\NIA_2023_52_잔디\\10_원시데이터(3사_분류대상)\\2023_0807_시흥 아세코밸리_맑음_데이터 1차 분류본_완\\아세코밸리_9H\\2023_0807_시흥 아세코밸리_9H_1.5m\\20230807_1626-1632_병해,패취류,라지패취\\DJI_20230807163226_0335_D.JPG'

In [17]:
path = os.path.normpath(file_)
info = path.split(os.sep)

info

['N:',
 'NIA_2023_52_잔디',
 '10_원시데이터(3사_분류대상)',
 '2023_0807_시흥 아세코밸리_맑음_데이터 1차 분류본_완',
 '아세코밸리_9H',
 '2023_0807_시흥 아세코밸리_9H_1.5m',
 '20230807_1626-1632_병해,패취류,라지패취',
 'DJI_20230807163226_0335_D.JPG']

In [18]:

InfoJsonArray = []
target_folder = []

for idx, file_ in enumerate(target_files):

  course = list(filter(lambda y: y in file_, course_list))[0]
  target_date = os.path.split(file_)[-1].split('_')[1][:8]
  path = os.path.normpath(file_)
  info = path.split(os.sep)

  area = info[4]
  
  if len(info[6].split('_')) ==3 :
    desc = info[6].split('_')[-1] + '_0'
  else:
    desc = "미기재" + '_0'

  target_path = ('/').join([target_date,area,desc])

  while target_folder.count(target_path)>500:
    desc = desc.split('_')[0] + '_'+str(int(desc.split('_')[-1]) + 1)
    target_path = ('/').join([target_date,area,desc])

  target_folder.append(target_path)

  alt =  info[5].split('_')[-1]


  filesGrp = glob.glob(os.path.join(os.path.split(file_)[0],(os.path.split(file_)[1].split('_')[0]+'_'+ os.path.split(file_)[1].split('_')[1][:-3]+"***_" + os.path.split(file_)[1].split('_')[2]) + '*'), recursive=True)
  # print(filesGrp)
  file_info = getInfo(filesGrp, file_)

  file_info['info']['course'] = course
  file_info['info']['area'] = area
  file_info['info']['alt'] = alt
  file_info['info']['desc'] = desc
  file_info['info']['date'] = target_date
  file_info['destFolder'] = target_path

  path_ = os.path.join(out_folder,course_id,target_date,area,desc,'rgb')

  if not os.path.exists(path_):
    os.makedirs(path_)

  path_ = os.path.join(out_folder,course_id,target_date,area,desc,'ndvi')

  if not os.path.exists(path_):
    os.makedirs(path_)

  path_ = os.path.join(out_folder,course_id,target_date,area,desc,'thumb')

  if not os.path.exists(path_):
    os.makedirs(path_)


  # try:
  ndvi_img = getNDVIimg(filesGrp)
  rgb_img = getalignedRGB(filesGrp, ndvi_img)
  resized_ndvi = cv2.resize(ndvi_img, dsize=(648,486), interpolation=cv2.INTER_CUBIC)
  save_name = os.path.join(out_folder,course_id,target_date,area,desc,'ndvi','ndvi{}.JPG'.format( file_info['id']) )
  im = Image.fromarray(resized_ndvi)
  im.save(save_name)
  file_info['dest']['ndvi'] = save_name[9:]

  resized_rgb = cv2.resize(rgb_img , dsize=(2592,1944), interpolation=cv2.INTER_CUBIC)
  save_name = os.path.join(out_folder,course_id,target_date,area,desc,'rgb','rgb{}.JPG'.format( file_info['id']) )
  im = Image.fromarray(resized_rgb)
  im.save(save_name)
  file_info['dest']['rgb'] = save_name[9:]


  thumb_rgb = cv2.resize(rgb_img , dsize=(324,243), interpolation=cv2.INTER_CUBIC)
  save_name = os.path.join(out_folder,course_id,target_date,area,desc,'thumb','thumbrgb{}.JPG'.format( file_info['id']) )
  im = Image.fromarray(thumb_rgb)
  im.save(save_name)
  file_info['dest']['thumb'] = save_name[9:]

  InfoJsonArray.append( file_info)
  
  print(save_name)

  print('{} out of {} files Completed'.format(idx, len(target_files)))

save_name = os.path.join(out_folder,course_id,target_date,area, 'data'+target_date+area+'.json')
with open(save_name, "w", encoding='utf-8') as final:
   json.dump(InfoJsonArray, final , ensure_ascii=False)


  

  s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  cm = plt.cm.get_cmap('RdYlGn')


D:\ToAWS\MGC004\20230807\아세코밸리_1H\미기재_0\thumb\thumbrgb20230807111526_1267705616389_373680895556_56142000.JPG
0 out of 473 files Completed


  RGBimg = np.asarray(Image.open([x for x in filesGrp_ if 'JPG' in x][0]))


error: OpenCV(4.6.0) :-1: error: (-5:Bad argument) in function 'warpPerspective'
> Overload resolution failed:
>  - src data type = 17 is not supported
>  - Expected Ptr<cv::UMat> for argument 'src'


Gather all data json files from Output Folder

In [27]:
dataJsonList = glob.glob(os.path.join(out_folder,course_id,target_date,'**/data*.json'), recursive=True)
dataJsonList

['D:\\ToAWS\\MGC001\\20230816\\1.태백3H\\data202308161.태백3H.json',
 'D:\\ToAWS\\MGC001\\20230816\\2.태백5H\\data202308162.태백5H.json',
 'D:\\ToAWS\\MGC001\\20230816\\3.동해1H\\data202308163.동해1H.json',
 'D:\\ToAWS\\MGC001\\20230816\\4.동해2H\\data202308164.동해2H.json',
 'D:\\ToAWS\\MGC001\\20230816\\5.동해3H\\data202308165.동해3H.json',
 'D:\\ToAWS\\MGC001\\20230816\\6.태백1H\\data202308166.태백1H.json']

In [28]:
folderinDataJson = []
totalPhotoJson = []

for dataJson_ in dataJsonList:
  with open(dataJson_, "r") as _json:
    dataJson= json.load(_json)
  
  totalPhotoJson.extend(dataJson)

  for Json_ in dataJson:
    if Json_['destFolder'] not in folderinDataJson:
      folderinDataJson.append(Json_['destFolder'])




In [29]:
save_name = os.path.join(out_folder,course_id,target_date, 'photo.json')
with open(save_name, "w", encoding='utf-8') as final:
   json.dump(totalPhotoJson, final , ensure_ascii=False)

In [30]:
filestructure = []


for folder_ in folderinDataJson:
  fileinfo = {}
  fileinfo["path"] = folder_
  fileinfo["file_cnt"] = len([ x for x in totalPhotoJson if x["destFolder"] == folder_])
  fileinfo["labeled_file_cnt"] = 0
  filestructure.append(fileinfo)
  

In [31]:
save_name = os.path.join(out_folder,course_id,target_date, 'filepath_'+course_id + '_'+target_date+'.json')
with open(save_name, "w", encoding='utf-8') as final:
   json.dump(filestructure, final , ensure_ascii=False)

In [32]:
len(totalPhotoJson)

9184