### CSV DATA Downsampling (8,192)

##### Real World 1 (WOODY_DOWNSAMPLED)

In [6]:
import os
import pandas as pd
import numpy as np
from glob import glob
import torch
import sys
sys.path.append('/esail4/heeju/Point-M2AE/')
from utils.misc import fps

# Folder of raw per-tree CSV clouds and the folder that receives the
# downsampled versions.
source_dir = "/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED"
target_dir = "/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/RW01_8192_LX"

# Number of points kept per cloud.
sample_size = 8192

os.makedirs(target_dir, exist_ok=True)

# Sorted so the index-based output names below are stable between runs.
csv_files = sorted(glob(os.path.join(source_dir, "*.csv")))

for i, file in enumerate(csv_files):
    print(f'{file}')
    df = pd.read_csv(file)
    # Cast once on the host; float32 is the precision handed to the sampler.
    xyz = df[["X", "Y", "Z"]].to_numpy(dtype=np.float32)

    if len(xyz) >= sample_size:
        # fps receives a (1, N, 3) CUDA tensor. Only clouds that really need
        # sampling are moved to the GPU.
        lw_points = torch.tensor(xyz).cuda().contiguous().unsqueeze(0)
        sampled_points = fps(lw_points, sample_size).cpu().numpy()
        # reshape instead of a bare squeeze(): squeeze() would also drop the
        # point axis whenever that axis has length 1.
        # NOTE(review): assumes fps returns (1, sample_size, 3) - confirm.
        sampled_points = sampled_points.reshape(-1, 3)
    else:
        # Safety fallback for clouds with fewer points than sample_size:
        # keep every point unchanged (no GPU round trip needed).
        sampled_points = xyz

    sampled_df = pd.DataFrame(sampled_points, columns=["X", "Y", "Z"])
    print(sampled_df.shape)
    # Outputs are numbered 00010000, 00020000, ... in sorted input order.
    filename = f"{str((i + 1) * 10000).zfill(8)}.csv"
    save_path = os.path.join(target_dir, filename)
    sampled_df.to_csv(save_path, index=False)

/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00010000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00020000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00030000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00040000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00050000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00060000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00070000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00080000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00090000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00100000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00110000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00120000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00130000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00140000.csv
(8192, 3)
/bess25/heeju/DATA/LABELX/WOODY_DOWNSAMPLED/00150000.csv
(8192

##### Real World 2 (WOODY_DOWNSAMPLED2)

In [None]:
import os
import pandas as pd
import numpy as np
from glob import glob
import torch
import sys
sys.path.append('/esail4/heeju/Point-M2AE/')
from utils.misc import fps

# Folder of raw per-tree CSV clouds and the folder that receives the
# downsampled versions.
source_dir = "/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/real-world/WOODY_DOWNSAMPLED2"
target_dir = "/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/RW02_8192_LX"

# Number of points kept per cloud.
sample_size = 8192

os.makedirs(target_dir, exist_ok=True)

# Sorted so the index-based output names below are stable between runs.
csv_files = sorted(glob(os.path.join(source_dir, "*.csv")))

for i, file in enumerate(csv_files):
    print(f'{file}')
    df = pd.read_csv(file)
    # Cast once on the host; float32 is the precision handed to the sampler.
    xyz = df[["X", "Y", "Z"]].to_numpy(dtype=np.float32)

    if len(xyz) >= sample_size:
        # fps receives a (1, N, 3) CUDA tensor. Only clouds that really need
        # sampling are moved to the GPU.
        lw_points = torch.tensor(xyz).cuda().contiguous().unsqueeze(0)
        sampled_points = fps(lw_points, sample_size).cpu().numpy()
        # reshape instead of a bare squeeze(): squeeze() would also drop the
        # point axis whenever that axis has length 1.
        # NOTE(review): assumes fps returns (1, sample_size, 3) - confirm.
        sampled_points = sampled_points.reshape(-1, 3)
    else:
        # Safety fallback for clouds with fewer points than sample_size:
        # keep every point unchanged (no GPU round trip needed).
        sampled_points = xyz

    sampled_df = pd.DataFrame(sampled_points, columns=["X", "Y", "Z"])
    print(sampled_df.shape)
    # Outputs are numbered 00010000, 00020000, ... in sorted input order.
    filename = f"{str((i + 1) * 10000).zfill(8)}.csv"
    save_path = os.path.join(target_dir, filename)
    sampled_df.to_csv(save_path, index=False)

##### Real World 3 (Leaf-on TREE_DATA)

In [None]:
import os
import pandas as pd
import numpy as np
from glob import glob
import torch
import sys
sys.path.append('/esail4/heeju/Point-M2AE/')
from utils.misc import fps

# Folder of raw per-tree CSV clouds and the folder that receives the
# downsampled versions.
source_dir = "/bess25/heeju/DATA/RAW/Tree_DATA"
target_dir = "/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/RW01_8192_LO"

# Number of points kept per cloud.
sample_size = 8192

os.makedirs(target_dir, exist_ok=True)

# Sorted so the index-based output names below are stable between runs.
csv_files = sorted(glob(os.path.join(source_dir, "*.csv")))

for i, file in enumerate(csv_files):
    print(f'{file}')
    df = pd.read_csv(file)
    # Cast once on the host; float32 is the precision handed to the sampler.
    xyz = df[["X", "Y", "Z"]].to_numpy(dtype=np.float32)

    if len(xyz) >= sample_size:
        # fps receives a (1, N, 3) CUDA tensor. Only clouds that really need
        # sampling are moved to the GPU.
        lw_points = torch.tensor(xyz).cuda().contiguous().unsqueeze(0)
        sampled_points = fps(lw_points, sample_size).cpu().numpy()
        # reshape instead of a bare squeeze(): squeeze() would also drop the
        # point axis whenever that axis has length 1.
        # NOTE(review): assumes fps returns (1, sample_size, 3) - confirm.
        sampled_points = sampled_points.reshape(-1, 3)
    else:
        # Safety fallback for clouds with fewer points than sample_size:
        # keep every point unchanged (no GPU round trip needed).
        sampled_points = xyz

    sampled_df = pd.DataFrame(sampled_points, columns=["X", "Y", "Z"])
    print(sampled_df.shape)
    # Outputs are numbered 00010000, 00020000, ... in sorted input order.
    filename = f"{str((i + 1) * 10000).zfill(8)}.csv"
    save_path = os.path.join(target_dir, filename)
    sampled_df.to_csv(save_path, index=False)

##### Real World 4 (Leaf-on Street Tree Data)

In [7]:
import os
import pandas as pd
import numpy as np
from glob import glob
import torch
import sys
sys.path.append('/esail4/heeju/Point-M2AE/')
from utils.misc import fps

# Folder of raw per-tree CSV clouds and the folder that receives the
# downsampled versions.
source_dir = "/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/real-world/Tree_DATA2"
target_dir = "/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/RW02_8192_LO"

# Number of points kept per cloud.
sample_size = 8192

os.makedirs(target_dir, exist_ok=True)

# Sorted so the index-based output names below are stable between runs.
csv_files = sorted(glob(os.path.join(source_dir, "*.csv")))

for i, file in enumerate(csv_files):
    print(f'{file}')
    # These files are read without a header row; the first three columns are
    # used as X, Y, Z.
    df = pd.read_csv(file, header=None)
    # Cast once on the host; float32 is the precision handed to the sampler.
    xyz = df.iloc[:, :3].to_numpy(dtype=np.float32)

    if len(xyz) >= sample_size:
        # fps receives a (1, N, 3) CUDA tensor. Only clouds that really need
        # sampling are moved to the GPU.
        lw_points = torch.tensor(xyz).cuda().contiguous().unsqueeze(0)
        sampled_points = fps(lw_points, sample_size).cpu().numpy()
        # reshape instead of a bare squeeze(): squeeze() would also drop the
        # point axis whenever that axis has length 1.
        # NOTE(review): assumes fps returns (1, sample_size, 3) - confirm.
        sampled_points = sampled_points.reshape(-1, 3)
    else:
        # Safety fallback for clouds with fewer points than sample_size:
        # keep every point unchanged (no GPU round trip needed).
        sampled_points = xyz

    sampled_df = pd.DataFrame(sampled_points, columns=["X", "Y", "Z"])
    print(sampled_df.shape)
    # Outputs are numbered 00010000, 00020000, ... in sorted input order.
    filename = f"{str((i + 1) * 10000).zfill(8)}.csv"
    save_path = os.path.join(target_dir, filename)
    sampled_df.to_csv(save_path, index=False)

/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/real-world/Tree_DATA2/0428TREE03.csv
(8192, 3)
/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/real-world/Tree_DATA2/0428TREE04.csv
(8192, 3)
/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/real-world/Tree_DATA2/0428TREE07.csv
(8192, 3)
/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/real-world/Tree_DATA2/0428TREE08.csv
(8192, 3)
/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/real-world/Tree_DATA2/0428TREE09.csv
(8192, 3)
/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/real-world/Tree_DATA2/0428TREE10.csv
(8192, 3)
/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/real-world/Tree_DATA2/0428TREE11.csv
(8192, 3)
/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/real-world/Tree_DATA2/0428TREE14.csv
(8192, 3)
/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/real-world/Tree_DATA2/0428TREE15.csv
(8192, 3)
/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/real-world/Tree_DATA2/0428TREE20.csv
(8192, 3)
/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/re

##### Synthetic 1 (HELIOS)

In [None]:
import os
import pandas as pd
import numpy as np
from glob import glob
import torch
import sys
sys.path.append('/esail4/heeju/Point-M2AE/')
from utils.misc import fps

# Folder of raw synthetic CSV clouds and the folder that receives the
# downsampled versions.
source_dir = "/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/synthetic/tls_simul"
target_dir = "/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/SY01_8192_LX"

# Number of points kept per cloud.
sample_size = 8192

os.makedirs(target_dir, exist_ok=True)

# Sorted so the index-based output names below are stable between runs.
csv_files = sorted(glob(os.path.join(source_dir, "*.csv")))

for i, file in enumerate(csv_files):
    print(f'{file}')
    # These files are read without a header row; the first three columns are
    # used as X, Y, Z.
    df = pd.read_csv(file, header=None)
    # Cast once on the host; float32 is the precision handed to the sampler.
    xyz = df.iloc[:, :3].to_numpy(dtype=np.float32)

    if len(xyz) >= sample_size:
        # fps receives a (1, N, 3) CUDA tensor. Only clouds that really need
        # sampling are moved to the GPU.
        lw_points = torch.tensor(xyz).cuda().contiguous().unsqueeze(0)
        sampled_points = fps(lw_points, sample_size).cpu().numpy()
        # reshape instead of a bare squeeze(): squeeze() would also drop the
        # point axis whenever that axis has length 1.
        # NOTE(review): assumes fps returns (1, sample_size, 3) - confirm.
        sampled_points = sampled_points.reshape(-1, 3)
    else:
        # Safety fallback for clouds with fewer points than sample_size:
        # keep every point unchanged (no GPU round trip needed).
        sampled_points = xyz

    sampled_df = pd.DataFrame(sampled_points, columns=["X", "Y", "Z"])
    print(sampled_df.shape)
    # Outputs are numbered 00010000, 00020000, ... in sorted input order.
    filename = f"{str((i + 1) * 10000).zfill(8)}.csv"
    save_path = os.path.join(target_dir, filename)
    sampled_df.to_csv(save_path, index=False)

##### Synthetic 2 (MESH Sampling)

In [None]:
import os
import pandas as pd
import numpy as np
from glob import glob
import torch
import sys
sys.path.append('/esail4/heeju/Point-M2AE/')
from utils.misc import fps

# Folder of raw synthetic CSV clouds and the folder that receives the
# downsampled versions.
source_dir = "/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/RAW/synthetic/sample_las2csv"
target_dir = "/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/SY02_8192_LX"

# Number of points kept per cloud.
sample_size = 8192

os.makedirs(target_dir, exist_ok=True)

# Sorted so the index-based output names below are stable between runs.
csv_files = sorted(glob(os.path.join(source_dir, "*.csv")))

for i, file in enumerate(csv_files):
    print(f'{file}')
    df = pd.read_csv(file)
    # Cast once on the host; float32 is the precision handed to the sampler.
    xyz = df[["X", "Y", "Z"]].to_numpy(dtype=np.float32)

    if len(xyz) >= sample_size:
        # fps receives a (1, N, 3) CUDA tensor. Only clouds that really need
        # sampling are moved to the GPU.
        lw_points = torch.tensor(xyz).cuda().contiguous().unsqueeze(0)
        sampled_points = fps(lw_points, sample_size).cpu().numpy()
        # reshape instead of a bare squeeze(): squeeze() would also drop the
        # point axis whenever that axis has length 1.
        # NOTE(review): assumes fps returns (1, sample_size, 3) - confirm.
        sampled_points = sampled_points.reshape(-1, 3)
    else:
        # Safety fallback for clouds with fewer points than sample_size:
        # keep every point unchanged (no GPU round trip needed).
        sampled_points = xyz

    sampled_df = pd.DataFrame(sampled_points, columns=["X", "Y", "Z"])
    print(sampled_df.shape)
    # Outputs are numbered 00010000, 00020000, ... in sorted input order.
    filename = f"{str((i + 1) * 10000).zfill(8)}.csv"
    save_path = os.path.join(target_dir, filename)
    sampled_df.to_csv(save_path, index=False)

### CSV DATA filename

In [1]:
import os
import glob
import numpy as np
import math

# Write train/test/extra file-name lists for the CSV files in one folder.

def _files_in_groups(selected_ids, files_by_group):
    """Return (per-group file lists, flattened file list) for the given ids."""
    per_group = [files_by_group[group_id] for group_id in selected_ids]
    flat = [name for names in per_group for name in names]
    return per_group, flat


# Folder holding the CSV files to be split.
tree_npy_path = '/bess25/heeju/DATA/REGRESSION/M2AE_FINETUNE/tls_simul'

# Base names of every CSV file directly inside that folder.
remote_tree_files = glob.glob(os.path.join(tree_npy_path, '*.csv'))
remote_tree_name = [os.path.basename(file_path) for file_path in remote_tree_files]

total_files = len(remote_tree_name)

# The group id of a file is its own file name.
group_ids = list(remote_tree_name)

# np.unique also sorts, so the split below is deterministic by file name.
unq_group_ids = np.unique(group_ids)

id_length = len(unq_group_ids)

# Split ratios.
train_ratio = 0.7
test_ratio = 0.15
extra_ratio = 0.15

# train_count is informational only: the train set receives whatever is left
# after the test and extra slices are taken.
train_count = math.ceil(id_length * train_ratio)
test_count = math.ceil(id_length * test_ratio)
extra_count = math.ceil(id_length * extra_ratio)

# No shuffling: ids are split in sorted order.
# Explicit boundary instead of negative slicing, so the test and extra slices
# can never overlap when there are very few ids.
extra_start = max(test_count, id_length - extra_count)
test_set = unq_group_ids[:test_count]
train_set = unq_group_ids[test_count:extra_start]
extra_set = unq_group_ids[extra_start:]

# Index the files by group once instead of rescanning the list for every id.
files_by_group = {}
for group_id, name in zip(group_ids, remote_tree_name):
    files_by_group.setdefault(group_id, []).append(name)

train_set_id, train_set_list = _files_in_groups(train_set, files_by_group)
test_set_id, test_set_list = _files_in_groups(test_set, files_by_group)
extra_set_id, extra_set_list = _files_in_groups(extra_set, files_by_group)

print(len(train_set_id))
print(len(test_set_id))
print(len(extra_set_id))

destination = '/bess25/heeju/DATA/REGRESSION/M2AE_FINETUNE/tls_simul_txt'
os.makedirs(destination, exist_ok=True)
# Output list files, one file name per line.
train_txt_path = os.path.join(destination, 'train.txt')
test_txt_path = os.path.join(destination, 'test.txt')
extra_txt_path = os.path.join(destination, 'extra_train.txt')

with open(train_txt_path, 'w') as train_file:
    train_file.write('\n'.join(train_set_list))

with open(test_txt_path, 'w') as test_file:
    test_file.write('\n'.join(test_set_list))

with open(extra_txt_path, 'w') as extra_file:
    extra_file.write('\n'.join(extra_set_list))


2799
600
600


### look up table

In [12]:
import os
import json
import pandas as pd
from tqdm import tqdm

# Folder of hyper-parameter JSON files and the look-up table CSV to write.
input_dir = "/bess25/heeju/DATA/REGRESSION/hparams_flatten"
output_csv = "/bess25/heeju/DATA/REGRESSION/LUT.csv"

rows = []

# sorted(): os.listdir returns entries in arbitrary order, which would make
# the row order of the table differ from run to run.
for filename in tqdm(sorted(os.listdir(input_dir))):
    if not filename.endswith(".json"):
        continue

    file_path = os.path.join(input_dir, filename)
    # Everything before the first dot identifies the sample.
    stem = filename.split('.')[0]

    with open(file_path, "r") as f:
        hyper = json.load(f)

    # ensure all values are floats
    hyper = [float(x) for x in hyper]

    # append the zero-padded (8 characters) sample id as the last column
    hyper.append(stem.zfill(8))
    rows.append(hyper)

if not rows:
    # Fail with a clear message instead of an IndexError on rows[0] below.
    raise FileNotFoundError(f"No .json files found in {input_dir}")

# column names h1 ~ hN + 'filename'; kept for reference only, because the CSV
# is written without a header row
num_h = len(rows[0]) - 1
columns = [f"h{i+1}" for i in range(num_h)] + ["filename"]

# create DataFrame and save (no index column, no header row)
df = pd.DataFrame(rows)
df.to_csv(output_csv, index=False, header=False)

print(f"✅ CSV saved to {output_csv} with shape {df.shape}")


  0%|          | 0/3999 [00:00<?, ?it/s]

100%|██████████| 3999/3999 [00:00<00:00, 34886.81it/s]

✅ CSV saved to /bess25/heeju/DATA/REGRESSION/LUT.csv with shape (3999, 27)





### Pretrain_data check

#### gather

In [20]:
import os
import shutil
from glob import glob

# Copy every CSV found anywhere under source_folder into one flat folder,
# renaming the copies 99999999-<8-digit running index>.csv.
source_folder = '/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Tree_DATA'
destination_folder = '/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_train_8192'

# Create the destination folder if it does not exist yet.
os.makedirs(destination_folder, exist_ok=True)

# CSV paths from all sub-folders, sorted so the numbering is reproducible.
search_pattern = os.path.join(source_folder, '**', '*.csv')
csv_files = sorted(glob(search_pattern, recursive=True))

# The position in the sorted list becomes the zero-padded index in the name.
for position, src_path in enumerate(csv_files):
    destination_file = os.path.join(destination_folder, f"99999999-{position:08d}.csv")
    # copy2 copies the file together with its metadata.
    shutil.copy2(src_path, destination_file)

# Number of files copied.
counter = len(csv_files)

print("파일 복사가 완료되었습니다.")


파일 복사가 완료되었습니다.


In [22]:
import os
import glob
import shutil
import pandas as pd
import numpy as np

# Copy into target_dir only those CSV files whose data part is exactly
# 8192 rows by 3 columns.
source_dir = '/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_train_8192'
target_dir = '/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/shapenet_tree'

# Create the target folder if it does not exist yet.
os.makedirs(target_dir, exist_ok=True)

csv_files = glob.glob(os.path.join(source_dir, "*.csv"))

for csv_file in csv_files:
    # header=None keeps the first line as row 0, so it is included in the
    # shape printed below.
    raw = pd.read_csv(csv_file, header=None)
    print(f"Original shape of {csv_file}: {raw.shape}")

    # Row 0 is always discarded (it is treated as the column-name line)
    # before the remaining rows are cast to float32.
    arr = raw.iloc[1:].to_numpy().astype(np.float32)

    if arr.shape != (8192, 3):
        print(f"Skipped {csv_file} due to shape {arr.shape}")
        continue

    # Keep the original file name in the target folder.
    target_file = os.path.join(target_dir, os.path.basename(csv_file))
    shutil.copy(csv_file, target_file)
    print(f"Copied {csv_file} to {target_file}")

print("파일 복사가 완료되었습니다.")


Original shape of /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_train_8192/99999999-00000000.csv: (8193, 3)
Copied /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_train_8192/99999999-00000000.csv to /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/shapenet_tree/99999999-00000000.csv
Original shape of /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_train_8192/99999999-00000001.csv: (8193, 3)
Copied /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_train_8192/99999999-00000001.csv to /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/shapenet_tree/99999999-00000001.csv
Original shape of /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_train_8192/99999999-00000002.csv: (8193, 3)
Copied /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_train_8192/99999999-00000002.csv to /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/D

#### train test split

In [23]:
import os
import glob
import shutil
import random

# Move a random fraction of the CSV files out of source_dir into a test folder.
source_dir = '/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/shapenet_tree'
# Destination folder for the held-out (test) files.
target_dir = '/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_test_8192'

# Fraction of files to hold out, and the seed that fixes which ones are chosen.
test_fraction = 0.3
split_seed = 42

# Create the target folder if it does not exist yet.
os.makedirs(target_dir, exist_ok=True)

# Sort first: glob order is not guaranteed, and a seeded shuffle is only
# reproducible when it starts from the same ordering.
csv_files = sorted(glob.glob(os.path.join(source_dir, "*.csv")))

# A private, seeded generator makes the selection repeatable and leaves the
# global random state untouched.
random.Random(split_seed).shuffle(csv_files)

test_size = int(test_fraction * len(csv_files))

# NOTE: files are moved, not copied, so running this again holds out a further
# fraction of whatever is still left in source_dir.
for file_to_move in csv_files[:test_size]:
    # Keep the original file name.
    target_file = os.path.join(target_dir, os.path.basename(file_to_move))

    shutil.move(file_to_move, target_file)
    print(f"Moved {file_to_move} to {target_file}")

print(f"Completed moving {test_size} files to the test folder.")


Moved /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/shapenet_tree/99999999-00001666.csv to /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_test_8192/99999999-00001666.csv
Moved /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/shapenet_tree/99999999-00004004.csv to /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_test_8192/99999999-00004004.csv
Moved /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/shapenet_tree/99999999-00010473.csv to /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_test_8192/99999999-00010473.csv
Moved /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/shapenet_tree/99999999-00009357.csv to /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_test_8192/99999999-00009357.csv
Moved /bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/shapenet_tree/99999999-00004518.csv to /bess25/heeju/DATA/REGRESSION/M2A

#### shapenet move (.npy -> .csv)

In [29]:
import os
import shutil
import numpy as np
import pandas as pd
from glob import glob

# Convert ShapeNet .npy point clouds into X/Y/Z CSV files.
source_folder = '/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/shapenet_pc'
# Folder that receives the converted CSV files.
destination_folder = '/bess25/heeju/DATA/REGRESSION/M2AE_PRETRAIN/PROCESSED/DOWNSAMPLED/Full_tree_for_train_8192'

# Only arrays of exactly this shape are converted.
expected_shape = (8192, 3)

# Create the destination folder if it does not exist yet.
os.makedirs(destination_folder, exist_ok=True)

# Number of files converted so far.
counter = 0

# .npy files directly inside source_folder. The pattern contains no '**', so
# sub-folders are not searched.
npy_files = sorted(glob(os.path.join(source_folder, '*.npy')))

for file in npy_files:
    data = np.load(file)

    # Skip arrays that do not have the expected shape; the warning reports
    # the shape that is actually being checked.
    if data.shape != expected_shape:
        print(f"Warning: {file} shape {data.shape}, expected {expected_shape}. Skipping.")
        continue

    # Output name: the source base name up to its first dot, plus ".csv".
    new_name = f"{os.path.basename(file).split('.')[0]}.csv"
    print(new_name)
    destination_file = os.path.join(destination_folder, new_name)

    # Write the array with an X,Y,Z header row and no index column.
    df = pd.DataFrame(data, columns=["X", "Y", "Z"])
    df.to_csv(destination_file, index=False)

    counter += 1

print("파일 복사 및 변환이 완료되었습니다.")


02691156-10155655850468db78d106ce0a280f87.csv
02691156-1021a0914a7207aff927ed529ad90a11.csv
02691156-1026dd1b26120799107f68a9cb8e3c.csv
02691156-103c9e43cdf6501c62b600da24e0965.csv
02691156-105f7f51e4140ee4b6b87e72ead132ed.csv
02691156-1066b65c30d153e04c3a35cee92bb95b.csv
02691156-106dfe858cb8fbc2afc6b80d80a265ab.csv
02691156-10aa040f470500c6a66ef8df4909ded9.csv
02691156-10af5de930178a161596c26b5af806fe.csv
02691156-10c7cdfdffe2243b88a89a28f04ce622.csv
02691156-10cfc2090a2ade124c3a35cee92bb95b.csv
02691156-10db820f0e20396a492c7ca609cb0182.csv
02691156-10e0a7255d279a419751c7a6f15617f4.csv
02691156-10e4331c34d610dacc14f1e6f4f4f49b.csv
02691156-10eeb119fd5508e0d6d949577c389a84.csv
02691156-110f6dbf0e6216e9f9a63e9a8c332e52.csv
02691156-112ca5420188df4bd90bfc986bc4c94d.csv
02691156-1169d987dbbce76775f4ea0b85a53249.csv
02691156-117830993cc5887726587cb13c78fb9b.csv
02691156-117861b9ebca4f17c69cb28fb4b4d257.csv
02691156-118e8142a8cb1fe19a4a28ef635593ce.csv
02691156-11d2af04fad0a7e2ce19d55bc5e6

#### shapenet txt

In [1]:
import os
import glob
import numpy as np
import math

# Write train/test/extra file-name lists for the CSV files in one folder.

def _files_in_groups(selected_ids, files_by_group):
    """Return (per-group file lists, flattened file list) for the given ids."""
    per_group = [files_by_group[group_id] for group_id in selected_ids]
    flat = [name for names in per_group for name in names]
    return per_group, flat


# Folder holding the CSV files to be split.
tree_npy_path = '/esail4/heeju/Saptree_DATA/pine/mesh_csv'

# Base names of every CSV file directly inside that folder.
remote_tree_files = glob.glob(os.path.join(tree_npy_path, '*.csv'))
remote_tree_name = [os.path.basename(file_path) for file_path in remote_tree_files]

total_files = len(remote_tree_name)

# The group id of a file is its own file name.
group_ids = list(remote_tree_name)

# np.unique also sorts, so the split below is deterministic by file name.
unq_group_ids = np.unique(group_ids)

id_length = len(unq_group_ids)

# Split ratios.
train_ratio = 0.7
test_ratio = 0.15
extra_ratio = 0.15

# train_count is informational only: the train set receives whatever is left
# after the test and extra slices are taken.
train_count = math.ceil(id_length * train_ratio)
test_count = math.ceil(id_length * test_ratio)
extra_count = math.ceil(id_length * extra_ratio)

# No shuffling: ids are split in sorted order.
# Explicit boundary instead of negative slicing, so the test and extra slices
# can never overlap when there are very few ids.
extra_start = max(test_count, id_length - extra_count)
test_set = unq_group_ids[:test_count]
train_set = unq_group_ids[test_count:extra_start]
extra_set = unq_group_ids[extra_start:]

# Index the files by group once instead of rescanning the list for every id.
files_by_group = {}
for group_id, name in zip(group_ids, remote_tree_name):
    files_by_group.setdefault(group_id, []).append(name)

train_set_id, train_set_list = _files_in_groups(train_set, files_by_group)
test_set_id, test_set_list = _files_in_groups(test_set, files_by_group)
extra_set_id, extra_set_list = _files_in_groups(extra_set, files_by_group)

print(len(train_set_id))
print(len(test_set_id))
print(len(extra_set_id))

destination = '/esail4/heeju/Saptree_DATA/pine/mesh_txt'
os.makedirs(destination, exist_ok=True)
# Output list files, one file name per line.
train_txt_path = os.path.join(destination, 'train.txt')
test_txt_path = os.path.join(destination, 'test.txt')
extra_txt_path = os.path.join(destination, 'extra_train.txt')

with open(train_txt_path, 'w') as train_file:
    train_file.write('\n'.join(train_set_list))

with open(test_txt_path, 'w') as test_file:
    test_file.write('\n'.join(test_set_list))

with open(extra_txt_path, 'w') as extra_file:
    extra_file.write('\n'.join(extra_set_list))


5183
1111
1111
