Skip to content

Commit

Permalink
Feat: Add to_csv.py(#2)
Browse files Browse the repository at this point in the history
to_csv.py generates train.csv and valid.csv by splitting the images in each subdirectory into training and validation sets according to the specified ratio.
  • Loading branch information
ptwd committed May 18, 2022
1 parent 11e6fa7 commit 9302843
Showing 1 changed file with 43 additions and 0 deletions.
43 changes: 43 additions & 0 deletions Tools/to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pandas as pd
import random
from pathlib import Path

# ---------------------------------------------------------------------------
# User-configurable settings
# ---------------------------------------------------------------------------
# Percentage (0-100) of each subdirectory's images set aside for validation.
PORTION = 20
# Seed for the random shuffle that decides which images go to validation.
SEED = 42
# Root folder scanned for subdirectories of .jpg images (one per actor).
IMG_DIR_PATH = "/opt/ml/sample_data/images"
# Output locations of the generated CSV files.
TRAIN_CSV_PATH = "./train.csv"
VALID_CSV_PATH = "./valid.csv"
# ---------------------------------------------------------------------------

def build_split_rows(img_dir_path, portion, seed):
    """Split the .jpg images of every subdirectory into train/validation rows.

    Each immediate subdirectory of ``img_dir_path`` is treated as one actor.
    Its ``*.jpg`` files are shuffled and the first ``portion`` percent
    (rounded down) become validation rows; the rest become training rows.

    Args:
        img_dir_path: Directory whose subdirectories contain the images.
        portion: Percentage of each subdirectory's images used for validation.
        seed: Seed for the shuffle; the same seed and the same set of files
            always produce the same split.

    Returns:
        A ``(train_rows, valid_rows)`` tuple. Each row is ``[path, name]``
        where ``path`` is built from the last three components of the image
        path and ``name`` is the subdirectory name.

    Raises:
        FileNotFoundError: If ``img_dir_path`` does not exist.
    """
    rng = random.Random(seed)
    train_rows = []
    valid_rows = []

    # iterdir() yields entries in arbitrary, filesystem-dependent order.
    # Sorting fixes the order in which the RNG is consumed, so the seed
    # really does make the split reproducible across machines.
    for actor in sorted(Path(img_dir_path).iterdir()):
        if not actor.is_dir():
            continue

        # Sort before shuffling for the same reason: the shuffle result
        # depends on the initial order of the list.
        image_paths = sorted(
            str(Path("").joinpath(*image.parts[-3:]))
            for image in actor.glob("*.jpg")
        )
        rng.shuffle(image_paths)

        valid_num = len(image_paths) * portion // 100
        train_rows.extend(
            [path, actor.name] for path in sorted(image_paths[valid_num:])
        )
        valid_rows.extend(
            [path, actor.name] for path in sorted(image_paths[:valid_num])
        )

    return train_rows, valid_rows


def main():
    """Write the train and validation CSV files for the configured directory."""
    train_rows, valid_rows = build_split_rows(IMG_DIR_PATH, PORTION, SEED)
    columns = ["path", "name"]

    # Location of the training CSV file.
    pd.DataFrame(data=train_rows, columns=columns).to_csv(TRAIN_CSV_PATH)
    # Location of the validation CSV file.
    pd.DataFrame(data=valid_rows, columns=columns).to_csv(VALID_CSV_PATH)


if __name__ == "__main__":
    main()

0 comments on commit 9302843

Please sign in to comment.