In [1]:
# Mount Google Drive into the Colab filesystem; every data path used in this
# notebook lives under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import cv2
from google.colab.patches import cv2_imshow

from tqdm import tqdm

import time
import shutil
import os
import warnings
warnings.filterwarnings(action='ignore')

In [3]:
# Folder holding the per-batch CSV listings (columns seen downstream:
# 'file_path' and 'type').
csv_path = '/content/drive/MyDrive/project3/data/traindata/read_file'

# Keep only the CSV files. os.listdir() returns entries in arbitrary order, so
# sort them: the row order of the combined frame (and the class order derived
# from it later) is then the same on every run.
csv_files = sorted(
    entry for entry in os.listdir(csv_path)
    if entry.endswith('.csv')
)

# Fail here with a clear message instead of later inside pd.concat().
if not csv_files:
    raise FileNotFoundError(f'No CSV files found in {csv_path}')

# One DataFrame per CSV file; they are concatenated in the next cell.
temp = [pd.read_csv(os.path.join(csv_path, csv_file)) for csv_file in csv_files]


In [4]:
# Stack the per-file frames into a single table, keep only the rows whose
# 'type' is 'crop' or 'raw', and renumber the index from 0.
combined = pd.concat(temp, ignore_index=True)
image_type = combined['type']
keep_row = (image_type == 'crop') | (image_type == 'raw')
df = combined[keep_row].reset_index(drop=True)
df.head()

Unnamed: 0,file_path,type
0,/content/drive/MyDrive/project3/data/traindata...,raw
1,/content/drive/MyDrive/project3/data/traindata...,raw
2,/content/drive/MyDrive/project3/data/traindata...,raw
3,/content/drive/MyDrive/project3/data/traindata...,raw
4,/content/drive/MyDrive/project3/data/traindata...,raw


In [5]:
# Print a summary of the filtered frame: row count, columns, non-null counts
# and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150283 entries, 0 to 150282
Data columns (total 2 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   file_path  150283 non-null  object
 1   type       150283 non-null  object
dtypes: object(2)
memory usage: 2.3+ MB


In [6]:
# Middle category: the first path component of row 0's file path once the
# raw-image root prefix has been stripped.
raw_image_root = '/content/drive/MyDrive/project3/data/traindata/raw_image/'
df.loc[0, 'file_path'].replace(raw_image_root, '').split('/')[0]

'구이'

In [7]:
# Small (sub) category: the second path component of row 0's file path once
# the raw-image root prefix has been stripped.
raw_image_root = '/content/drive/MyDrive/project3/data/traindata/raw_image/'
df.loc[0, 'file_path'].replace(raw_image_root, '').split('/')[1]

'갈비구이'

In [8]:
# Derive the class labels from the folder layout
#   <root>/<middle class>/<small class>/...
# where <root> is the raw-image folder for 'raw' rows and the crop-image folder
# for every other row. Vectorised string operations replace the two row-wise
# apply() passes, so the path is parsed once instead of twice per row.
RAW_IMAGE_ROOT = '/content/drive/MyDrive/project3/data/traindata/raw_image/'
CROP_IMAGE_ROOT = '/content/drive/MyDrive/project3/data/traindata/crop_image/'

is_raw = df['type'] == 'raw'
without_raw_root = df['file_path'].str.replace(RAW_IMAGE_ROOT, '', regex=False)
without_crop_root = df['file_path'].str.replace(CROP_IMAGE_ROOT, '', regex=False)

# Series.where keeps the raw-stripped value where is_raw is True and takes the
# crop-stripped value everywhere else.
relative_path = without_raw_root.where(is_raw, without_crop_root)
path_parts = relative_path.str.split('/')

df['middle_class'] = path_parts.str[0]
df['small_class'] = path_parts.str[1]

# .str[1] yields NaN when a path has no second component; stop with a message
# that names an offending path instead of letting NaN labels flow downstream.
malformed = df['small_class'].isna()
if malformed.any():
    raise ValueError(
        f"{int(malformed.sum())} file_path value(s) lack '<middle>/<small>/' "
        f"components, e.g. {df.loc[malformed, 'file_path'].iloc[0]!r}"
    )

df['food_class'] = df['middle_class'] + '/' + df['small_class']
df.head()


Unnamed: 0,file_path,type,middle_class,small_class,food_class
0,/content/drive/MyDrive/project3/data/traindata...,raw,구이,갈비구이,구이/갈비구이
1,/content/drive/MyDrive/project3/data/traindata...,raw,구이,갈비구이,구이/갈비구이
2,/content/drive/MyDrive/project3/data/traindata...,raw,구이,갈비구이,구이/갈비구이
3,/content/drive/MyDrive/project3/data/traindata...,raw,구이,갈비구이,구이/갈비구이
4,/content/drive/MyDrive/project3/data/traindata...,raw,구이,갈비구이,구이/갈비구이


In [9]:
# Distinct labels at each level, in order of first appearance in df.
middle_class, small_class, food_class = (
    df[column].unique().tolist()
    for column in ('middle_class', 'small_class', 'food_class')
)

# Show the combined "<middle>/<small>" labels and how many there are.
print(food_class)
print(len(food_class))

['구이/갈비구이', '구이/고등어구이', '구이/곱창구이', '구이/닭갈비', '구이/더덕구이', '구이/갈치구이', '구이/떡갈비', '구이/불고기', '구이/삼겹살', '구이/장어구이', '구이/조개구이', '구이/조기구이', '구이/황태구이', '구이/훈제오리', '국/계란국', '국/떡국_만두국', '국/무국', '국/미역국', '국/북엇국', '국/시래기국', '국/육개장', '국/콩나물국', '기타/과메기', '기타/양념치킨', '기타/젓갈', '기타/콩자반', '기타/편육', '기타/피자', '기타/후라이드치킨', '김치/갓김치', '김치/깍두기', '김치/나박김치', '김치/무생채', '김치/배추김치', '김치/백김치', '김치/부추김치', '김치/열무김치', '김치/오이소박이', '김치/총각김치', '김치/파김치', '나물/가지볶음', '나물/고사리나물', '나물/미역줄기볶음', '나물/숙주나물', '나물/시금치나물', '나물/애호박볶음', '떡/경단', '떡/꿀떡', '떡/송편', '만두/만두', '면/라면', '면/막국수', '면/물냉면', '면/비빔냉면', '면/수제비', '면/열무국수', '며

In [10]:
# NOTE: everything in this cell is commented out and does not run. It is a
# thread-pool variant of the per-class copy: one target folder per food class,
# one shutil.copy task per row, and a 30-second pause between classes.
# import concurrent.futures

# def copy_file(row, new_path):
#     file_path = row['file_path']
#     shutil.copy(file_path, new_path)

# def process_class(name):
#     new_path = '/content/drive/MyDrive/project3/data/traindata/new_image/' + name
#     os.makedirs(new_path, exist_ok=True)
#     temp = df[df['food_class'] == name]
#     with concurrent.futures.ThreadPoolExecutor() as executor:
#         futures = [executor.submit(copy_file, row, new_path) for index, row in temp.iterrows()]
#         for future in concurrent.futures.as_completed(futures):
#             future.result()  # Check for exceptions

# for name in tqdm(food_class):
#     process_class(name)
#     time.sleep(30)



In [None]:
# Copy every listed image into new_image/<middle class>/<small class>/,
# one food class at a time.
NEW_IMAGE_ROOT = '/content/drive/MyDrive/project3/data/traindata/new_image/'

# Pauses kept from the original loop — presumably throttling so Google Drive
# can keep up with the writes; TODO confirm they are still needed.
PAUSE_AFTER_MKDIR_SEC = 10
PAUSE_AFTER_CLASS_SEC = 30

for name in tqdm(food_class):
    new_path = NEW_IMAGE_ROOT + name
    print(new_path)

    # exist_ok=True makes the cell safe to re-run without a separate
    # exists() check.
    os.makedirs(new_path, exist_ok=True)
    time.sleep(PAUSE_AFTER_MKDIR_SEC)

    # Use a dedicated name rather than `temp`: `temp` holds the list of
    # per-CSV frames that the concat cell reads, and overwriting it here made
    # that cell fail when re-run after this one.
    class_file_paths = df.loc[df['food_class'] == name, 'file_path']

    # NOTE(review): shutil.copy into a directory keeps only the basename, so a
    # 'raw' and a 'crop' image with the same file name in the same class would
    # overwrite each other — verify that basenames are unique per class.
    for file_path in class_file_paths:
        shutil.copy(file_path, new_path)

    time.sleep(PAUSE_AFTER_CLASS_SEC)

  0%|          | 0/150 [00:00<?, ?it/s]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/갈비구이


  1%|          | 1/150 [00:51<2:08:49, 51.87s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/고등어구이


  1%|▏         | 2/150 [01:42<2:06:06, 51.12s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/곱창구이


  2%|▏         | 3/150 [02:33<2:04:59, 51.02s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/닭갈비


  3%|▎         | 4/150 [03:24<2:04:24, 51.13s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/더덕구이


  3%|▎         | 5/150 [04:15<2:03:23, 51.06s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/갈치구이


  4%|▍         | 6/150 [05:06<2:02:01, 50.85s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/떡갈비


  5%|▍         | 7/150 [05:57<2:01:48, 51.11s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/불고기


  5%|▌         | 8/150 [06:49<2:01:25, 51.30s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/삼겹살


  6%|▌         | 9/150 [07:39<1:59:50, 50.99s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/장어구이


  7%|▋         | 10/150 [08:30<1:58:43, 50.89s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/조개구이


  7%|▋         | 11/150 [09:20<1:57:30, 50.72s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/조기구이


  8%|▊         | 12/150 [10:10<1:56:13, 50.54s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/황태구이


  9%|▊         | 13/150 [11:01<1:55:19, 50.51s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/구이/훈제오리


  9%|▉         | 14/150 [11:51<1:54:19, 50.44s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/국/계란국


 10%|█         | 15/150 [12:42<1:53:54, 50.63s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/국/떡국_만두국


 11%|█         | 16/150 [13:33<1:53:00, 50.60s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/국/무국


 11%|█▏        | 17/150 [14:23<1:52:00, 50.53s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/국/미역국


 12%|█▏        | 18/150 [15:13<1:50:56, 50.43s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/국/북엇국


 13%|█▎        | 19/150 [16:04<1:50:04, 50.42s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/국/시래기국


 13%|█▎        | 20/150 [16:54<1:49:04, 50.34s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/국/육개장


 14%|█▍        | 21/150 [17:44<1:48:14, 50.35s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/국/콩나물국


 15%|█▍        | 22/150 [20:38<3:06:32, 87.44s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/기타/과메기


 15%|█▌        | 23/150 [25:20<5:08:28, 145.74s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/기타/양념치킨


 16%|█▌        | 24/150 [30:02<6:32:08, 186.73s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/기타/젓갈


 17%|█▋        | 25/150 [34:49<7:31:48, 216.87s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/기타/콩자반


 17%|█▋        | 26/150 [40:20<8:38:53, 251.08s/it]

/content/drive/MyDrive/project3/data/traindata/new_image/기타/편육
