<a href="https://colab.research.google.com/github/hojunking/carbon_reduction_project/blob/main/baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import

In [4]:
import random
import pandas as pd
import numpy as np
import os
import cv2

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from tqdm.auto import tqdm

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torchvision.models as models

from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action='ignore') 

In [5]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

## Hyperparameter Setting

In [6]:
CFG = {
    'IMG_SIZE':224,
    'EPOCHS':10,
    'LEARNING_RATE':3e-4,
    'BATCH_SIZE':64,
    'SEED':41
}

## Fixed RandomSeed

In [7]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Pre-processing

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [31]:
main_path = '/content/drive/MyDrive/탄소저감프로젝트/data/'
label_list = ["10kwalking","public_vehicle",'stair']

total_train_img_paths = []
total_train_img_labels = []
total_test_img_paths = []
total_test_img_labels = []

for label in label_list: ## 각 레이블 돌기
  print(f'label: {label}')
  img_paths = [] 
  img_labels = []

  # default ratio
  train_ratio = 100
  test_ratio = 40

  dir_path = main_path + label ## 레이블 폴더 경로
  for folder, subfolders, filenames in os.walk(dir_path): ## 폴더 내 모든 파일 탐색
    
    for img in filenames: ## 각 파일 경로, 레이블 저장
      img_paths.append(folder+'/'+img)
      img_labels.append(label)
  print(f'img_paths len : {len(img_paths)}\n')

  if label == '10kwalking': ## 10walking 데이터 비율 설정하기 (데이터수: 2494)
    train_ratio = 120
    test_ratio = 30
  elif label == 'public_vehicle': ## 10walking 데이터 비율 설정하기 (데이터수: 2494)
    train_ratio = 30
    test_ratio = 10

  total_train_img_paths.extend(img_paths[:train_ratio])
  total_train_img_labels.extend(img_paths[:train_ratio])
  
  total_test_img_paths.extend(img_paths[-test_ratio:])
  total_test_img_labels.extend(img_paths[-test_ratio:])

print('Train_Images: ',len(total_train_img_paths))
print("Train_Images_labels:", len(total_train_img_labels))
print('Test_Images: ',len(total_test_img_paths))
print("Test_Images_labels:", len(total_test_img_labels))

label: 10kwalking
img_paths len : 2494

label: public_vehicle
img_paths len : 1245

label: stair
img_paths len : 1573

Train_Images:  250
Train_Images_labels: 250
Test_Images:  80
Test_Images_labels: 80


In [None]:
total_train_img_paths

In [35]:
## Pandas 데이터프레임 만들기
trn_df = pd.DataFrame(total_train_img_paths, columns=['image_id'])
trn_df['dir'] = trn_df['image_id'].apply(lambda x: os.path.dirname(x))
trn_df['image_id'] = trn_df['image_id'].apply(lambda x: os.path.basename(x))
trn_df['label'] = total_train_img_labels
train = trn_df
train

Unnamed: 0,image_id,dir,label
0,캐시워크 만보_352.jpg,/content/drive/MyDrive/탄소저감프로젝트/dat...,/content/drive/MyDrive/탄소저감프로젝트/dat...
1,캐시워크 만보_354.jpg,/content/drive/MyDrive/탄소저감프로젝트/dat...,/content/drive/MyDrive/탄소저감프로젝트/dat...
2,캐시워크 만보_355.jpg,/content/drive/MyDrive/탄소저감프로젝트/dat...,/content/drive/MyDrive/탄소저감프로젝트/dat...
3,캐시워크 만보_353.jpg,/content/drive/MyDrive/탄소저감프로젝트/dat...,/content/drive/MyDrive/탄소저감프로젝트/dat...
4,캐시워크 만보_356.jpg,/content/drive/MyDrive/탄소저감프로젝트/dat...,/content/drive/MyDrive/탄소저감프로젝트/dat...
...,...,...,...
245,지하철 계단_061.jpg,/content/drive/MyDrive/탄소저감프로젝트/dat...,/content/drive/MyDrive/탄소저감프로젝트/dat...
246,지하철 계단_063.jpg,/content/drive/MyDrive/탄소저감프로젝트/dat...,/content/drive/MyDrive/탄소저감프로젝트/dat...
247,지하철 계단_064.jpg,/content/drive/MyDrive/탄소저감프로젝트/dat...,/content/drive/MyDrive/탄소저감프로젝트/dat...
248,지하철 계단_065.jpg,/content/drive/MyDrive/탄소저감프로젝트/dat...,/content/drive/MyDrive/탄소저감프로젝트/dat...


In [None]:
# Label Encoding
le = preprocessing.LabelEncoder()
train['label'] = le.fit_transform(train['label'].values)