In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from datetime import datetime
from pytz import timezone
import numpy as np
from gensim.models import KeyedVectors

root_dir = "/kaggle/working/"
root_imageset_dir = "/kaggle/input/fashion-product-images-small/images"

now = datetime.now(timezone('Asia/Seoul'))
folder_name = now.strftime("%Y-%m-%d_%H_%M_%S")
root_work_dir = os.path.join(root_dir, folder_name)
os.mkdir(root_work_dir)
root_work_weight_dir = os.path.join(root_work_dir, "weights")
os.mkdir(root_work_weight_dir)

y_columns = ['gender', 'articleType', 'season', 'usage']
x_columns = ['masterCategory', 'subCategory', 'baseColour', 'year', 'productDisplayName']
# Load the data
data = pd.read_csv('/kaggle/input/fashion-product-images-small/styles.csv', on_bad_lines='skip')

# Define common placeholders for missing values
missing_value_placeholders = ['', ' ', '-', 'None', 'NA', 'N/A', 'null']

# Convert all placeholders to NaN
for column in y_columns + x_columns:
    data[column].replace(missing_value_placeholders, np.nan, inplace=True)

# Handle missing values - Ensure no NaNs
for column in y_columns + x_columns:
    if data[column].isnull().sum() > 0:  # Check if there are any NaNs
        data[column].fillna(data[column].mode().iloc[0], inplace=True)

# Label encoding for categorical features
label_encoders = {}
for column in y_columns + x_columns[:-1]:  # Skip productDisplayName
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Check for missing values in each column
for column in data.columns:
    missing_indices = data[data[column].isnull()].index.tolist()
    if missing_indices:
        print(f"'{column}' 열의 결측치가 있습니다: {missing_indices}")
    else:
        print(f"'{column}' 열에는 결측치가 없습니다.")

# List all image files in the directory
image_files = os.listdir(root_imageset_dir)
image_files = [f for f in image_files if f.endswith('.jpg')]

# Extract IDs from image filenames
image_ids = {os.path.splitext(f)[0] for f in image_files}

# Get the IDs from the DataFrame
data_ids = set(data['id'].astype(str))

# Find IDs in data that do not have corresponding images
missing_image_ids = data_ids - image_ids
if missing_image_ids:
    print(f"다음 ID는 이미지가 없습니다: {missing_image_ids}")

# Filter the DataFrame to only include rows with available images
data = data[data['id'].astype(str).isin(image_ids)].reset_index(drop=True)

# Split into train, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(
    data[['id'] + x_columns], 
    data[y_columns], 
    test_size=0.3, 
    random_state=42
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp, 
    y_temp, 
    test_size=0.5, 
    random_state=42
)

print("data-preprocessing end..\n")

# Load Word2Vec embeddings
word2vec_path = '/kaggle/input/googlenewsvectorsnegative300/GoogleNews-vectors-negative300.bin'  # Word2Vec 모델 경로
word2vec = KeyedVectors.load_word2vec_format(word2vec_path, binary=True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Function to embed product display name using Word2Vec
def embed_product_display_name(product_display_names):
    embedded_vectors = []
    for name in product_display_names:
        words = name.split()
        word_vectors = [word2vec[word] for word in words if word in word2vec]
        if word_vectors:
            embedded_vectors.append(np.mean(word_vectors, axis=0))
        else:
            embedded_vectors.append(np.zeros(300))  # Word2Vec 벡터 차원
    return torch.tensor(embedded_vectors, dtype=torch.float32).to(device)

class FashionDataset(Dataset):
    def __init__(self, X, y, root_dir, transform=None):
        self.X = X.reset_index(drop=True)  # Reset index to ensure consistency
        self.y = y.reset_index(drop=True)  # Reset index to ensure consistency
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, str(self.X.iloc[idx]['id']) + '.jpg')
        image = Image.open(img_name).convert('RGB')  # Ensure image is RGB

        if self.transform:
            image = self.transform(image)

        # Access the label using the correct column structure
        gender_idx = int(self.y.iloc[idx, 0])  # Adjust indexing to single integer
        article_idx = int(self.y.iloc[idx, 1])
        season_idx = int(self.y.iloc[idx, 2])
        usage_idx = int(self.y.iloc[idx, 3])

        # Encode additional features
        master_category_idx = float(self.X.iloc[idx]['masterCategory'])
        sub_category_idx = float(self.X.iloc[idx]['subCategory'])
        base_colour_idx = float(self.X.iloc[idx]['baseColour'])
        year = float(self.X.iloc[idx]['year'])

        # Get the text embedding
        product_display_name_embedding = embed_product_display_name([self.X.iloc[idx]['productDisplayName']])

        # Convert labels to one-hot encoded vectors
        gender_one_hot = np.eye(num_gender)[gender_idx]
        article_one_hot = np.eye(num_article)[article_idx]
        season_one_hot = np.eye(num_season)[season_idx]
        usage_one_hot = np.eye(num_usage)[usage_idx]

        labels = {
            'gender': torch.tensor(gender_one_hot, dtype=torch.float),
            'articleType': torch.tensor(article_one_hot, dtype=torch.float),
            'season': torch.tensor(season_one_hot, dtype=torch.float),
            'usage': torch.tensor(usage_one_hot, dtype=torch.float)
        }

        features = {
            'image': image,
            'masterCategory': torch.tensor(master_category_idx, dtype=torch.float).to(device),
            'subCategory': torch.tensor(sub_category_idx, dtype=torch.float).to(device),
            'baseColour': torch.tensor(base_colour_idx, dtype=torch.float).to(device),
            'year': torch.tensor(year, dtype=torch.float).to(device),
            'productDisplayName': product_display_name_embedding.squeeze(0)  # Ensure it is 1D for concatenation
        }

        return features, labels

# Data augmentation and normalization
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create datasets with consistent label structure
train_dataset = FashionDataset(
    X_train,
    y_train,
    root_dir=root_imageset_dir,
    transform=transform
)

val_dataset = FashionDataset(
    X_val,
    y_val,
    root_dir=root_imageset_dir,
    transform=transform
)

test_dataset = FashionDataset(
    X_test,
    y_test,
    root_dir=root_imageset_dir,
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print("data-loader end..\n")

# Function to get the backbone model
def get_backbone(backbone_name, pretrained=True):
    if backbone_name == "resnet18":
        return models.resnet18(pretrained=pretrained)
    elif backbone_name == "resnet50":
        return models.resnet50(pretrained=pretrained)
    elif backbone_name == "efficientnet_b0":
        return models.efficientnet_b0(pretrained=pretrained)
    else:
        raise ValueError(f"Unsupported backbone: {backbone_name}")

# Define the model
class FashionModel(nn.Module):
    def __init__(self, num_gender, num_article, num_season, num_usage, num_additional_features, backbone_name="resnet18"):
        super(FashionModel, self).__init__()
        self.backbone = get_backbone(backbone_name)
        # EfficientNet uses `classifier` as the final layer
        if hasattr(self.backbone, 'fc'):
            num_features = self.backbone.fc.in_features  # ResNet-style models
            self.backbone.fc = nn.Identity()
        elif hasattr(self.backbone, 'classifier'):
            num_features = self.backbone.classifier[-1].in_features  # EfficientNet
            self.backbone.classifier = nn.Identity()

        # Combine image features with additional features
        self.fc1 = nn.Linear(num_features + num_additional_features, 512)
        self.dropout = nn.Dropout(0.5)
        
        # Define new classification layers using the retrieved num_features
        self.gender_classifier = nn.Linear(512, num_gender)
        self.article_classifier = nn.Linear(512, num_article)
        self.season_classifier = nn.Linear(512, num_season)
        self.usage_classifier = nn.Linear(512, num_usage)

    def forward(self, x_image, x_features):
        image_features = self.backbone(x_image)
        combined_features = torch.cat((image_features, x_features), dim=1)
        x = self.fc1(combined_features)
        x = self.dropout(x)
        x = torch.relu(x)

        gender_output = self.gender_classifier(x)
        article_output = self.article_classifier(x)
        season_output = self.season_classifier(x)
        usage_output = self.usage_classifier(x)

        return gender_output, article_output, season_output, usage_output

# Initialize the model and move to GPU
num_gender = len(label_encoders['gender'].classes_)
num_article = len(label_encoders['articleType'].classes_)
num_season = len(label_encoders['season'].classes_)
num_usage = len(label_encoders['usage'].classes_)
num_additional_features = 4 + 300  # 4 for categorical and year, 300 for Word2Vec embedding

backbone_name = "efficientnet_b0"  # Change this to switch backbones
model = FashionModel(num_gender, num_article, num_season, num_usage, num_additional_features, backbone_name).to(device)

# Define cross-entropy class
class CE_Loss(nn.Module):
    def __init__(self):
        super(CE_Loss, self).__init__()
    def forward(self, inputs, targets, size_average=False):
        logsoftmax = nn.LogSoftmax(dim=1)
        if size_average:
            return torch.mean(torch.sum(-targets * logsoftmax(inputs), dim=1))
        else:
            return torch.sum(torch.sum(-targets * logsoftmax(inputs), dim=1))

# Define the loss function and optimizer
criterion = CE_Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training and validation loop
best_f1_score = 0.0
max_epoch = 0

# Calculate validation performance metrics
def evaluate_performance(true, pred):
    accuracy = accuracy_score(true, pred)
    precision = precision_score(true, pred, average='weighted')
    recall = recall_score(true, pred, average='weighted')
    f1 = f1_score(true, pred, average='weighted')
    return accuracy, precision, recall, f1

def save_log(root_log_dir, content):
    with open(os.path.join(root_log_dir, 'max_epoch_log.txt'), 'a') as f:
        f.write("{}".format(content))

print("start Training..\n")
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for features, labels in train_loader:
        images = features['image'].to(device).float()  # Ensure images are FloatTensor
        additional_features = torch.cat([
            features['masterCategory'].unsqueeze(1),
            features['subCategory'].unsqueeze(1),
            features['baseColour'].unsqueeze(1),
            features['year'].unsqueeze(1),
            features['productDisplayName'].to(device)  # Ensure it is on the same device
        ], dim=1)

        labels = {key: value.to(device) for key, value in labels.items()}  # Move labels to device
        optimizer.zero_grad()
        # Forward pass
        gender_output, article_output, season_output, usage_output = model(images, additional_features)

        # Calculate loss for each task
        loss_gender = criterion(gender_output, labels['gender'])
        loss_article = criterion(article_output, labels['articleType'])
        loss_season = criterion(season_output, labels['season'])
        loss_usage = criterion(usage_output, labels['usage'])

        # Total loss
        loss = loss_gender + loss_article + loss_season + loss_usage
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Validation phase
    model.eval()
    val_gender_true = []
    val_gender_pred = []
    val_article_true = []
    val_article_pred = []
    val_season_true = []
    val_season_pred = []
    val_usage_true = []
    val_usage_pred = []

    with torch.no_grad():
        for features, labels in val_loader:
            images = features['image'].to(device).float()
            additional_features = torch.cat([
                features['masterCategory'].unsqueeze(1),
                features['subCategory'].unsqueeze(1),
                features['baseColour'].unsqueeze(1),
                features['year'].unsqueeze(1),
                features['productDisplayName'].to(device)  # Ensure it is on the same device
            ], dim=1)

            labels = {key: value.to(device) for key, value in labels.items()}

            gender_output, article_output, season_output, usage_output = model(images, additional_features)

            _, predicted_gender = torch.max(gender_output, 1)
            _, predicted_article = torch.max(article_output, 1)
            _, predicted_season = torch.max(season_output, 1)
            _, predicted_usage = torch.max(usage_output, 1)

            val_gender_true.extend(torch.argmax(labels['gender'], dim=1).cpu().numpy())
            val_gender_pred.extend(predicted_gender.cpu().numpy())
            val_article_true.extend(torch.argmax(labels['articleType'], dim=1).cpu().numpy())
            val_article_pred.extend(predicted_article.cpu().numpy())
            val_season_true.extend(torch.argmax(labels['season'], dim=1).cpu().numpy())
            val_season_pred.extend(predicted_season.cpu().numpy())
            val_usage_true.extend(torch.argmax(labels['usage'], dim=1).cpu().numpy())
            val_usage_pred.extend(predicted_usage.cpu().numpy())

    gender_metrics = evaluate_performance(val_gender_true, val_gender_pred)
    article_metrics = evaluate_performance(val_article_true, val_article_pred)
    season_metrics = evaluate_performance(val_season_true, val_season_pred)
    usage_metrics = evaluate_performance(val_usage_true, val_usage_pred)

    avg_f1_score = (gender_metrics[3] + article_metrics[3] + season_metrics[3] + usage_metrics[3]) / 4
    avg_acc = (gender_metrics[0] + article_metrics[0] + season_metrics[0] + usage_metrics[0]) / 4
    
    # Save the best model
    if avg_f1_score > best_f1_score:
        max_epoch = epoch + 1
        best_f1_score = avg_f1_score
        content = "epoch(valid):{},{}\n".format(max_epoch, best_f1_score)
        save_log(root_work_dir, content)

    torch.save(model.state_dict(), os.path.join(root_work_weight_dir, "train_{}.pth".format(epoch + 1)))

    print(f'Epoch [{epoch+1}/{num_epochs}], '
          f'Training Loss: {running_loss/len(train_loader):.4f}, '
          f'Validation F1 Score: {avg_f1_score:.4f}, '
          f'Validation gender F1 Score: {gender_metrics[3]:.4f}, ' 
          f'Validation article F1 Score: {article_metrics[3]:.4f}, '
          f'Validation season F1 Score: {season_metrics[3]:.4f}, '
          f'Validation usage_metrics F1 Score: {usage_metrics[3]:.4f}, '
          f'Validation Acc Score: {avg_acc:.4f}, '
         )


print('Training completed\n')

print('Test start..\n')
# Load the saved model
model.load_state_dict(torch.load(os.path.join(root_work_weight_dir,"train_{}.pth".format(max_epoch))))
# Evaluate performance on the test set
model.eval()
test_gender_true = []
test_gender_pred = []
test_article_true = []
test_article_pred = []
test_season_true = []
test_season_pred = []
test_usage_true = []
test_usage_pred = []
with torch.no_grad():
    for features, labels in test_loader:
        images = features['image'].to(device).float()
        additional_features = torch.cat([
            features['masterCategory'].unsqueeze(1),
            features['subCategory'].unsqueeze(1),
            features['baseColour'].unsqueeze(1),
            features['year'].unsqueeze(1),
            features['productDisplayName'].to(device)  # Ensure it is on the same device
        ], dim=1)

        labels = {key: value.to(device) for key, value in labels.items()}

        gender_output, article_output, season_output, usage_output = model(images, additional_features)

        _, predicted_gender = torch.max(gender_output, 1)
        _, predicted_article = torch.max(article_output, 1)
        _, predicted_season = torch.max(season_output, 1)
        _, predicted_usage = torch.max(usage_output, 1)

        test_gender_true.extend(torch.argmax(labels['gender'], dim=1).cpu().numpy())
        test_gender_pred.extend(predicted_gender.cpu().numpy())
        test_article_true.extend(torch.argmax(labels['articleType'], dim=1).cpu().numpy())
        test_article_pred.extend(predicted_article.cpu().numpy())
        test_season_true.extend(torch.argmax(labels['season'], dim=1).cpu().numpy())
        test_season_pred.extend(predicted_season.cpu().numpy())
        test_usage_true.extend(torch.argmax(labels['usage'], dim=1).cpu().numpy())
        test_usage_pred.extend(predicted_usage.cpu().numpy())

    gender_metrics = evaluate_performance(test_gender_true, test_gender_pred)
    article_metrics = evaluate_performance(test_article_true, test_article_pred)
    season_metrics = evaluate_performance(test_season_true, test_season_pred)
    usage_metrics = evaluate_performance(test_usage_true, test_usage_pred)

    avg_f1_score = (gender_metrics[3] + article_metrics[3] + season_metrics[3] + usage_metrics[3]) / 4
    avg_acc = (gender_metrics[0] + article_metrics[0] + season_metrics[0] + usage_metrics[0]) / 4
    content = "epoch(test):{},{}\n".format(max_epoch, best_f1_score)
    save_log(root_work_dir, content)    
    print(f'Epoch [{max_epoch}/{num_epochs}], '
          f'Test F1 Score: {avg_f1_score:.4f}, '
          f'Test gender F1 Score: {gender_metrics[3]:.4f}, ' 
          f'Test article F1 Score: {article_metrics[3]:.4f}, '
          f'Test season F1 Score: {season_metrics[3]:.4f}, '
          f'Test usage_metrics F1 Score: {usage_metrics[3]:.4f}, '
          f'Test Acc Score: {avg_acc:.4f}, '
         )


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[column].replace(missing_value_placeholders, np.nan, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[column].fillna(data[column].mode().iloc[0], inplace=True)


'id' 열에는 결측치가 없습니다.
'gender' 열에는 결측치가 없습니다.
'masterCategory' 열에는 결측치가 없습니다.
'subCategory' 열에는 결측치가 없습니다.
'articleType' 열에는 결측치가 없습니다.
'baseColour' 열에는 결측치가 없습니다.
'season' 열에는 결측치가 없습니다.
'year' 열에는 결측치가 없습니다.
'usage' 열에는 결측치가 없습니다.
'productDisplayName' 열에는 결측치가 없습니다.
다음 ID는 이미지가 없습니다: {'39410', '39401', '39425', '12347', '39403'}
data-preprocessing end..

data-loader end..



Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 141MB/s]


start Training..



  return torch.tensor(embedded_vectors, dtype=torch.float32).to(device)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/20], Training Loss: 71.0928, Validation F1 Score: 0.8902, Validation gender F1 Score: 0.9729, Validation article F1 Score: 0.8748, Validation season F1 Score: 0.7922, Validation usage_metrics F1 Score: 0.9208, Validation Acc Score: 0.8949, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [2/20], Training Loss: 39.8058, Validation F1 Score: 0.9121, Validation gender F1 Score: 0.9812, Validation article F1 Score: 0.9168, Validation season F1 Score: 0.8248, Validation usage_metrics F1 Score: 0.9257, Validation Acc Score: 0.9153, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [3/20], Training Loss: 33.0062, Validation F1 Score: 0.9216, Validation gender F1 Score: 0.9842, Validation article F1 Score: 0.9338, Validation season F1 Score: 0.8352, Validation usage_metrics F1 Score: 0.9333, Validation Acc Score: 0.9235, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [4/20], Training Loss: 29.0433, Validation F1 Score: 0.9261, Validation gender F1 Score: 0.9868, Validation article F1 Score: 0.9393, Validation season F1 Score: 0.8407, Validation usage_metrics F1 Score: 0.9377, Validation Acc Score: 0.9277, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [5/20], Training Loss: 26.1772, Validation F1 Score: 0.9277, Validation gender F1 Score: 0.9863, Validation article F1 Score: 0.9423, Validation season F1 Score: 0.8447, Validation usage_metrics F1 Score: 0.9376, Validation Acc Score: 0.9297, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [6/20], Training Loss: 24.2056, Validation F1 Score: 0.9326, Validation gender F1 Score: 0.9867, Validation article F1 Score: 0.9494, Validation season F1 Score: 0.8582, Validation usage_metrics F1 Score: 0.9362, Validation Acc Score: 0.9340, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [7/20], Training Loss: 22.2089, Validation F1 Score: 0.9328, Validation gender F1 Score: 0.9882, Validation article F1 Score: 0.9485, Validation season F1 Score: 0.8535, Validation usage_metrics F1 Score: 0.9408, Validation Acc Score: 0.9346, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [8/20], Training Loss: 20.3868, Validation F1 Score: 0.9369, Validation gender F1 Score: 0.9933, Validation article F1 Score: 0.9544, Validation season F1 Score: 0.8558, Validation usage_metrics F1 Score: 0.9440, Validation Acc Score: 0.9378, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [9/20], Training Loss: 18.7544, Validation F1 Score: 0.9385, Validation gender F1 Score: 0.9909, Validation article F1 Score: 0.9574, Validation season F1 Score: 0.8581, Validation usage_metrics F1 Score: 0.9477, Validation Acc Score: 0.9402, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [10/20], Training Loss: 17.2972, Validation F1 Score: 0.9400, Validation gender F1 Score: 0.9926, Validation article F1 Score: 0.9582, Validation season F1 Score: 0.8660, Validation usage_metrics F1 Score: 0.9433, Validation Acc Score: 0.9411, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [11/20], Training Loss: 16.0754, Validation F1 Score: 0.9401, Validation gender F1 Score: 0.9930, Validation article F1 Score: 0.9593, Validation season F1 Score: 0.8672, Validation usage_metrics F1 Score: 0.9408, Validation Acc Score: 0.9412, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [12/20], Training Loss: 14.5899, Validation F1 Score: 0.9357, Validation gender F1 Score: 0.9933, Validation article F1 Score: 0.9542, Validation season F1 Score: 0.8532, Validation usage_metrics F1 Score: 0.9420, Validation Acc Score: 0.9362, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [13/20], Training Loss: 13.5002, Validation F1 Score: 0.9367, Validation gender F1 Score: 0.9920, Validation article F1 Score: 0.9575, Validation season F1 Score: 0.8548, Validation usage_metrics F1 Score: 0.9426, Validation Acc Score: 0.9380, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [14/20], Training Loss: 12.2391, Validation F1 Score: 0.9363, Validation gender F1 Score: 0.9904, Validation article F1 Score: 0.9556, Validation season F1 Score: 0.8552, Validation usage_metrics F1 Score: 0.9439, Validation Acc Score: 0.9369, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [15/20], Training Loss: 11.4764, Validation F1 Score: 0.9423, Validation gender F1 Score: 0.9915, Validation article F1 Score: 0.9612, Validation season F1 Score: 0.8686, Validation usage_metrics F1 Score: 0.9478, Validation Acc Score: 0.9432, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [16/20], Training Loss: 10.7368, Validation F1 Score: 0.9393, Validation gender F1 Score: 0.9927, Validation article F1 Score: 0.9628, Validation season F1 Score: 0.8591, Validation usage_metrics F1 Score: 0.9424, Validation Acc Score: 0.9402, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [17/20], Training Loss: 9.8728, Validation F1 Score: 0.9398, Validation gender F1 Score: 0.9944, Validation article F1 Score: 0.9593, Validation season F1 Score: 0.8612, Validation usage_metrics F1 Score: 0.9442, Validation Acc Score: 0.9409, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [18/20], Training Loss: 9.5188, Validation F1 Score: 0.9366, Validation gender F1 Score: 0.9934, Validation article F1 Score: 0.9615, Validation season F1 Score: 0.8465, Validation usage_metrics F1 Score: 0.9450, Validation Acc Score: 0.9372, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [19/20], Training Loss: 8.8324, Validation F1 Score: 0.9357, Validation gender F1 Score: 0.9940, Validation article F1 Score: 0.9613, Validation season F1 Score: 0.8393, Validation usage_metrics F1 Score: 0.9484, Validation Acc Score: 0.9362, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [20/20], Training Loss: 8.2563, Validation F1 Score: 0.9400, Validation gender F1 Score: 0.9942, Validation article F1 Score: 0.9593, Validation season F1 Score: 0.8613, Validation usage_metrics F1 Score: 0.9454, Validation Acc Score: 0.9409, 
Training completed

Test start..

Epoch [15/20], Test F1 Score: 0.9442, Test gender F1 Score: 0.9921, Test article F1 Score: 0.9674, Test season F1 Score: 0.8694, Test usage_metrics F1 Score: 0.9480, Test Acc Score: 0.9450, 


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [2]:
pip install dropbox

Collecting dropbox
  Downloading dropbox-12.0.2-py3-none-any.whl.metadata (4.3 kB)
Collecting stone<3.3.3,>=2 (from dropbox)
  Downloading stone-3.3.1-py3-none-any.whl.metadata (8.0 kB)
Collecting ply>=3.4 (from stone<3.3.3,>=2->dropbox)
  Downloading ply-3.11-py2.py3-none-any.whl.metadata (844 bytes)
Downloading dropbox-12.0.2-py3-none-any.whl (572 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m572.1/572.1 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading stone-3.3.1-py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.3/162.3 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ply-3.11-py2.py3-none-any.whl (49 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.6/49.6 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ply, stone, dropbox
Successfully installed dropbox-12.0.2 ply-3.11 stone-3.3.1
Note: you may need to restart the kernel to use

In [3]:
import os
import dropbox

def upload_files_to_dropbox(local_directory, dbx_token,target_base_name=None):
    # Dropbox에 연결
    dbx = dropbox.Dropbox(dbx_token)

    # 로컬 디렉토리 이름을 가져옴
    if(target_base_name is None):
        base_folder_name = os.path.basename(local_directory.rstrip(os.path.sep))
    else:
        base_folder_name = os.path.basename(local_directory.rstrip(os.path.sep))
        base_folder_name = f"{target_base_name}/{base_folder_name}"
        

    # 지정된 로컬 디렉토리의 파일과 디렉토리를 순회
    for root, dirs, files in os.walk(local_directory):
        for file in files:
            # 파일의 전체 로컬 경로
            local_path = os.path.join(root, file)
            
            # Dropbox에 업로드할 경로 설정
            relative_path = os.path.relpath(local_path, local_directory)
            dropbox_path = f"/{base_folder_name}/{relative_path.replace(os.path.sep, '/')}"

            # 파일 업로드
            with open(local_path, "rb") as f:
                try:
                    dbx.files_upload(f.read(), dropbox_path, mode=dropbox.files.WriteMode("overwrite"))
                    print(f"Uploaded {local_path} to {dropbox_path}")
                except Exception as e:
                    print(f"Failed to upload {local_path} to {dropbox_path}: {e}")

# 설정값 입력
LOCAL_DIRECTORY = root_work_dir
ACCESS_TOKEN = "sl.B6zi_F8zp3AXIFpvndcwkdfxWRRcy3rDyyhq6a1L4IBoqlL9QFYAUDwBhrVT4B_kp6dvTtRnHSSQOSS4m3DXyQRS_kzoSJ-MY76mYvKQ8g5oSZPDlodU9M7X8yH0Ny_KR_QYdEmxKmnojv3fLT7pAYs"

upload_files_to_dropbox(LOCAL_DIRECTORY, ACCESS_TOKEN,"multi-modal_hardparam_ce-loss_efficientnetb0_w2v_normal")


Uploaded /kaggle/working/2024-08-12_04_34_34/max_epoch_log.txt to /multi-modal_hardparam_ce-loss_efficientnetb0_w2v_normal/2024-08-12_04_34_34/max_epoch_log.txt
Uploaded /kaggle/working/2024-08-12_04_34_34/weights/train_14.pth to /multi-modal_hardparam_ce-loss_efficientnetb0_w2v_normal/2024-08-12_04_34_34/weights/train_14.pth
Uploaded /kaggle/working/2024-08-12_04_34_34/weights/train_17.pth to /multi-modal_hardparam_ce-loss_efficientnetb0_w2v_normal/2024-08-12_04_34_34/weights/train_17.pth
Uploaded /kaggle/working/2024-08-12_04_34_34/weights/train_19.pth to /multi-modal_hardparam_ce-loss_efficientnetb0_w2v_normal/2024-08-12_04_34_34/weights/train_19.pth
Uploaded /kaggle/working/2024-08-12_04_34_34/weights/train_4.pth to /multi-modal_hardparam_ce-loss_efficientnetb0_w2v_normal/2024-08-12_04_34_34/weights/train_4.pth
Uploaded /kaggle/working/2024-08-12_04_34_34/weights/train_7.pth to /multi-modal_hardparam_ce-loss_efficientnetb0_w2v_normal/2024-08-12_04_34_34/weights/train_7.pth
Uploaded