<a href="https://colab.research.google.com/github/dbanerjee181/Fixmatch/blob/main/Untitled13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
!git clone https://github.com/megvii-research/FullMatch.git

fatal: destination path 'FullMatch' already exists and is not an empty directory.


In [5]:
import os
import logging
import random
import warnings
import numpy as np

import megengine
import megengine.functional as F
import megengine.module as M
import megengine.optimizer as optim
import megengine.autodiff as autodiff
import megengine.distributed as dist
import megengine.data as data

from train_utils import TBLog, get_optimizer
from utils import get_logger, net_builder, str2bool, over_write_args_from_file
from models.fullflex.fullflex import FullFlex
from datasets.ssl_dataset import SSL_Dataset

ModuleNotFoundError: No module named 'megengine'

In [6]:
def worker(args):

    args.world_size = dist.get_world_size()
    args.gpu = dist.get_rank()
    save_path = os.path.join(args.save_dir, args.save_name)

    if args.seed is not None:
        random.seed(args.seed)
        megengine.random.seed(args.seed)
        np.random.seed(args.seed)

    save_path = os.path.join(args.save_dir, args.save_name)
    logger_level = "WARNING"
    tb_log = None
    if args.gpu == 0 :
        tb_log = TBLog(args.save_dir, args.save_name)
        logger_level = "INFO"

    logger = get_logger(args.save_name, save_path, logger_level)
    logger.warning(f"USE GPU: {args.gpu} for training")

    if args.dataset.upper() == 'CIFAR100' and args.num_labels==400 and args.world_size > 1:
        args.sync_bn = True

    args.bn_momentum = 0.999
    if 'imagenet' in args.dataset.lower():
        print('Please Waiting for Supporting')
        exit()
    else:
        _net_builder = net_builder(args.net, args.net_from_name,
                                    {'first_stride': 2 if 'stl' in args.dataset else 1,
                                    'depth': args.depth,
                                    'widen_factor': args.widen_factor,
                                    'leaky_slope': args.leaky_slope,
                                    'bn_momentum': args.bn_momentum,
                                    'dropRate': args.dropout,
                                    'use_embed': False,
                                    'is_remix': False,
                                    'sync_bn': args.sync_bn},)

    model = FullFlex(_net_builder, args.num_classes, args.ema_m, args.p_cutoff, args.ulb_loss_ratio, args.hard_label,
                        num_eval_iter=args.num_eval_iter, tb_log=tb_log, logger=logger)

    optimizer = get_optimizer(model.model, args.optim, args.lr, args.momentum, args.weight_decay)
    model.set_optimizer(optimizer)

    if args.resume:
        logger.info("==> Resuming from checkpoint..")
        assert os.path.isfile(args.resume), "Error: no checkpoint directory found!"
        checkpoint = megengine.load(args.resume, map_location='cpu')
        model.model.load_state_dict(checkpoint['state_dict'])
        model.ema.ema.load_state_dict(checkpoint['ema_state_dict'])
        megengine.distributed.group_barrier()

    args.batch_size = int(args.batch_size / args.world_size)
    logger.info(f"model_arch: {model}")

    if args.dataset != "imagenet":
        if args.gpu != 0:
            megengine.distributed.group_barrier()
        train_dset = SSL_Dataset(args, name=args.dataset, train=True, num_classes=args.num_classes, data_dir=args.data_dir)
        lb_dset, ulb_dset = train_dset.get_ssl_dset(args.num_labels)

        _eval_dset = SSL_Dataset(args, name=args.dataset, train=False, num_classes=args.num_classes, data_dir=args.data_dir)
        eval_dset = _eval_dset.get_dset()
        if args.gpu == 0:
            megengine.distributed.group_barrier()
    else:
        print('Please Waiting for Supporting')
        exit()

    loader_dict = {}
    dset_dict = {'train_lb': lb_dset, 'train_ulb': ulb_dset, 'eval': eval_dset}

    loader_dict['train_lb'] = data.DataLoader(dset_dict['train_lb'],
                                        sampler= data.Infinite(data.RandomSampler(dset_dict['train_lb'], batch_size=args.batch_size)),
                                        num_workers=args.num_workers)
    loader_dict['train_ulb'] = data.DataLoader(dset_dict['train_ulb'],
                                        sampler=data.Infinite(data.RandomSampler(dset_dict['train_ulb'], batch_size=args.batch_size*args.uratio)),
                                        num_workers=args.num_workers)
    loader_dict['eval'] = data.DataLoader(dset_dict['eval'],
                                    sampler=data.SequentialSampler(dset_dict['eval'], batch_size=args.eval_batch_size,),
                                    num_workers=4)

    model.set_data_loader(loader_dict)
    model.set_dset(ulb_dset)
    megengine.distributed.group_barrier()

    trainer = model.train
    trainer(args, logger=logger)


In [2]:
from tensorboardX import SummaryWriter
import megengine as mge
import megengine.functional as F
import megengine.distributed as dist

from copy import deepcopy
import os
import math

ModuleNotFoundError: No module named 'tensorboardX'

In [3]:
def get_optimizer(net, optim_name='SGD', lr=0.1, momentum=0.9, weight_decay=0, nesterov=True, bn_wd_skip=True):

    decay = []
    no_decay = []
    for name, param in net.named_parameters():
        if ('bn' in name or 'bias' in name) and bn_wd_skip:
            no_decay.append(param)
        else:
            decay.append(param)

    per_param_args = [{'params': decay},
                      {'params': no_decay, 'weight_decay': 0.0}]

    optimizer = mge.optimizer.SGD(per_param_args, lr=lr, momentum=momentum, weight_decay=weight_decay, nesterov=nesterov)

    return optimizer

In [4]:
def adjust_learning_rate(optimizer, current_step, num_training_steps, num_cycles=7. / 16., num_warmup_steps=0, base_lr=0.03):

    if current_step < num_warmup_steps:
        _lr = float(current_step) / float(max(1, num_warmup_steps))
    else:
        num_cos_steps = float(current_step - num_warmup_steps)
        num_cos_steps = num_cos_steps / float(max(1, num_training_steps - num_warmup_steps))
        _lr = max(0.0, math.cos(math.pi * num_cycles * num_cos_steps))
    _lr = _lr * base_lr
    for param_group in optimizer.param_groups:
        param_group["lr"] = _lr
    return _lr

In [20]:
!pip install megengine

Collecting megengine
  Downloading MegEngine-1.13.0-cp310-cp310-manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pyarrow<=11.0.0 (from megengine)
  Downloading pyarrow-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting mprop (from megengine)
  Downloading mprop-0.17.0.tar.gz (14 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting megfile==2.0.5 (from megengine)
  Downloading megfile-2.0.5-py3-none-any.whl.metadata (10 kB)
Collecting boto3 (from megfile==2.0.5->megengine)
  Downloading boto3-1.35.54-py3-none-any.whl.metadata (6.7 kB)
Collecting botocore>=1.13.0 (from megfile==2.0.5->megengine)
  Downloading botocore-1.35.54-py3-none-any.whl.metadata (5.7 kB)
Collecting paramiko (from megfile==2.0.5->megengine)
  Downloading paramiko-3.5.0-py3-none-any.whl.metadata (4.4 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from botocore>=1.13.0->megfile==2.0.5->megengine)
  Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)
Col

In [21]:
!python /content/Full_Match_pytorch/fullflex.py --c /content/Full_Match_pytorch/config/fullflex/fullflex_cifar100.yaml

2024-11-03 22:31:06.635161: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-03 22:31:06.660932: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-03 22:31:06.668136: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-03 22:31:06.686106: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
info: +++++++++++++++++++++++++++++++++++++++++++++++