# Sweep attempt

In [1]:
!pip install wandb

Collecting wandb
  Obtaining dependency information for wandb from https://files.pythonhosted.org/packages/35/d3/6bfe29e4ba1eb2400d478caf8e3af9a1c366390390069cda59a7c6bf6063/wandb-0.16.1-py3-none-any.whl.metadata
  Using cached wandb-0.16.1-py3-none-any.whl.metadata (9.8 kB)
Collecting sentry-sdk>=1.0.0 (from wandb)
  Obtaining dependency information for sentry-sdk>=1.0.0 from https://files.pythonhosted.org/packages/ee/61/72bf9b0326f77486403f468b0466a3eeb6f7613ba96b714f6974fe6b9c36/sentry_sdk-1.38.0-py2.py3-none-any.whl.metadata
  Using cached sentry_sdk-1.38.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Using cached docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle (from wandb)
  Obtaining dependency information for setproctitle from https://files.pythonhosted.org/packages/79/e7/54b36be02aee8ad573be68f6f46fd62838735c2f007b22df50eb5e13a20d/setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_

In [6]:
import wandb
import numpy as np
import sys
import torch
import torch.utils.data as Data
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import LearningRateMonitor
from sklearn.metrics import r2_score
from scipy.stats import pearsonr
import torch.nn as nn

In [7]:
from funcs import regression_system
from funcs import fcnn
from funcs import dataset

In [8]:
wandb.login()

True

In [9]:
inputs = ['grad_B','FCOR', 'Nsquared', 'HML', 'TAU', 'Q', 'HBL', 'div', 'vort', 'strain']

In [10]:
submeso_dataset=dataset.SubmesoDataset(inputs,res='1_4')

In [11]:
train_loader=DataLoader(
    submeso_dataset,
    #num_workers=1,
    batch_size=64,
    sampler=SubsetRandomSampler(submeso_dataset.train_ind))

In [12]:
test_loader=DataLoader(
    submeso_dataset,
    #num_workers=1,
    batch_size=len(submeso_dataset.test_ind),
    sampler=submeso_dataset.test_ind)

In [13]:
if torch.cuda.is_available():
    print("CUDA Available")
    device = torch.device('cuda')
else:
    print('CUDA Not Available')
    device = torch.device('cpu')

CUDA Available


In [32]:
def train():
    wandb.init(project="submeso_ML",config=config)
    model=fcnn.FCNN(config)

    total_params = sum(param.numel() for param in model.parameters())
    wandb.config.update({"Model Parameters": total_params})

    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    wandb.config.update({"Trainable Parameters": trainable_params})
    
    system=regression_system.RegressionSystem(model,wandb.config["lr"],wandb.config["wd"])
    wandb.watch(model, log_freq=1)
    wandb_logger = WandbLogger()

    trainer = pl.Trainer(
        accelerator="auto",
        max_epochs=config["epochs"],
        enable_progress_bar=False,
        logger=wandb_logger,
        )
    trainer.fit(system, train_loader, test_loader)
  
    wandb.finish()

In [None]:
config={"seed":seed,
        "lr":lr,
        "wd":wd,
        "batch_size":batch_size,
        "input_channels":input_channels,
        "output_channels":output_channels,
        "activation":activation,
        "save_name":save_name,
        "save_path":save_path,
        "arch":arch,
        "conv_layers":conv_layers,
        "kernel":kernel,
        "kernel_hidden":kernel_hidden,
        "epochs":epochs}

In [33]:
sweep_config = {
    'method': 'random',
    'name': 'sweep_test',
    'metric': {
    'name': 'test_loss',      #what's the name of loss here? it's definted in regression_system right?
    'goal': 'minimize'},
    'parameters': {
        'seed': {
            'value': 123},
        'lr': {
            'distribution': 'uniform',
            'min': 0,
            'max': 0.1},
        'wd': {
            'distribution': 'uniform',
            'min': 0,
            'max': 1}}}
        'batch_size': {
            'values': [16,32,64,128,256,512,1024]},
        'input_channels': {
            'value': len(inputs)},
        'output_channels': {
            'value': 1},
        'activation': {
            'values': ['ReLU', 'LeakyReLu', 'Tanh']},
        'save_name': {
            'value': save_name},
        'save_path': {
            'value': save_path},
        'arch': {
            'value': 'fcnn'},
        'conv_layers': {
            'values': [1,2,3,4,5,6,7,8]},
        'kernal': {
            'values': [2,3,4,5,6,7,8]},
        'kernal_hidden': {
            'values': [2,3,4,5,6,7,8]},
        'epochs': {
            'value': 100},
    #'early_terminate':
    #    'type': 'hyperband',
    #    'min_iter':3}

In [34]:
'''seed=123
batch_size=256
input_channels=len(inputs)
output_channels=1
conv_layers = 2
kernel = 5
kernel_hidden = 3
activation="ReLU"
arch="fcnn"
epochs=100
save_path=BASE
save_name="test-3.pt"
lr=0.0025
wd=0.023133758465751404''';

In [35]:
sweep_id = wandb.sweep(sweep_config,project='submeso_ML')
#sweep_id = wandb.sweep(sweep=sweep_configuration, project="%s" % project_name)

Create sweep with ID: b1v02hyj
Sweep URL: https://wandb.ai/fagerheim/submeso_ML/sweeps/b1v02hyj


In [36]:
wandb.agent(sweep_id, function=train, project='submeso_ML',count=5)
#wandb.agent(sweep_id, function=train, count=30)

[34m[1mwandb[0m: Agent Starting Run: rdlr7q07 with config:
[34m[1mwandb[0m: 	activation: Tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_layers: 5
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	input_channels: 10
[34m[1mwandb[0m: 	kernal: 4
[34m[1mwandb[0m: 	kernal_hidden: 2
[34m[1mwandb[0m: 	learning_rate: 0.06708383129830534
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	wd: 0.31820142276452745
cat: /sys/module/amdgpu/initstate: No such file or directory
ERROR:root:Driver not initialized (amdgpu not found in modules)


VBox(children=(Label(value='0.036 MB of 0.036 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run rdlr7q07 errored: NameError("name 'config' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run rdlr7q07 errored: NameError("name 'config' is not defined")
[34m[1mwandb[0m: Agent Starting Run: pqu26zup with config:
[34m[1mwandb[0m: 	activation: LeakyReLu
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	conv_layers: 3
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	input_channels: 10
[34m[1mwandb[0m: 	kernal: 3
[34m[1mwandb[0m: 	kernal_hidden: 8
[34m[1mwandb[0m: 	learning_rate: 0.08740530871898394
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	wd: 0.6788077967972709
cat: /sys/module/amdgpu/initstate: No such file or directory
ERROR:root:Driver not initialized (amdgpu not found in modules)


VBox(children=(Label(value='0.036 MB of 0.036 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run pqu26zup errored: NameError("name 'config' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run pqu26zup errored: NameError("name 'config' is not defined")
[34m[1mwandb[0m: Agent Starting Run: 45zvj82i with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	conv_layers: 6
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	input_channels: 10
[34m[1mwandb[0m: 	kernal: 4
[34m[1mwandb[0m: 	kernal_hidden: 6
[34m[1mwandb[0m: 	learning_rate: 0.02066238167366118
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	wd: 0.12817020206267415
cat: /sys/module/amdgpu/initstate: No such file or directory
ERROR:root:Driver not initialized (amdgpu not found in modules)


VBox(children=(Label(value='0.036 MB of 0.036 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run 45zvj82i errored: NameError("name 'config' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run 45zvj82i errored: NameError("name 'config' is not defined")
[34m[1mwandb[0m: Agent Starting Run: cpatoywx with config:
[34m[1mwandb[0m: 	activation: LeakyReLu
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	conv_layers: 2
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	input_channels: 10
[34m[1mwandb[0m: 	kernal: 2
[34m[1mwandb[0m: 	kernal_hidden: 5
[34m[1mwandb[0m: 	learning_rate: 0.009782849147380849
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	wd: 0.7441744723394297
cat: /sys/module/amdgpu/initstate: No such file or directory
ERROR:root:Driver not initialized (amdgpu not found in modules)


VBox(children=(Label(value='0.036 MB of 0.036 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

Run cpatoywx errored: NameError("name 'config' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run cpatoywx errored: NameError("name 'config' is not defined")
[34m[1mwandb[0m: Agent Starting Run: wqlmqkeu with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	conv_layers: 8
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	input_channels: 10
[34m[1mwandb[0m: 	kernal: 6
[34m[1mwandb[0m: 	kernal_hidden: 7
[34m[1mwandb[0m: 	learning_rate: 0.0762862600005293
[34m[1mwandb[0m: 	seed: 123
[34m[1mwandb[0m: 	wd: 0.2287153309919775
cat: /sys/module/amdgpu/initstate: No such file or directory
ERROR:root:Driver not initialized (amdgpu not found in modules)
[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [37]:
wandb.finish()

Problem at:

Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m Problem finishing run
Exception in thread Thread-23 (_run_job):
Traceback (most recent call last):
  File "/srv/conda/envs/notebook/lib/python3.10/site-packages/wandb/sdk/wandb_init.py", line 1170, in init
    run = wi.init()
  File "/srv/conda/envs/notebook/lib/python3.10/site-packages/wandb/sdk/wandb_init.py", line 840, in init
    run._on_start()
  File "/srv/conda/envs/notebook/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 2272, in _on_start
    self._header(
  File "/srv/conda/envs/notebook/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 3430, in _header
    Run._header_wandb_version_info(settings=settings, printer=printer)
  File "/srv/conda/envs/notebook/lib/python3.10/site-packages/wandb/sdk/wandb_run.py", line 3463, in _header_wandb_version_info
    printer.display(
  File "/srv/conda/envs/notebook/lib/python3.10/site-packages/wandb/sdk/lib/printer.py", line 68, in display
    se

AttributeError: 'ZMQDisplayPublisher' object has no attribute '_orig_publish'