In [18]:
from sklearn.model_selection import KFold
import pandas as pd

In [19]:
import wandb
# Authenticate this session with Weights & Biases before the sweep is created.
wandb.login()

True

In [20]:
# Sweep definition; only the search strategy is set here, the metric and the
# parameter space are attached to this dict afterwards.
sweep_config = dict(method='random')

In [21]:
# Objective of the sweep: the logged value named 'Grand Mean', to be maximised.
metric = dict(name='Grand Mean', goal='maximize')
sweep_config.update(metric=metric)

In [22]:
# Hyperparameter space in the sweep's {'name': {'values': [...]}} layout.
# Every parameter lists exactly one candidate, so all runs share one configuration.
parameters_dict = {
    name: {'values': [candidate]}
    for name, candidate in (
        ('dropout', 0.72),
        ('learning_rate', 0.0005),
        ('batch_size', 4096),
        ('data_augmentation_multiple', 5),
    )
}

In [23]:
# Attach the hyperparameter space to the sweep definition.
sweep_config.update(parameters=parameters_dict)

In [24]:
# Register the sweep under the given W&B project; the returned id is what the
# agent is started with later on.
sweep_id = wandb.sweep(sweep_config, project="sub_loc_train_only_smote")

Create sweep with ID: my2iqzjz
Sweep URL: https://wandb.ai/imucs/sub_loc_train_only_smote/sweeps/my2iqzjz


In [25]:
import pprint

# Show the fully assembled sweep configuration as a visual sanity check.
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'Grand Mean'},
 'parameters': {'batch_size': {'values': [4096]},
                'data_augmentation_multiple': {'values': [5]},
                'dropout': {'values': [0.72]},
                'learning_rate': {'values': [0.0005]}}}


In [26]:
# Load the feature table and the label table from CSV (features first, labels second).
# NOTE(review): presumably row i of both files describes the same sample — confirm.
feature_pd, labels_pd = map(
    pd.read_csv,
    (
        '/home/kongge/projects/new_protT5/data/DPC_T5_578_right.csv',
        "/home/kongge/projects/new_protT5/data/mutil_label_578.csv",
    ),
)

In [27]:
from dataAug.tools import MLDA

In [28]:
from dataAug.all_tools import dataAugSMOTE

In [29]:

# Run-level results filled in by train_and_val:
# int(timestamp) -> list of per-column mean scores across the folds.
smote_multiple = dict()

In [30]:
import time
from classify.targeTools import testThresholdFive, Accuracy
from torch import optim
from torch.utils.data import DataLoader, TensorDataset
import torch
from classify.Classify_adjust import ModelClassify
def _fit_one_fold(model, train_loader, learning_rate, epochs):
    """Train ``model`` in place on ``train_loader`` for ``epochs`` epochs.

    Optimises binary cross-entropy with Adam and prints one progress line per
    epoch containing the mean batch loss and the accuracy of that epoch's last
    mini-batch.
    """
    criterion = torch.nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    threshold = 0.5
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.float()
            labels = labels.float()
            out = model(inputs)
            loss = criterion(out, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        # Progress figure only: it is computed on the last mini-batch of the
        # epoch, from outputs produced in train mode.
        labels_cov = (out > threshold).long()
        print(f"Epoch [{epoch+1}/{epochs}], Average Loss: {avg_loss:.4f}, ACC: {Accuracy(labels.int(), labels_cov)}")


def train_and_val(config=None):
    """Execute one sweep run: SMOTE augmentation followed by 10-fold cross-validation.

    For every fold a fresh ``ModelClassify`` is trained for 100 epochs and then
    scored once on the held-out fold with ``testThresholdFive``. The fold scores
    are averaged position by position; the resulting list is stored in the
    module-level ``smote_multiple`` dict under the integer timestamp taken when
    all folds have finished, and its first entry is logged to W&B as
    ``"Grand Mean"``.

    Args:
        config: Passed straight to ``wandb.init``. The hyperparameters actually
            used (``dropout``, ``learning_rate``, ``batch_size``,
            ``data_augmentation_multiple``) are read from ``wandb.config``.

    Reads the module-level ``feature_pd`` / ``labels_pd`` frames.

    NOTE(review): the augmented rows are generated from the complete dataset
    *before* the K-fold split, so held-out folds contain augmented rows and
    training folds contain rows derived from (presumably) held-out originals.
    Scores are therefore likely optimistic. Generating the augmentation inside
    the fold loop from the training indices only would remove the leak, but it
    changes the reported numbers and needs to be validated against
    ``dataAugSMOTE`` — left as is here.

    NOTE(review): neither the fold shuffle nor the model initialisation is
    seeded, so repeated runs with the same configuration differ.
    """
    n_splits = 10
    epochs = 100
    num_class = 5
    # Used as ModelClassify(feature_num=...) and, as in the original call, as the
    # 4th positional argument of dataAugSMOTE (presumably the feature count —
    # TODO confirm).
    feature_num = 1424

    with wandb.init(config=config):
        config = wandb.config
        kf = KFold(n_splits=n_splits, shuffle=True)
        fold_scores = []

        G_feature, G_label = dataAugSMOTE(feature_pd, labels_pd, config.data_augmentation_multiple, feature_num)

        feature_all = pd.concat([feature_pd, G_feature], axis=0)
        label_all = pd.concat([labels_pd, G_label], axis=0)
        print(feature_all.shape)
        print(label_all.shape)

        # Installed once per run instead of once per fold; the filter is
        # process-wide either way.
        import warnings
        warnings.filterwarnings("ignore")

        for train_index, test_index in kf.split(feature_all):
            train_set = TensorDataset(
                torch.tensor(feature_all.iloc[train_index].values),
                torch.tensor(label_all.iloc[train_index].values),
            )
            test_set = TensorDataset(
                torch.tensor(feature_all.iloc[test_index].values),
                torch.tensor(label_all.iloc[test_index].values),
            )
            dataloaderTrain = DataLoader(train_set, batch_size=config.batch_size, shuffle=True)
            # The whole held-out fold is scored as a single batch.
            dataloaderTest = DataLoader(test_set, batch_size=len(test_set), shuffle=False)

            model = ModelClassify(drop_rate=config.dropout, num_class=num_class, feature_num=feature_num)
            _fit_one_fold(model, dataloaderTrain, config.learning_rate, epochs)

            # Make sure dropout is disabled while scoring. This is harmless if
            # testThresholdFive already switches the model to eval mode itself.
            model.eval()
            # Scoring happens once, after the final epoch; the first argument is
            # the index of that epoch, exactly as before.
            fold_score = testThresholdFive(epochs - 1, model, dataloaderTest, class_num=num_class)
            fold_scores.append([x.item() if isinstance(x, torch.Tensor) else x for x in fold_score])

        # Average each score position across the folds.
        model_discord_column_means = [sum(col) / len(col) for col in zip(*fold_scores)]
        # Timestamp taken once all folds are done; it keys this run's results.
        smote_multiple[int(time.time())] = model_discord_column_means
        wandb.log({"Grand Mean": model_discord_column_means[0]})

In [31]:
# Start a sweep agent that calls train_and_val for 6 runs of the sweep.
wandb.agent(sweep_id, train_and_val, count=6)

[34m[1mwandb[0m: Agent Starting Run: gigqxkwj with config:
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	data_augmentation_multiple: 5
[34m[1mwandb[0m: 	dropout: 0.72
[34m[1mwandb[0m: 	learning_rate: 0.0005


(3583, 1424)
(3583, 5)
Epoch [1/100], Average Loss: 0.7299, ACC: 0.2033136889991733
Epoch [2/100], Average Loss: 0.7226, ACC: 0.20992038875103486
Epoch [3/100], Average Loss: 0.7111, ACC: 0.2215880893300263
Epoch [4/100], Average Loss: 0.7059, ACC: 0.23451199338296283
Epoch [5/100], Average Loss: 0.6986, ACC: 0.24310897435897655
Epoch [6/100], Average Loss: 0.6877, ACC: 0.2527295285359829
Epoch [7/100], Average Loss: 0.6835, ACC: 0.26278432588916745
Epoch [8/100], Average Loss: 0.6686, ACC: 0.2845843672456609
Epoch [9/100], Average Loss: 0.6616, ACC: 0.29807692307692685
Epoch [10/100], Average Loss: 0.6488, ACC: 0.31872932175351937
Epoch [11/100], Average Loss: 0.6400, ACC: 0.3405086848635257
Epoch [12/100], Average Loss: 0.6312, ACC: 0.3517162944582312
Epoch [13/100], Average Loss: 0.6238, ACC: 0.3592741935483881
Epoch [14/100], Average Loss: 0.6121, ACC: 0.3740539702233257
Epoch [15/100], Average Loss: 0.6047, ACC: 0.3868331265508678
Epoch [16/100], Average Loss: 0.5969, ACC: 0.39091

VBox(children=(Label(value='0.009 MB of 0.072 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.117530…

0,1
Grand Mean,▁

0,1
Grand Mean,0.9449


[34m[1mwandb[0m: Agent Starting Run: zcxac1fs with config:
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	data_augmentation_multiple: 5
[34m[1mwandb[0m: 	dropout: 0.72
[34m[1mwandb[0m: 	learning_rate: 0.0005


(3583, 1424)
(3583, 5)
Epoch [1/100], Average Loss: 0.7182, ACC: 0.20471464019851168
Epoch [2/100], Average Loss: 0.7128, ACC: 0.20750103391232513
Epoch [3/100], Average Loss: 0.7043, ACC: 0.22594602977667647
Epoch [4/100], Average Loss: 0.6956, ACC: 0.22868072787427773
Epoch [5/100], Average Loss: 0.6867, ACC: 0.2528019023986786
Epoch [6/100], Average Loss: 0.6791, ACC: 0.26370967741935764
Epoch [7/100], Average Loss: 0.6692, ACC: 0.2790736145574882
Epoch [8/100], Average Loss: 0.6587, ACC: 0.29224565756824156
Epoch [9/100], Average Loss: 0.6460, ACC: 0.31522952853598407
Epoch [10/100], Average Loss: 0.6372, ACC: 0.3327956989247335
Epoch [11/100], Average Loss: 0.6266, ACC: 0.3514733250620359
Epoch [12/100], Average Loss: 0.6158, ACC: 0.3680366004962791
Epoch [13/100], Average Loss: 0.6065, ACC: 0.3889216294458221
Epoch [14/100], Average Loss: 0.5961, ACC: 0.3957247725392883
Epoch [15/100], Average Loss: 0.5896, ACC: 0.412034739454093
Epoch [16/100], Average Loss: 0.5808, ACC: 0.42207

VBox(children=(Label(value='0.009 MB of 0.072 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.117810…

0,1
Grand Mean,▁

0,1
Grand Mean,0.94432


[34m[1mwandb[0m: Agent Starting Run: l8g80dio with config:
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	data_augmentation_multiple: 5
[34m[1mwandb[0m: 	dropout: 0.72
[34m[1mwandb[0m: 	learning_rate: 0.0005


(3583, 1424)
(3583, 5)
Epoch [1/100], Average Loss: 0.7290, ACC: 0.19596774193548425
Epoch [2/100], Average Loss: 0.7142, ACC: 0.21854838709677518
Epoch [3/100], Average Loss: 0.7063, ACC: 0.23742762613730523
Epoch [4/100], Average Loss: 0.6986, ACC: 0.23754135649297117
Epoch [5/100], Average Loss: 0.6877, ACC: 0.2585246071133192
Epoch [6/100], Average Loss: 0.6782, ACC: 0.2751705955335018
Epoch [7/100], Average Loss: 0.6688, ACC: 0.2886838296112527
Epoch [8/100], Average Loss: 0.6577, ACC: 0.30525227460711707
Epoch [9/100], Average Loss: 0.6438, ACC: 0.33364350703060713
Epoch [10/100], Average Loss: 0.6333, ACC: 0.3455179900744437
Epoch [11/100], Average Loss: 0.6234, ACC: 0.3622622001654271
Epoch [12/100], Average Loss: 0.6182, ACC: 0.3650330851943759
Epoch [13/100], Average Loss: 0.6076, ACC: 0.3812810173697275
Epoch [14/100], Average Loss: 0.5993, ACC: 0.39733250620347355
Epoch [15/100], Average Loss: 0.5932, ACC: 0.401830024813895
Epoch [16/100], Average Loss: 0.5843, ACC: 0.41962

VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Grand Mean,▁

0,1
Grand Mean,0.94208


[34m[1mwandb[0m: Agent Starting Run: mey8r9s4 with config:
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	data_augmentation_multiple: 5
[34m[1mwandb[0m: 	dropout: 0.72
[34m[1mwandb[0m: 	learning_rate: 0.0005


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666879555000378, max=1.0)…

(3583, 1424)
(3583, 5)
Epoch [1/100], Average Loss: 0.7382, ACC: 0.20046526054590616
Epoch [2/100], Average Loss: 0.7325, ACC: 0.2031689412737802
Epoch [3/100], Average Loss: 0.7223, ACC: 0.22067307692307803
Epoch [4/100], Average Loss: 0.7161, ACC: 0.22949751861042367
Epoch [5/100], Average Loss: 0.7046, ACC: 0.2488058312655113
Epoch [6/100], Average Loss: 0.6939, ACC: 0.2606182795698957
Epoch [7/100], Average Loss: 0.6878, ACC: 0.27601840363937474
Epoch [8/100], Average Loss: 0.6770, ACC: 0.29219396195203023
Epoch [9/100], Average Loss: 0.6673, ACC: 0.311429900744421
Epoch [10/100], Average Loss: 0.6585, ACC: 0.3240281224152232
Epoch [11/100], Average Loss: 0.6449, ACC: 0.343196856906537
Epoch [12/100], Average Loss: 0.6366, ACC: 0.35291563275434396
Epoch [13/100], Average Loss: 0.6262, ACC: 0.3653122415219192
Epoch [14/100], Average Loss: 0.6183, ACC: 0.37774503722084357
Epoch [15/100], Average Loss: 0.6100, ACC: 0.3845068238213401
Epoch [16/100], Average Loss: 0.6021, ACC: 0.393837

VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Grand Mean,▁

0,1
Grand Mean,0.94343


[34m[1mwandb[0m: Agent Starting Run: ipsbh6ay with config:
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	data_augmentation_multiple: 5
[34m[1mwandb[0m: 	dropout: 0.72
[34m[1mwandb[0m: 	learning_rate: 0.0005


(3583, 1424)
(3583, 5)
Epoch [1/100], Average Loss: 0.7355, ACC: 0.18443961952026447
Epoch [2/100], Average Loss: 0.7232, ACC: 0.20365488006617127
Epoch [3/100], Average Loss: 0.7146, ACC: 0.2133478081058738
Epoch [4/100], Average Loss: 0.7090, ACC: 0.22535669975186237
Epoch [5/100], Average Loss: 0.7012, ACC: 0.23536497105045684
Epoch [6/100], Average Loss: 0.6952, ACC: 0.2503618693134844
Epoch [7/100], Average Loss: 0.6826, ACC: 0.26069582299421273
Epoch [8/100], Average Loss: 0.6752, ACC: 0.2690239867659249
Epoch [9/100], Average Loss: 0.6626, ACC: 0.30058416046319636
Epoch [10/100], Average Loss: 0.6531, ACC: 0.31553453267163317
Epoch [11/100], Average Loss: 0.6442, ACC: 0.3280397022332534
Epoch [12/100], Average Loss: 0.6307, ACC: 0.34255066170388987
Epoch [13/100], Average Loss: 0.6240, ACC: 0.35967224979321905
Epoch [14/100], Average Loss: 0.6122, ACC: 0.372585814722912
Epoch [15/100], Average Loss: 0.6064, ACC: 0.3888854425144745
Epoch [16/100], Average Loss: 0.5973, ACC: 0.393

0,1
Grand Mean,▁

0,1
Grand Mean,0.94754


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fy6i09uj with config:
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	data_augmentation_multiple: 5
[34m[1mwandb[0m: 	dropout: 0.72
[34m[1mwandb[0m: 	learning_rate: 0.0005


(3583, 1424)
(3583, 5)
Epoch [1/100], Average Loss: 0.7234, ACC: 0.2005789909015719
Epoch [2/100], Average Loss: 0.7159, ACC: 0.2079818031430941
Epoch [3/100], Average Loss: 0.7083, ACC: 0.22054900744416964
Epoch [4/100], Average Loss: 0.6985, ACC: 0.23520988420182146
Epoch [5/100], Average Loss: 0.6898, ACC: 0.2510752688172063
Epoch [6/100], Average Loss: 0.6800, ACC: 0.2638906120760985
Epoch [7/100], Average Loss: 0.6694, ACC: 0.2845119933829647
Epoch [8/100], Average Loss: 0.6635, ACC: 0.28982113316791086
Epoch [9/100], Average Loss: 0.6496, ACC: 0.31548800661704324
Epoch [10/100], Average Loss: 0.6398, ACC: 0.33142059553350184
Epoch [11/100], Average Loss: 0.6314, ACC: 0.3402760545905733
Epoch [12/100], Average Loss: 0.6206, ACC: 0.3624431348221688
Epoch [13/100], Average Loss: 0.6133, ACC: 0.36902915632754435
Epoch [14/100], Average Loss: 0.6053, ACC: 0.3721464019851125
Epoch [15/100], Average Loss: 0.5973, ACC: 0.39284015715467274
Epoch [16/100], Average Loss: 0.5889, ACC: 0.4033

VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Grand Mean,▁

0,1
Grand Mean,0.93346


In [32]:
# Inspect the per-run mean scores gathered by train_and_val (keyed by integer timestamp).
smote_multiple

{1695265751: [0.9448953866958618,
  0.9101476789965922,
  0.9427021573478991,
  0.9448885440826416,
  0.9731630861759186,
  0.9535754144191741],
 1695266426: [0.9443237960338593,
  0.912386206252626,
  0.9422404983841937,
  0.9447523951530457,
  0.9699548125267029,
  0.952285087108612],
 1695267103: [0.94207923412323,
  0.9028602107032258,
  0.9395670261381968,
  0.9422643542289734,
  0.9738553166389465,
  0.9518492341041564],
 1695267793: [0.9434331834316254,
  0.9070548232987348,
  0.9411072293718844,
  0.943432766199112,
  0.9730200469493866,
  0.952551144361496],
 1695268376: [0.9475350558757782,
  0.9123722008683337,
  0.9452559354299911,
  0.9476271152496338,
  0.9761406600475311,
  0.9562793374061584],
 1695268849: [0.9334625542163849,
  0.8810600519755372,
  0.9300893361966563,
  0.9322525918483734,
  0.9773510813713073,
  0.9465597450733185]}

In [33]:
import json

# Persist the collected run results. The context manager closes (and flushes)
# the file even if serialisation raises, which the manual open()/close() pair
# did not guarantee. Note that JSON object keys are strings, so the integer
# timestamp keys are written as strings.
with open("/home/kongge/projects/new_protT5/data/dictionary_data_only_smote.json", "w") as file:
    json.dump(smote_multiple, file)