In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes, so edits to the project's helper modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
#Imports:
from comet_ml import Experiment
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import random
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB,CategoricalNB
import nltk
nltk.download('stopwords')
nltk.download('punkt')
import re
from nltk.corpus import stopwords
import string
from sklearn import preprocessing
from sklearn.manifold import TSNE
import seaborn as sns
from nltk.stem.porter import PorterStemmer
from sklearn.metrics import log_loss
from sklearn.linear_model import LogisticRegression,SGDClassifier
from sklearn import svm
from nltk.tokenize import word_tokenize
from sklearn.metrics import accuracy_score
from time import time
from sklearn.model_selection import StratifiedKFold
from sklearn.decomposition import PCA
import optuna
# Plot styling: apply the seaborn theme first, then layer the matplotlib
# overrides (figure size and font settings) on top of it.
sns.set(
    context='notebook',
    style='darkgrid',
    palette='colorblind',
    font='sans-serif',
    font_scale=1,
    rc=None,
)
matplotlib.rcParams.update({
    'figure.figsize': [8, 8],
    'font.size': 15,
    'font.family': 'sans-serif',
})

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/bartalisd/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/bartalisd/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
#Load data:
# ../data_root.txt holds the directory under which the tweet_disaster CSVs live.
with open("../data_root.txt") as f:
    # Strip surrounding whitespace: a trailing newline (added by most editors)
    # would otherwise end up in the middle of the CSV paths built below.
    data_root_dir = f.read().strip()
train = pd.read_csv('%s/tweet_disaster/train.csv' % data_root_dir)
test = pd.read_csv('%s/tweet_disaster/test.csv' % data_root_dir)

In [4]:
#Analyse & clean data:
# NOTE: `train` is rebound to the second value returned by analysis(), so this
# cell is not idempotent -- re-running it passes analysis() its own previous
# output instead of the frame loaded from train.csv.
from data_analysis import analysis, data_cleaning
frame, train, vocabulary = analysis(train, test)
# data_cleaning() receives the `train` returned by analysis() above, not the
# frame read from disk. X and Y feed the train/test split in the next cell.
X, Y = data_cleaning(train, test)

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/bartalisd/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/bartalisd/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['text_length'] = train.text.apply(lambda x: len(x.split()))


In [5]:
#Train-test cut:
# NOTE(review): 0.3 is presumably the held-out fraction -- the matrix shapes
# printed by the TF-IDF cell (5264 vs 2257 columns) are consistent with a
# 70/30 split; confirm against the definition of traintest().
from apricot_exp.evaluation import traintest
X_train, X_test, Y_train, Y_test = traintest(X,Y,0.3)

In [6]:
#TFIDF & transpose data:
# Feature budget handed to the tfidf() helper (presumably the vocabulary cap;
# the printed shapes have 10000 as their first dimension).
max_num_features = 10000
from data_analysis import tfidf
# The returned matrices are laid out as (features, samples): the selection
# cells further down call .transpose() on the selected matrices before training.
features_t, features_test_t = tfidf(X_train, X_test, max_num_features)
print(features_t.shape)
print(features_test_t.shape)

(10000, 5264)
(10000, 2257)


In [7]:
# NOTE(review): the star import hides which names this notebook depends on.
# Visible uses are load_api_key (below) and init_experiment (later cells).
# Later cells also call `time.time()` even though the imports cell binds `time`
# to the function (`from time import time`), so this star import presumably
# re-exports the `time` module -- confirm before replacing it with explicit names.
from apricot_exp.comet_utils import *
# The Comet API key is read from a file by path rather than hard-coded here.
api_key = load_api_key('../../comet_key.txt')

In [8]:
#Bare model:
# Baseline run: logistic regression on the top-n TF-IDF features with no
# apricot subset selection. Logged to Comet with 'size' and 'function' set to
# None (presumably to mark it as the unselected reference run).
from apricot_exp.evaluation import train_eval

model = LogisticRegression(max_iter=1000)
n = 9000

# Named `tfidf_vectorizer` (not `tfidf`) so the `tfidf` helper imported from
# data_analysis in the TF-IDF cell is not overwritten by a vectorizer instance.
tfidf_vectorizer = TfidfVectorizer(sublinear_tf=True,max_features=n, min_df=1, norm='l2',  ngram_range=(1,2))
features = tfidf_vectorizer.fit_transform(X_train).toarray()
features_test = tfidf_vectorizer.transform(X_test).toarray()
print(features.shape, features_test.shape)

experiment=init_experiment(api_key, 'tweet_disaster', 'apricot')
try:
    experiment.log_parameters({
        'size': None,
        'function': None,
    })
    train_eval(model, features,  Y_train, features_test, Y_test, experiment)
finally:
    # Always close the Comet experiment, even if training/evaluation raises,
    # so a failed run does not leave a live experiment behind.
    experiment.end()

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/ecbefb7ccf5a423f837b3fbb0ac5c67d



(5264, 9000) (2257, 9000)


COMET INFO: Uploading stats to Comet before program termination (may take several seconds)
COMET INFO: Still uploading


In [9]:
from apricot_exp.func_tp import featureb, facilityloc, maxcov
from comet import suggest_config, extract_grid

In [10]:
from optuna.samplers import TPESampler, RandomSampler, GridSampler
from parameters import param, algo
from apricot_exp.evaluation import train_eval

In [11]:
#Feature-based(naive, lazy, two-stage running time)
# For every subset size and optimizer: run feature-based selection with the
# "sqrt" function, log how long the selection took, train/evaluate the shared
# `model` on the selected features, and print the total elapsed time.
#
# The timer is imported locally: the imports cell does `from time import time`,
# which binds `time` to a function, so `time.time()` only works when some other
# import happens to rebind `time` to the module.
from time import perf_counter

for i in [100, 500, 1000, 2500, 5000, 7500, 8000, 9000]:
    for j in ["naive", "lazy", "two-stage"]:
        experiment=init_experiment(api_key, 'tweet_disaster', 'apricot')
        try:
            # Log the run parameters up front so they are recorded even if
            # selection or training fails part-way through.
            experiment.log_parameters({
                'optimizer': j,
                'size': i,
                'function': 'featurebased'
            })
            start_time = perf_counter()
            Xtr_t, Xte_t = featureb(features_t, features_test_t, i, "sqrt", j)
            # "running_time" covers the selection step only.
            experiment.log_metric("running_time", perf_counter()-start_time)
            # featureb works on (features, samples) matrices; transpose back
            # before handing the data to train_eval.
            Xtr = Xtr_t.transpose()
            Xte = Xte_t.transpose()
            acc, pre, rec, roc = train_eval(model, Xtr, Y_train, Xte, Y_test, experiment)
            print(j)
            # Printed time includes selection plus training/evaluation.
            print(perf_counter()-start_time)
        finally:
            # Close the experiment even when a run raises.
            experiment.end()

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/7d6f78b719ff48de9a0c8abd517b662f

COMET INFO: Still uploading


naive
8.883957386016846


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/c58ae00e7aa74150b48461585db4fb84

COMET INFO: Still uploading


lazy
5.935347080230713


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/4aee88119e8341ee89127ac68224290a

COMET INFO: Still uploading


two-stage
6.805429458618164


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/4009ffaa07c641a7b6d46483e6773c53

COMET INFO: Still uploading


naive
21.53317403793335


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/5eb3cb73ef0f4f8ea09cdd937afd92ad

COMET INFO: Still uploading


lazy
8.776475429534912


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/d9956c4c6cef4ee0baf0dcfbeae26253

COMET INFO: Still uploading


two-stage
24.756285667419434


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/b1666911cdd9432aa31ed2cc4e69d962

COMET INFO: Still uploading


naive
49.86116409301758


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/2795658ec0224b509b0aa7700e2a6e25

COMET INFO: Still uploading


lazy
24.49023127555847


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/839359c930b343c3bf7753d963cd8785

COMET INFO: Still uploading


two-stage
50.83109498023987


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/f992102f3fbb45cd82632deba315f5c8

COMET INFO: Still uploading


naive
216.6498670578003


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/e5651ae5ff0c4054a0774fa35f0637f8

COMET INFO: Still uploading


lazy
166.38418769836426


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/07e078f10ac7496b8cb551e08c19fb53

COMET INFO: Still uploading


two-stage
238.60188698768616


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/c594ed08e0f542bcab8eca10648a80be

COMET INFO: Still uploading


naive
783.258768081665


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/9965c5889416435fa8ee066866224e32

COMET INFO: Still uploading


lazy
670.4934828281403


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/2e5a2e6702cd4f90a06e9f191dd043a5

COMET INFO: Still uploading


two-stage
827.2879586219788


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/debed4e740c94f56b96bd082012793dc

COMET INFO: Still uploading


naive
1729.0269129276276


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/c52d0d959e4c49f99a1cb074ae9425c3

COMET INFO: Still uploading


lazy
1537.4941442012787


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/771b958e14644844b9ccf221a08f8d04

COMET INFO: Still uploading


two-stage
1780.882728099823


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/0bc3bacbb64e4e65a2cb43b6449e2d4b

COMET INFO: Still uploading


naive
1917.6446788311005


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/3886a1b437954bc9a3853deca88bde76

COMET INFO: Still uploading


lazy
1725.3056592941284


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/beb0703b21784a94ac173e2ab53e0472

COMET INFO: Still uploading


two-stage
2019.609894990921


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/e6c77477990b4a5ba65fef046ceb32f6

COMET INFO: Still uploading


naive
2465.3778603076935


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/b98ac3748889422587c16a8607e4da9a

COMET INFO: Still uploading


lazy
2196.1041157245636


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/7b6db85758e8479aa24cf4cc5917d1d4

COMET INFO: Still uploading


two-stage
2541.78133559227


In [None]:
#Feature-based(approximate-lazy, stochastic, sample)
# Same experiment as the naive/lazy/two-stage timing cell, for the approximate
# optimizers: run feature-based selection with the "sqrt" function, log the
# selection time, train/evaluate the shared `model`, and print the total time.
#
# The timer is imported locally: the imports cell does `from time import time`,
# which binds `time` to a function, so `time.time()` only works when some other
# import happens to rebind `time` to the module.
from time import perf_counter

for i in [100, 500, 1000, 2500, 5000, 7500, 8000, 9000]:
    for j in ["approximate-lazy", "stochastic", "sample"]:
        experiment=init_experiment(api_key, 'tweet_disaster', 'apricot')
        try:
            # Log the run parameters up front so they are recorded even if
            # selection or training fails part-way through.
            experiment.log_parameters({
                'optimizer': j,
                'size': i,
                'function': 'featurebased'
            })
            start_time = perf_counter()
            Xtr_t, Xte_t = featureb(features_t, features_test_t, i, "sqrt", j)
            # "running_time" covers the selection step only.
            experiment.log_metric("running_time", perf_counter()-start_time)
            # featureb works on (features, samples) matrices; transpose back
            # before handing the data to train_eval.
            Xtr = Xtr_t.transpose()
            Xte = Xte_t.transpose()
            acc, pre, rec, roc = train_eval(model, Xtr, Y_train, Xte, Y_test, experiment)
            print(j)
            # Printed time includes selection plus training/evaluation.
            print(perf_counter()-start_time)
        finally:
            # Close the experiment even when a run raises.
            experiment.end()

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/0c5299ff6a8c43f39153d4a157453532

COMET INFO: Still uploading


approximate-lazy
6.62127161026001


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/f858edb6e4e7458ea1a8c13c7feb2529

COMET INFO: Still uploading


stochastic
8.08964228630066


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/8209bd5b3d7542e786c6654d7e742e79

COMET INFO: Still uploading


sample
6.667168378829956


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/6307924bc3064abb967688e20ede8c4b

COMET INFO: Still uploading


approximate-lazy
22.038702487945557


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/74a14820345e4353ac4c755f94576821

COMET INFO: Still uploading


stochastic
18.495970487594604


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/c1f78dd207e04f798c60204ff0a56545

COMET INFO: Still uploading


sample
23.85616946220398


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/ea84a0f7b67e48b2a43bd4ce8cf0faf8

COMET INFO: Still uploading


approximate-lazy
49.15369510650635


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/cfc15e5314484c8fbd97794f892bacbf

COMET INFO: Still uploading


stochastic
41.97713756561279


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/717a62b00e104f7f9f05fd4faeb11e5a

COMET INFO: Still uploading


sample
57.303250789642334


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/66a64c1b8659498784cdda827ec9b9c4

COMET INFO: Still uploading


approximate-lazy
232.5357527732849


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/34fff49c0dc249358772b7532c09e40c

COMET INFO: Still uploading


stochastic
212.11141324043274


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/d91465693afd48b0a7a727bd603fe904

COMET INFO: Still uploading


sample
233.47402572631836


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/45280e0c619d462cbe37b654c06ad3a1

COMET INFO: Still uploading


approximate-lazy
819.3130674362183


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/bf0ac0bee74e42abb5852b53a732f88c

COMET INFO: Still uploading


stochastic
779.4019298553467


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/fa93508af6f14a538401c8312494c59f

COMET INFO: Still uploading


sample
833.5406353473663


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/406971aeb31d4574aeb5f572e5bece73

COMET INFO: Still uploading


approximate-lazy
1754.3329780101776


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/454baecc07084e488d30cfb9c42abb8a

COMET INFO: Still uploading


stochastic
1704.5223467350006


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/c174894a662d40f9a2b8dc61a6c85bf5

COMET INFO: Still uploading


sample
1781.8982751369476


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/3a3372f0ffa946658f101c5c91baea43

COMET INFO: Still uploading


approximate-lazy
2016.9048430919647


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/15cbcc711a6641d4af0b54706d5f6f5e

COMET INFO: Still uploading


stochastic
1941.345813035965


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/53d5ff09e5f04caea767c1ad8b7ac2a8

COMET INFO: Still uploading


sample
2026.6739783287048


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/5350df9c676f4844ae817b1211786ba7

COMET INFO: Still uploading


approximate-lazy
2503.0214142799377


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/797db273b9b64810bdf54ef924dc05b4



In [None]:
# Optuna search over the feature-based selector's settings ("function" and
# "optimizer"), one Comet experiment per subset size.
#
# Local imports keep the cell self-contained:
# - perf_counter: the imports cell binds `time` to a function
#   (`from time import time`), so `time.time()` depends on hidden state.
# - make_sampler: the sampler factory is imported under its own name and its
#   result stored in `sampler`, so the imported `algo` factory is not
#   overwritten and the other search cells can still call it.
from time import perf_counter
from parameters import algo as make_sampler

parameters = param('featurebased')
search_alg = "GRID"
sampler = make_sampler(search_alg, parameters)
for i in [100, 500, 1000, 2500, 5000, 7500, 8000, 9000]:
    experiment=init_experiment(api_key, 'tweet_disaster', 'apricot')
    failed_trials = []  # trial numbers whose evaluation raised

    def objective(trial):
        """Evaluate one suggested configuration for subset size `i`.

        Returns the ROC score reported by train_eval, or 0.0 if selection or
        training raised (best-effort: the error is printed and the trial is
        recorded in `failed_trials`).
        """
        config = suggest_config(parameters, trial)
        roc=0.0
        try:
            start_time = perf_counter()
            Xtr_t, Xte_t = featureb(features_t, features_test_t, i, config["function"], config["optimizer"])
            experiment.log_metric("running_time", perf_counter()-start_time)
            Xtr = Xtr_t.transpose()
            Xte = Xte_t.transpose()
            acc, pre, rec, roc = train_eval(model, Xtr, Y_train, Xte, Y_test, experiment)
        except Exception as err:
            failed_trials.append(trial.number)
            print(err)
        # Returned after the try block rather than from `finally`: a `return`
        # inside `finally` would also swallow KeyboardInterrupt/SystemExit.
        return roc

    try:
        study = optuna.create_study(direction="maximize", sampler=sampler)
        study.optimize(objective, n_trials=20, n_jobs=1)
        if failed_trials:
            # Make silent failures visible: a "best" value of 0.0 is
            # meaningless when the trials behind it never ran to completion.
            print("%d of %d trials failed and were scored 0.0" % (len(failed_trials), len(study.trials)))
        best_param = study.best_params
        print(best_param)
        experiment.log_parameters({
            'best param': best_param,
            'size': i,
            'function': 'featurebased'
        })
    finally:
        # Close the experiment even when the search raises.
        experiment.end()

In [11]:
# Optuna search over the facility-location selector's "function" setting
# (optimizer fixed to "lazy"), one Comet experiment per subset size.
#
# NOTE(review): the recorded output of this cell shows every trial failing with
# "initial_subset must be a list, numpy array, or None" and scoring 0.0. The
# cause is presumably in how `facilityloc` forwards its arguments to the
# selector -- confirm against its definition; the results here are not usable
# until that is fixed.
#
# Local imports keep the cell self-contained:
# - perf_counter: the imports cell binds `time` to a function
#   (`from time import time`), so `time.time()` depends on hidden state.
# - make_sampler: the sampler factory is imported under its own name and its
#   result stored in `sampler`, so this cell works even after another search
#   cell has rebound `algo`, and does not rebind it itself.
from time import perf_counter
from parameters import algo as make_sampler

parameters = param('facilitylocation')
search_alg = "RND"
sampler = make_sampler(search_alg, parameters)
for i in [100, 500, 1000, 2500, 5000, 7500, 8000, 9000]:
    experiment=init_experiment(api_key, 'tweet_disaster', 'apricot')
    failed_trials = []  # trial numbers whose evaluation raised

    def objective(trial):
        """Evaluate one suggested configuration for subset size `i`.

        Returns the ROC score reported by train_eval, or 0.0 if selection or
        training raised (best-effort: the error is printed and the trial is
        recorded in `failed_trials`).
        """
        config = suggest_config(parameters, trial)
        roc=0.0
        try:
            start_time = perf_counter()
            Xtr_t, Xte_t = facilityloc(features_t, features_test_t, i, config["function"], "lazy")
            experiment.log_metric("running_time", perf_counter()-start_time)
            Xtr = Xtr_t.transpose()
            Xte = Xte_t.transpose()
            acc, pre, rec, roc = train_eval(model, Xtr, Y_train, Xte, Y_test, experiment)
        except Exception as err:
            failed_trials.append(trial.number)
            print(err)
        # Returned after the try block rather than from `finally`: a `return`
        # inside `finally` would also swallow KeyboardInterrupt/SystemExit.
        return roc

    try:
        study = optuna.create_study(direction="maximize", sampler=sampler)
        study.optimize(objective, n_trials=20, n_jobs=1)
        if failed_trials:
            # Make silent failures visible: a "best" value of 0.0 is
            # meaningless when the trials behind it never ran to completion.
            print("%d of %d trials failed and were scored 0.0" % (len(failed_trials), len(study.trials)))
        best_param = study.best_params
        print(best_param)
        experiment.log_parameters({
            'best param': best_param,
            'size': i,
            'function': 'facilitylocation'
        })
    finally:
        # Close the experiment even when the search raises.
        experiment.end()

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/5f2b18ff6b38492d9bdd391200b7181d

[32m[I 2021-11-15 21:01:44,753][0m A new study created in memory with name: no-name-14257891-c2e9-4d6c-9b91-10d6c563efe5[0m
[32m[I 2021-11-15 21:01:44,763][0m Trial 0 finished with value: 0.0 and parameters: {'function': 'l1'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:44,769][0m Trial 1 finished with value: 0.0 and parameters: {'function': 'l1'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:44,776][0m Trial 2 finished with value: 0.0 and parameters: {'function': 'cosine'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:44,789][0m Trial 3 finished with value: 0.0 and parameters: {'function': 'correlation'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:44,795][0m Trial 4 finished with value: 0.0 and parameters: {'function': 'cityblock'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-1

initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subs

COMET INFO: Still uploading
COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/ff9a57d8b0564a22a9f30be244e9bf24

[32m[I 2021-11-15 21:01:47,912][0m A new study created in memory with name: no-name-09debaf8-dd76-4027-902a-0dcbfee91f78[0m
[32m[I 2021-11-15 21:01:47,918][0m Trial 0 finished with value: 0.0 and parameters: {'function': 'correlation'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:47,923][0m Trial 1 finished with value: 0.0 and parameters: {'function': 'l1'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:47,929][0m Trial 2 finished with value: 0.0 and parameters: {'function': 'cityblock'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:47,935][0m Trial 3 finished with value: 0.0 and parameters: {'function': 'l2'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:47,940][0m Trial 4 finished with value: 0.0 and parameters: {'function': 'manhattan'}. Best is trial 0 with va

initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subs

COMET INFO: Still uploading
COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/84af3ee3c4af4576b5793046af2eb9d0

[32m[I 2021-11-15 21:01:51,368][0m A new study created in memory with name: no-name-a90e0dd9-ee8f-46f1-bc30-1c91efa66ba2[0m
[32m[I 2021-11-15 21:01:51,375][0m Trial 0 finished with value: 0.0 and parameters: {'function': 'l2'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:51,382][0m Trial 1 finished with value: 0.0 and parameters: {'function': 'manhattan'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:51,385][0m Trial 2 finished with value: 0.0 and parameters: {'function': 'cosine'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:51,391][0m Trial 3 finished with value: 0.0 and parameters: {'function': 'l1'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:51,396][0m Trial 4 finished with value: 0.0 and parameters: {'function': 'correlation'}. Best is trial 0 with value

initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subs

COMET INFO: Still uploading
COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/0ff3bb063e494ff398f974afa16a5736

[32m[I 2021-11-15 21:01:57,305][0m A new study created in memory with name: no-name-34a8a321-0c91-46d3-b41f-2fe331e0cd7f[0m
[32m[I 2021-11-15 21:01:57,314][0m Trial 0 finished with value: 0.0 and parameters: {'function': 'manhattan'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:57,320][0m Trial 1 finished with value: 0.0 and parameters: {'function': 'l1'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:57,324][0m Trial 2 finished with value: 0.0 and parameters: {'function': 'correlation'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:57,329][0m Trial 3 finished with value: 0.0 and parameters: {'function': 'euclidean'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:01:57,333][0m Trial 4 finished with value: 0.0 and parameters: {'function': 'l2'}. Best is trial 0 with va

initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subs

COMET INFO: Still uploading
COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/5639ad1b0b1c4f6493f78ee9f80e760e

[32m[I 2021-11-15 21:02:04,995][0m A new study created in memory with name: no-name-9360efdb-715e-400d-af40-079b3b32d75c[0m
[32m[I 2021-11-15 21:02:05,003][0m Trial 0 finished with value: 0.0 and parameters: {'function': 'correlation'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:05,008][0m Trial 1 finished with value: 0.0 and parameters: {'function': 'l1'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:05,013][0m Trial 2 finished with value: 0.0 and parameters: {'function': 'l1'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:05,024][0m Trial 3 finished with value: 0.0 and parameters: {'function': 'euclidean'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:05,037][0m Trial 4 finished with value: 0.0 and parameters: {'function': 'manhattan'}. Best is trial 0 with va

initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subs

COMET INFO: Still uploading
COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/ff3b431257d14acdaaa21b2a96e83d17

[32m[I 2021-11-15 21:02:08,250][0m A new study created in memory with name: no-name-7998b60a-23db-476d-afac-f7f8d1d4779d[0m
[32m[I 2021-11-15 21:02:08,255][0m Trial 0 finished with value: 0.0 and parameters: {'function': 'correlation'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:08,263][0m Trial 1 finished with value: 0.0 and parameters: {'function': 'manhattan'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:08,271][0m Trial 2 finished with value: 0.0 and parameters: {'function': 'cityblock'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:08,274][0m Trial 3 finished with value: 0.0 and parameters: {'function': 'cosine'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:08,281][0m Trial 4 finished with value: 0.0 and parameters: {'function': 'cosine'}. Best is trial 0

initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subs

COMET INFO: Still uploading
COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/8727ba49c5074af2b65cca7430518963

[32m[I 2021-11-15 21:02:16,196][0m A new study created in memory with name: no-name-2034b1b1-a083-4e3c-9f47-0872c34111f5[0m
[32m[I 2021-11-15 21:02:16,204][0m Trial 0 finished with value: 0.0 and parameters: {'function': 'manhattan'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:16,211][0m Trial 1 finished with value: 0.0 and parameters: {'function': 'l2'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:16,217][0m Trial 2 finished with value: 0.0 and parameters: {'function': 'correlation'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:16,224][0m Trial 3 finished with value: 0.0 and parameters: {'function': 'euclidean'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:16,228][0m Trial 4 finished with value: 0.0 and parameters: {'function': 'manhattan'}. Best is trial 0 

initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subs

COMET INFO: Still uploading
COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/ea49387bfc6b4be9935cfcb6c66df657

[32m[I 2021-11-15 21:02:22,761][0m A new study created in memory with name: no-name-426f35d3-0b13-416b-8b30-11aa9b25e471[0m
[32m[I 2021-11-15 21:02:22,768][0m Trial 0 finished with value: 0.0 and parameters: {'function': 'correlation'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:22,773][0m Trial 1 finished with value: 0.0 and parameters: {'function': 'euclidean'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:22,777][0m Trial 2 finished with value: 0.0 and parameters: {'function': 'l1'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:22,783][0m Trial 3 finished with value: 0.0 and parameters: {'function': 'l2'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2021-11-15 21:02:22,787][0m Trial 4 finished with value: 0.0 and parameters: {'function': 'correlation'}. Best is trial 0 with 

initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subset must be a list, numpy array, or None
initial_subs

COMET INFO: Still uploading


In [None]:
# Grid-search the optimizer used for max-coverage feature selection.
# One Comet experiment and one Optuna study are created per subset size.
# `perf_counter` is imported locally because the notebook's import cell binds
# `time` to the *function* (`from time import time`), which makes `time.time()` fragile.
from time import perf_counter

parameters = param('maxcoverage')
search_alg = "GRID"
# Keep the sampler under its own name: rebinding `algo` would overwrite the helper
# and make this cell fail the second time it is executed.
sampler = algo(search_alg, parameters)
for i in [100, 500, 1000, 2500, 5000, 7500, 8000, 9000]:
    experiment = init_experiment(api_key, 'tweet_disaster', 'apricot')
    try:
        experiment.log_parameters({
            'size': i,
            'function': 'maxcoverage'
        })

        def objective(trial):
            """Run one selection + training trial and return its ROC score (0.0 on failure)."""
            config = suggest_config(parameters, trial)
            roc = 0.0
            try:
                start_time = perf_counter()
                # Use the optimizer sampled for this trial instead of a fixed literal,
                # otherwise every trial of the grid evaluates the same configuration.
                # NOTE(review): assumes `suggest_config` returns a mapping keyed by the
                # parameter name ('optimizer' is the name Optuna reports for these trials)
                # and that the 4th argument of `maxcov` is the optimizer name — confirm.
                Xtr_t, Xte_t = maxcov(features_t, features_test_t, i, config['optimizer'])
                experiment.log_metric("running_time", perf_counter() - start_time)
                Xtr = Xtr_t.transpose()
                Xte = Xte_t.transpose()
                _acc, _pre, _rec, roc = train_eval(model, Xtr, Y_train, Xte, Y_test, experiment)
            except Exception as err:
                # Best effort: a failing configuration is reported and scored 0.0.
                # Returning after the handler (not from `finally`) lets KeyboardInterrupt
                # and other non-Exception signals propagate.
                print(err)
            return roc

        study = optuna.create_study(direction="maximize", sampler=sampler)
        study.optimize(objective, n_trials=20, n_jobs=1)
        best_param = study.best_params
        print(best_param)
        experiment.log_parameters({
            'best param': best_param,
            'size': i,
            'function': 'maxcoverage'
        })
    finally:
        # Always close the Comet experiment, even if the study is interrupted.
        experiment.end()

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/615ccb1b91d0406db168c51db259bac2

[32m[I 2021-11-15 21:12:43,740][0m A new study created in memory with name: no-name-6ff6a5b4-6c09-48c9-b978-fff6b444b865[0m
[32m[I 2021-11-15 21:12:50,326][0m Trial 0 finished with value: 0.0 and parameters: {'optimizer': 'stochastic'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:12:56,845][0m Trial 1 finished with value: 0.0 and parameters: {'optimizer': 'bidirectional'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:13:03,040][0m Trial 2 finished with value: 0.0 and parameters: {'optimizer': 'random'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:13:09,487][0m Trial 3 finished with value: 0.0 and parameters: {'optimizer': 'lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:13:15,985][0m Trial 4 finished with value: 0.0 and parameters: {'optimizer': 'two-stage'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:13:22,197][0m Trial 5 finished with value: 0.0 and parameters: {'optimizer': 'sample'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:13:28,767][0m Trial 6 finished with value: 0.0 and parameters: {'optimizer': 'modular'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:13:34,913][0m Trial 7 finished with value: 0.0 and parameters: {'optimizer': 'approximate-lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:13:41,289][0m Trial 8 finished with value: 0.0 and parameters: {'optimizer': 'naive'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:13:47,301][0m Trial 9 finished with value: 0.0 and parameters: {'optimizer': 'greedi'}. Best is trial 0 with value: 0.0.[0m
COMET INFO: Still uploading


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)
{'optimizer': 'stochastic'}


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/e709572b295d4e2281b9e9606dd1614e

[32m[I 2021-11-15 21:13:54,105][0m A new study created in memory with name: no-name-5904779a-ee20-462b-92e0-8a265f1a2bb7[0m
[32m[I 2021-11-15 21:14:00,739][0m Trial 0 finished with value: 0.0 and parameters: {'optimizer': 'random'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:14:07,319][0m Trial 1 finished with value: 0.0 and parameters: {'optimizer': 'modular'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:14:13,508][0m Trial 2 finished with value: 0.0 and parameters: {'optimizer': 'naive'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:14:20,142][0m Trial 3 finished with value: 0.0 and parameters: {'optimizer': 'lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:14:26,385][0m Trial 4 finished with value: 0.0 and parameters: {'optimizer': 'stochastic'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:14:33,002][0m Trial 5 finished with value: 0.0 and parameters: {'optimizer': 'greedi'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:14:39,133][0m Trial 6 finished with value: 0.0 and parameters: {'optimizer': 'two-stage'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:14:45,684][0m Trial 7 finished with value: 0.0 and parameters: {'optimizer': 'approximate-lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:14:52,306][0m Trial 8 finished with value: 0.0 and parameters: {'optimizer': 'sample'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:14:58,446][0m Trial 9 finished with value: 0.0 and parameters: {'optimizer': 'bidirectional'}. Best is trial 0 with value: 0.0.[0m
COMET INFO: Still uploading


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)
{'optimizer': 'random'}


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/e09c10b372da4bafb7e573c9c7fd36ba

[32m[I 2021-11-15 21:15:03,264][0m A new study created in memory with name: no-name-df05065f-1635-4803-89fa-168c22d9637d[0m
[32m[I 2021-11-15 21:15:10,004][0m Trial 0 finished with value: 0.0 and parameters: {'optimizer': 'greedi'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:15:16,315][0m Trial 1 finished with value: 0.0 and parameters: {'optimizer': 'random'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:15:44,196][0m Trial 2 finished with value: 0.0 and parameters: {'optimizer': 'bidirectional'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:15:50,284][0m Trial 3 finished with value: 0.0 and parameters: {'optimizer': 'approximate-lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:15:56,868][0m Trial 4 finished with value: 0.0 and parameters: {'optimizer': 'stochastic'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:16:03,376][0m Trial 5 finished with value: 0.0 and parameters: {'optimizer': 'two-stage'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:16:09,496][0m Trial 6 finished with value: 0.0 and parameters: {'optimizer': 'modular'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:16:15,980][0m Trial 7 finished with value: 0.0 and parameters: {'optimizer': 'lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:16:22,050][0m Trial 8 finished with value: 0.0 and parameters: {'optimizer': 'naive'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:16:28,642][0m Trial 9 finished with value: 0.0 and parameters: {'optimizer': 'sample'}. Best is trial 0 with value: 0.0.[0m
COMET INFO: Still uploading


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)
{'optimizer': 'greedi'}


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/9a8955061fa24465951c190387963638

[32m[I 2021-11-15 21:16:33,008][0m A new study created in memory with name: no-name-ee1f0977-6cab-4458-9b21-28f0ff0d3bfa[0m
[32m[I 2021-11-15 21:16:39,167][0m Trial 0 finished with value: 0.0 and parameters: {'optimizer': 'approximate-lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:16:45,727][0m Trial 1 finished with value: 0.0 and parameters: {'optimizer': 'lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:16:51,923][0m Trial 2 finished with value: 0.0 and parameters: {'optimizer': 'modular'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:16:58,662][0m Trial 3 finished with value: 0.0 and parameters: {'optimizer': 'two-stage'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:17:04,739][0m Trial 4 finished with value: 0.0 and parameters: {'optimizer': 'sample'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:17:11,237][0m Trial 5 finished with value: 0.0 and parameters: {'optimizer': 'random'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:17:17,645][0m Trial 6 finished with value: 0.0 and parameters: {'optimizer': 'bidirectional'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:17:23,710][0m Trial 7 finished with value: 0.0 and parameters: {'optimizer': 'greedi'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:17:30,257][0m Trial 8 finished with value: 0.0 and parameters: {'optimizer': 'stochastic'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:17:36,338][0m Trial 9 finished with value: 0.0 and parameters: {'optimizer': 'naive'}. Best is trial 0 with value: 0.0.[0m
COMET INFO: Still uploading


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)
{'optimizer': 'approximate-lazy'}


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/a4ae670c96fd49b0ada5471ef0a28e76

[32m[I 2021-11-15 21:17:42,603][0m A new study created in memory with name: no-name-28ae9f3d-54ce-4d6f-9e72-0fba58a3acec[0m
[32m[I 2021-11-15 21:17:49,168][0m Trial 0 finished with value: 0.0 and parameters: {'optimizer': 'lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:17:55,337][0m Trial 1 finished with value: 0.0 and parameters: {'optimizer': 'approximate-lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:18:01,945][0m Trial 2 finished with value: 0.0 and parameters: {'optimizer': 'random'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:18:07,980][0m Trial 3 finished with value: 0.0 and parameters: {'optimizer': 'bidirectional'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:18:14,019][0m Trial 4 finished with value: 0.0 and parameters: {'optimizer': 'stochastic'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:18:20,546][0m Trial 5 finished with value: 0.0 and parameters: {'optimizer': 'sample'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:18:27,115][0m Trial 6 finished with value: 0.0 and parameters: {'optimizer': 'two-stage'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:18:33,181][0m Trial 7 finished with value: 0.0 and parameters: {'optimizer': 'modular'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:18:39,782][0m Trial 8 finished with value: 0.0 and parameters: {'optimizer': 'greedi'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:18:45,845][0m Trial 9 finished with value: 0.0 and parameters: {'optimizer': 'naive'}. Best is trial 0 with value: 0.0.[0m
COMET INFO: Still uploading


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)
{'optimizer': 'lazy'}


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/bbca911ed9144f338b655c572fdf4a97

[32m[I 2021-11-15 21:18:49,568][0m A new study created in memory with name: no-name-e2c473cd-8da4-4c22-ae36-d3d2937972a6[0m
[32m[I 2021-11-15 21:18:56,292][0m Trial 0 finished with value: 0.0 and parameters: {'optimizer': 'stochastic'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:19:02,468][0m Trial 1 finished with value: 0.0 and parameters: {'optimizer': 'sample'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:19:09,182][0m Trial 2 finished with value: 0.0 and parameters: {'optimizer': 'modular'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:19:15,364][0m Trial 3 finished with value: 0.0 and parameters: {'optimizer': 'approximate-lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:19:21,470][0m Trial 4 finished with value: 0.0 and parameters: {'optimizer': 'greedi'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:19:28,099][0m Trial 5 finished with value: 0.0 and parameters: {'optimizer': 'lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:19:34,673][0m Trial 6 finished with value: 0.0 and parameters: {'optimizer': 'naive'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:19:40,867][0m Trial 7 finished with value: 0.0 and parameters: {'optimizer': 'bidirectional'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:19:46,856][0m Trial 8 finished with value: 0.0 and parameters: {'optimizer': 'random'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:19:53,461][0m Trial 9 finished with value: 0.0 and parameters: {'optimizer': 'two-stage'}. Best is trial 0 with value: 0.0.[0m
COMET INFO: Still uploading


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)
{'optimizer': 'stochastic'}


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/d47df841a5684c989bab4f7bb383260d

[32m[I 2021-11-15 21:19:58,863][0m A new study created in memory with name: no-name-d5e5c9db-d878-4a0d-8200-1fa030dd95ff[0m
[32m[I 2021-11-15 21:20:05,817][0m Trial 0 finished with value: 0.0 and parameters: {'optimizer': 'bidirectional'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:20:12,116][0m Trial 1 finished with value: 0.0 and parameters: {'optimizer': 'sample'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:20:18,349][0m Trial 2 finished with value: 0.0 and parameters: {'optimizer': 'random'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:20:25,012][0m Trial 3 finished with value: 0.0 and parameters: {'optimizer': 'modular'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:20:31,070][0m Trial 4 finished with value: 0.0 and parameters: {'optimizer': 'lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:20:37,624][0m Trial 5 finished with value: 0.0 and parameters: {'optimizer': 'stochastic'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:20:43,763][0m Trial 6 finished with value: 0.0 and parameters: {'optimizer': 'naive'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:20:50,355][0m Trial 7 finished with value: 0.0 and parameters: {'optimizer': 'greedi'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:20:56,491][0m Trial 8 finished with value: 0.0 and parameters: {'optimizer': 'approximate-lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:21:03,081][0m Trial 9 finished with value: 0.0 and parameters: {'optimizer': 'two-stage'}. Best is trial 0 with value: 0.0.[0m
COMET INFO: Still uploading


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)
{'optimizer': 'bidirectional'}


COMET INFO: Experiment is live on comet.ml https://www.comet.ml/apricot/tweet-disaster/1296b684b93848a2868fbd8b1c5d8a81

[32m[I 2021-11-15 21:21:07,010][0m A new study created in memory with name: no-name-83ff6f59-628c-4ef3-b08f-9730aa65fb52[0m
[32m[I 2021-11-15 21:21:13,202][0m Trial 0 finished with value: 0.0 and parameters: {'optimizer': 'lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:21:19,996][0m Trial 1 finished with value: 0.0 and parameters: {'optimizer': 'modular'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:21:26,213][0m Trial 2 finished with value: 0.0 and parameters: {'optimizer': 'approximate-lazy'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


[32m[I 2021-11-15 21:21:32,434][0m Trial 3 finished with value: 0.0 and parameters: {'optimizer': 'stochastic'}. Best is trial 0 with value: 0.0.[0m


No matching definition for argument type(s) array(float64, 1d, C), array(int32, 1d, C), array(int32, 1d, C), array(float64, 1d, C), array(float64, 1d, C), unicode_type, array(int64, 1d, C)


In [None]:
def randomtrain(X_tr, X_te, n, seed=None):
    """Select the same ``n`` random rows from a train and a test frame.

    Row positions are drawn from the train frame and applied to both inputs,
    so the two results stay aligned row by row.

    Parameters
    ----------
    X_tr, X_te : objects exposing ``to_numpy()`` (e.g. pandas DataFrames)
        Must have the same number of rows, because the same positions index both.
    n : int
        Number of rows to keep; the shuffled positions are sliced with ``[:n]``.
    seed : int or None, optional
        When given, the selection is reproducible. ``None`` (default) keeps the
        previous behaviour of shuffling with NumPy's global random state.

    Returns
    -------
    (Xtr, Xte) : tuple of numpy.ndarray
        The selected rows of the train and test arrays.

    Raises
    ------
    ValueError
        If the two inputs do not have the same number of rows.
    """
    X_train_arr = X_tr.to_numpy()
    X_test_arr = X_te.to_numpy()
    print(X_train_arr.shape, X_test_arr.shape)
    n_rows = X_train_arr.shape[0]
    if X_test_arr.shape[0] != n_rows:
        # Shared positions are only meaningful when both matrices have the same rows;
        # fail early instead of an IndexError or silently misaligned output.
        raise ValueError(
            "X_tr and X_te must have the same number of rows, got %d and %d"
            % (n_rows, X_test_arr.shape[0])
        )
    if seed is None:
        idxs = np.arange(n_rows)
        np.random.shuffle(idxs)
    else:
        idxs = np.random.default_rng(seed).permutation(n_rows)
    idx = idxs[:n]
    return X_train_arr[idx, :], X_test_arr[idx, :]

In [None]:
# Random-selection baseline: for every subset size, repeat the random draw several
# times and log each run as its own Comet experiment.
from apricot_exp.evaluation import train_eval  # imported once, not on every iteration

for subset_size in [100, 500, 1000, 2500, 5000, 7500, 8000, 9000]:
    # range(1, 20) yields 19 repetitions per size (kept as in the original run).
    for repetition in range(1, 20):
        experiment = init_experiment(api_key, 'tweet_disaster', 'apricot')
        try:
            experiment.log_parameters({
                # Log the subset size itself; the repetition counter was previously
                # logged under 'size', which mislabelled every run.
                'size': subset_size,
                'repetition': repetition,
                'function': 'random'
            })
            Xtr_t, Xte_t = randomtrain(features_t, features_test_t, subset_size)
            Xtr = Xtr_t.transpose()
            Xte = Xte_t.transpose()
            train_eval(model, Xtr, Y_train, Xte, Y_test, experiment)
        finally:
            # Close the experiment even when training fails.
            experiment.end()

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Classifier bound to the notebook-global `model`:
# a gradient-boosting ensemble of 20 estimators with depth-3 trees.
model = GradientBoostingClassifier(
    max_depth=3,
    n_estimators=20,
)

In [None]:
# Train and evaluate `model`, passing `features` / `features_test` directly
# (rather than the `Xtr` / `Xte` subsets built in the selection loops).
# NOTE(review): `experiment` is whatever handle the previously executed cell left
# behind; the loops in this notebook call `experiment.end()` on theirs, so confirm
# that a live experiment exists before relying on what gets logged here.
from apricot_exp.evaluation import train_eval
train_eval(model, features,  Y_train, features_test, Y_test, experiment)

## PCA

In [None]:
#pca = PCA(n_components=2)

In [None]:
#from sklearn.preprocessing import StandardScaler
#f = list(range(0, 10000))
#x = features[:, f]
#x = StandardScaler().fit_transform(x)
#x = pd.DataFrame(x)

In [None]:
#from sklearn.decomposition import PCA
#pca = PCA(n_components=100)
#x_pca = pca.fit_transform(x)
#x_pca = pd.DataFrame(x_pca)
#x_pca.head()

## Visualization

In [None]:
# Show the "tweet disaster" tab of the current `experiment` in the notebook.
# NOTE(review): relies on the notebook-global `experiment` left by an earlier cell — confirm it is the intended one.
experiment.display(tab="tweet disaster")

In [None]:
import os

import neptune

# The API token is a credential and must never be stored in the notebook.
# It is read from the NEPTUNE_API_TOKEN environment variable; a KeyError here means
# the variable is not set. Any token that was previously committed should be revoked.
project = neptune.init(
    project_qualified_name='bartalisd/nlp-disaster-tweets',
    api_token=os.environ['NEPTUNE_API_TOKEN'],
)

# Download experiments dashboard as pandas DataFrame
data = project.get_leaderboard()

# Keep only runs carrying more than one tag, then drop bookkeeping columns.
data = data[data['tags'].map(len) > 1]
data = data.drop(['name','created', 'finished','owner','notes', 'running_time','size'], axis=1)
# Cast the metric channels to float in one step (no loop variable leaks into the notebook).
metric_columns = ['channel_roc', 'channel_acc', 'channel_rec', 'channel_pre']
data = data.astype({column: "float64" for column in metric_columns})

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
def barplot_res(dataframe, percentage, metric, y_name):
    """Bar-plot the mean of ``metric`` per tag combination for selected runs.

    Only rows whose ``tags`` entry contains ``percentage`` are used. Rows are
    grouped by their full tag combination, ``metric`` is averaged per group,
    and the second tag of each combination is used as the bar label.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Needs a ``tags`` column (an indexable sequence per row with at least
        two entries) and a numeric ``metric`` column.
    percentage : object
        Tag value a row must contain to be included; also used as plot title.
    metric : str
        Name of the column to average and plot on the y axis.
    y_name : str
        Label of the y axis.

    Returns
    -------
    tuple
        ``(ax, res_percentage)``: the matplotlib Axes and the aggregated frame
        with columns ``tags`` (stringified), ``metric`` and ``model``.
    """
    # Test the tags column directly; astype(bool) keeps the mask boolean even
    # when the frame is empty.
    selector = dataframe["tags"].apply(lambda tags: percentage in tags).astype(bool)
    # Work on an independent copy so the column assignments below neither
    # touch the caller's frame nor raise SettingWithCopyWarning.
    data_percentage = dataframe.loc[selector, :].copy()
    # Take the label from the real tag sequence instead of eval()-ing its
    # string representation afterwards.
    data_percentage["model"] = data_percentage["tags"].apply(lambda tags: tags[1])
    # Tag sequences may be unhashable (lists), so group by their string form.
    data_percentage["tags"] = data_percentage["tags"].apply(str)
    grouped = data_percentage.groupby(by='tags')
    res_percentage = grouped[metric].mean().reset_index()
    # Identical tag strings imply the same second tag, so the first one per
    # group is representative.
    res_percentage['model'] = res_percentage['tags'].map(grouped['model'].first())
    ax = sns.barplot(data=res_percentage, x='model', y=metric)
    # Fixed y range so plots of different selections are comparable.
    ax.set_ylim((0.5, 0.9))
    ax.set(xlabel='Model', ylabel=y_name, title=percentage)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=60)
    return ax, res_percentage
def lineplot_res(dataframe, y_name, metric, res5k, mod):
    """Line-plot the mean of ``metric`` against the second tag, one line per model.

    The first tag of every row is taken as the model name and the second tag
    as the x value (stored in a ``percentage`` column and labelled "Number of
    features" on the plot). Rows whose second tag contains the letter ``d``
    are discarded, and only a fixed set of five model names is kept.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Needs a ``tags`` column (an indexable sequence per row with at least
        two entries) and a numeric ``metric`` column. It is not modified.
    y_name : str
        Label of the y axis.
    metric : str
        Name of the column to average and plot.
    res5k : float
        y position of the dashed horizontal reference line.
    mod : str
        Plot title.

    Returns
    -------
    matplotlib.axes.Axes
        The Axes the lines were drawn on.
    """
    # Explicit model -> colour mapping. A positional colour list only works
    # when all five models are present (older seaborn raises otherwise, newer
    # versions warn and shift colours). The pairs reproduce the colours each
    # model receives when all five models are present.
    model_colors = {
        'Tfidf': 'yellow',
        'facilityloc_d': 'b',
        'featurebased_d': 'r',
        'maxcov_d': 'forestgreen',
        'random_d': 'blueviolet',
    }

    runs = dataframe.copy()
    runs['percentage'] = runs['tags'].apply(lambda tags: tags[1])
    # Discard rows whose second tag contains a 'd'.
    # NOTE(review): presumably these are rows where the second tag is a name
    # rather than a feature count -- confirm against the tagging scheme.
    keep = ~runs['percentage'].apply(lambda value: 'd' in str(value)).astype(bool)
    # .copy() so the following column assignment works on an independent frame.
    runs = runs[keep].copy()
    runs['model'] = runs['tags'].apply(lambda tags: tags[0])
    runs = runs[runs['model'].isin(list(model_colors))]

    result = runs.groupby(by=['model', 'percentage'])[metric].mean().reset_index()
    ax = sns.lineplot(data=result, x='percentage', y=metric, hue='model', palette=model_colors)
    ax.set_title(mod, fontdict={'fontsize': 20, 'fontweight': 'bold'})
    ax.set_xlabel('Number of features', fontsize=15)
    ax.set_ylabel(y_name, fontsize=15)
    # Dashed reference line at the supplied baseline value.
    ax.axhline(y=res5k, color='brown', linestyle='--')
    # No legend is created when nothing was plotted; skip the styling then.
    legend = ax.get_legend()
    if legend is not None:
        plt.setp(legend.get_texts(), fontsize='15')
        plt.setp(legend.get_title(), fontsize='20')
    return ax

In [None]:
# Accuracy curve; the dashed reference line is drawn at the res5k value.
lineplot_res(
    dataframe=data,
    y_name='Accuracy',
    metric='channel_acc',
    res5k=0.7984049623393886,
    mod='NLP - Disasters tweets: Dim reduction',
)

In [None]:
# Precision curve; the dashed reference line is drawn at the res5k value.
lineplot_res(
    dataframe=data,
    y_name='Precision',
    metric='channel_pre',
    res5k=0.8259162303664922,
    mod='NLP - Disasters tweets: Dim reduction',
)

In [None]:
# Recall curve; the dashed reference line is drawn at the res5k value.
lineplot_res(
    dataframe=data,
    y_name='Recall',
    metric='channel_rec',
    res5k=0.6621196222455404,
    mod='NLP - Disasters tweets: Dim reduction',
)

In [None]:
# ROC AUC curve; the dashed reference line is drawn at the res5k value.
lineplot_res(
    dataframe=data,
    y_name='ROC AUC',
    metric='channel_roc',
    res5k=0.8622086211447222,
    mod='NLP - Disasters tweets: Dim reduction',
)