In [10]:
import numpy as np
import time
import warnings

from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import train_test_split

from src.data_loader.DataLoader import DataLoader
from src.model.LogisticRegression import LogisticRegression
from src.optim.ADAM import ADAM
from src.optim.IWLS import IWLS
from src.optim.SGD import SGD
from src.optim.conditions import NoLogLikOrMaxIterCondition

warnings.filterwarnings('ignore')

# Example of usage
dl = DataLoader(product=True)
sd = dl.get_supported_datasets()

rows = []
random_states = [420, 69, 42, 24, 96, 1312, 777]
for d in sd:
    print(d)
    x, y = dl[d]

    for random_state in random_states:
        np.random.seed(random_state)
        train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, stratify=y, random_state=random_state)
        print("ADAM")
        start = time.time()
        model = LogisticRegression()
        optim = ADAM(model, NoLogLikOrMaxIterCondition(1000, 25), batch_size=32)
        model = optim.optimize(train_x, train_y)
        end = time.time()
        rows.append(
            {"dataset": d, "optim": "ADAM", "time": start - end, "accuracy": balanced_accuracy_score(test_y, model.predict(test_x)),
             "random_state": random_state, "iters": optim.stop_condition.epoch})

        print("IWLS")
        start = time.time()
        model = LogisticRegression()
        optim = IWLS(model, NoLogLikOrMaxIterCondition(1000, 5))
        model = optim.optimize(train_x, train_y)
        end = time.time()
        rows.append(
            {"dataset": d, "optim": "IWLS", "time": start - end, "accuracy": balanced_accuracy_score(test_y, model.predict(test_x)),
             "random_state": random_state, "iters": optim.stop_condition.epoch})

        print("SGD")
        start = time.time()
        model = LogisticRegression()
        optim = SGD(model, NoLogLikOrMaxIterCondition(1000, 25), batch_size=32)
        model = optim.optimize(train_x, train_y)
        end = time.time()
        rows.append(
            {"dataset": d, "optim": "SGD", "time": start - end, "accuracy": balanced_accuracy_score(test_y, model.predict(test_x)),
             "random_state": random_state, "iters": optim.stop_condition.epoch})


banknote
(1372, 2)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
kin8nm
(8192, 8)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
phoneme
(5404, 5)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
elevators
(16599, 11)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
jm1
(10880, 4)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
kdd_JapaneseVowels
(9961, 14)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
mfeat-karhunen
(2000, 64)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
mfeat-zernike
(2000, 10)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
pc1
(1109, 4)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
AD

In [11]:
import pandas as pd

df = pd.DataFrame(rows)
df["product"] = 1
df

Unnamed: 0,dataset,optim,time,accuracy,random_state,iters,product
0,banknote,ADAM,-0.017323,0.706565,420,28,1
1,banknote,IWLS,-0.006565,0.866729,420,5,1
2,banknote,SGD,-0.013948,0.709297,420,27,1
3,banknote,ADAM,-0.075434,0.793459,69,148,1
4,banknote,IWLS,-0.002293,0.857434,69,6,1
...,...,...,...,...,...,...,...
184,pc1,IWLS,-0.001259,0.557153,1312,5,1
185,pc1,SGD,-0.006266,0.500000,1312,25,1
186,pc1,ADAM,-0.006689,0.500000,777,25,1
187,pc1,IWLS,-0.001250,0.560379,777,5,1


In [12]:
import numpy as np
import time
import warnings

from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import train_test_split

from src.data_loader.DataLoader import DataLoader
from src.model.LogisticRegression import LogisticRegression
from src.optim.ADAM import ADAM
from src.optim.IWLS import IWLS
from src.optim.SGD import SGD
from src.optim.conditions import NoLogLikOrMaxIterCondition

warnings.filterwarnings('ignore')

# Example of usage
dl = DataLoader(product=False)
sd = dl.get_supported_datasets()

rows = []
random_states = [420, 69, 42, 24, 96, 1312, 777]
for d in sd:
    print(d)
    x, y = dl[d]

    for random_state in random_states:
        np.random.seed(random_state)
        train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, stratify=y, random_state=random_state)
        print("ADAM")
        start = time.time()
        model = LogisticRegression()
        optim = ADAM(model, NoLogLikOrMaxIterCondition(1000, 25), batch_size=32)
        model = optim.optimize(train_x, train_y)
        end = time.time()
        rows.append(
            {"dataset": d, "optim": "ADAM", "time": start - end, "accuracy": balanced_accuracy_score(test_y, model.predict(test_x)),
             "random_state": random_state, "iters": optim.stop_condition.epoch})

        print("IWLS")
        start = time.time()
        model = LogisticRegression()
        optim = IWLS(model, NoLogLikOrMaxIterCondition(1000, 5))
        model = optim.optimize(train_x, train_y)
        end = time.time()
        rows.append(
            {"dataset": d, "optim": "IWLS", "time": start - end, "accuracy": balanced_accuracy_score(test_y, model.predict(test_x)),
             "random_state": random_state, "iters": optim.stop_condition.epoch})

        print("SGD")
        start = time.time()
        model = LogisticRegression()
        optim = SGD(model, NoLogLikOrMaxIterCondition(1000, 25), batch_size=32)
        model = optim.optimize(train_x, train_y)
        end = time.time()
        rows.append(
            {"dataset": d, "optim": "SGD", "time": start - end, "accuracy": balanced_accuracy_score(test_y, model.predict(test_x)),
             "random_state": random_state, "iters": optim.stop_condition.epoch})


banknote
(1372, 2)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
kin8nm
(8192, 8)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
phoneme
(5404, 5)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
elevators
(16599, 11)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
jm1
(10880, 4)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
kdd_JapaneseVowels
(9961, 14)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
mfeat-karhunen
(2000, 64)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
mfeat-zernike
(2000, 10)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
pc1
(1109, 4)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
AD

In [13]:
import pandas as pd

ndf = pd.DataFrame(rows)
ndf["product"] = 0
ndf

Unnamed: 0,dataset,optim,time,accuracy,random_state,iters,product
0,banknote,ADAM,-0.006066,0.753013,420,28,0
1,banknote,IWLS,-0.001151,0.868912,420,8,0
2,banknote,SGD,-0.005067,0.766674,420,34,0
3,banknote,ADAM,-0.011672,0.790727,69,73,0
4,banknote,IWLS,-0.000901,0.854153,69,6,0
...,...,...,...,...,...,...,...
184,pc1,IWLS,-0.000981,0.561992,1312,7,0
185,pc1,SGD,-0.003709,0.500000,1312,25,0
186,pc1,ADAM,-0.004133,0.500000,777,25,0
187,pc1,IWLS,-0.000852,0.583731,777,6,0


In [14]:
total = pd.concat((df, ndf), axis=0)

In [15]:
total

Unnamed: 0,dataset,optim,time,accuracy,random_state,iters,product
0,banknote,ADAM,-0.017323,0.706565,420,28,1
1,banknote,IWLS,-0.006565,0.866729,420,5,1
2,banknote,SGD,-0.013948,0.709297,420,27,1
3,banknote,ADAM,-0.075434,0.793459,69,148,1
4,banknote,IWLS,-0.002293,0.857434,69,6,1
...,...,...,...,...,...,...,...
184,pc1,IWLS,-0.000981,0.561992,1312,7,0
185,pc1,SGD,-0.003709,0.500000,1312,25,0
186,pc1,ADAM,-0.004133,0.500000,777,25,0
187,pc1,IWLS,-0.000852,0.583731,777,6,0


In [16]:
total.groupby(["dataset", "optim", "product"])["accuracy"].mean()


dataset             optim  product
banknote            ADAM   0          0.771051
                           1          0.753887
                    IWLS   0          0.863373
                           1          0.864777
                    SGD    0          0.756923
                           1          0.676941
elevators           ADAM   0          0.485477
                           1          0.493690
                    IWLS   0          0.649665
                           1          0.649949
                    SGD    0          0.513080
                           1          0.504279
jm1                 ADAM   0          0.500000
                           1          0.500000
                    IWLS   0          0.519138
                           1          0.519311
                    SGD    0          0.500000
                           1          0.500000
kdd_JapaneseVowels  ADAM   0          0.500000
                           1          0.500000
                    IWLS 

In [17]:
total.to_csv("wyniki.csv")

In [18]:
import numpy as np
import time
import warnings

from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import train_test_split

from src.data_loader.DataLoader import DataLoader
from src.model.LogisticRegression import LogisticRegression
from src.optim.ADAM import ADAM
from src.optim.IWLS import IWLS
from src.optim.SGD import SGD
from src.optim.conditions import NoLogLikOrMaxIterCondition

warnings.filterwarnings('ignore')

# Example of usage
dl = DataLoader(product=True)
sd = dl.get_supported_datasets()

x, y = dl["jm1"]
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, stratify=y)
model = LogisticRegression()
optim = ADAM(model, NoLogLikOrMaxIterCondition(1000, 3), batch_size=1)
model = optim.optimize(train_x, train_y)
optim.stop_condition.epoch

(10880, 4)


13