In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix
from sklearn.preprocessing import minmax_scale

from numpy.random import default_rng
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

%matplotlib inline
np.set_printoptions(suppress=True, precision=2)
plt.style.use('seaborn') # pretty matplotlib plots
sns.set(font_scale=2)

In [2]:
# Load 3 feature version of x arrays
x_train = np.loadtxt('./data_sneaker_vs_sandal/x_train.csv', delimiter=',', skiprows=1)
x_test = np.loadtxt('./data_sneaker_vs_sandal/x_test.csv', delimiter=',', skiprows=1)

y_train = np.loadtxt('./data_sneaker_vs_sandal/y_train.csv', delimiter=',', skiprows=1)

In [3]:
k = x_train[0].reshape(7,7,4,4)
k.shape
k

array([[[[0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ]],

        [[0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ]],

        [[0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ]],

        [[0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ]],

        [[0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ]],

        [[0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ]],

        [[0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ],
         [0.  , 0.  , 0.  , 0.  ]]],


    

In [4]:
aver = np.max(k,axis=3)
aver

array([[[0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  ]],

       [[0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  ]],

       [[0.  , 0.  , 0.  , 0.02],
        [0.01, 0.  , 0.02, 0.  ],
        [0.  , 0.  , 0.02, 0.9 ],
        [0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.35, 0.88, 0.66],
        [0.49, 0.  , 0.  , 0.  ],
        [0.98, 0.84, 0.94, 0.7 ]],

       [[0.01, 0.02, 0.58, 0.93],
        [0.82, 0.46, 0.98, 0.01],
        [0.  , 0.94, 0.84, 0.81],
        [0.91, 0.09, 0.  , 0.84],
        [0.93, 0.84, 0.81, 0.81],
        [0.9 , 0.62, 0.8 , 0.83],
        [0.84, 0.8 , 0.95, 0.84]],

       [[0.78, 0.78, 0.78, 0.78],
      

In [5]:
t = np.max(aver,axis=2)
t.flatten()
t.reshape(49)
t

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.02, 0.02, 0.9 , 0.  , 0.88, 0.49, 0.98],
       [0.93, 0.98, 0.94, 0.91, 0.93, 0.9 , 0.95],
       [0.78, 0.85, 1.  , 0.89, 0.57, 0.12, 0.36],
       [0.17, 0.19, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ]])

In [6]:
x_train[0].reshape(28,28)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0. 

In [7]:
x_ft = np.append(x_train[i], t)
x_ft

NameError: name 'i' is not defined

In [8]:
arr = np.array([[[1],[1],[1],[1]],
                [[2],[2],[2],[2]],
                [[3],[3],[3],[3]],
               [[4],[4],[4],[4]]])

In [9]:
arr

array([[[1],
        [1],
        [1],
        [1]],

       [[2],
        [2],
        [2],
        [2]],

       [[3],
        [3],
        [3],
        [3]],

       [[4],
        [4],
        [4],
        [4]]])

In [10]:
arr.shape

(4, 4, 1)

In [11]:
arr = np.zeros(36)
arr[:12] = 1
arr

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0.])

In [12]:
arr.reshape(2,6,3)

array([[[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [13]:
x_ft_i = []
for i in range(len(x_train)):
    k = x_train[i].reshape(7,7,4,4)
    aver = np.max(k,axis=3)
    t = np.max(aver,axis=2)
    t.reshape(49)
    x_ft = np.append(x_train[i], t)
    x_ft_i.append(x_ft)

In [14]:
x_test_i = []
for i in range(len(x_test)):
    k = x_train[i].reshape(7,7,4,4)
    aver = np.max(k,axis=3)
    t = np.max(aver,axis=2)
    t.reshape(49)
    x_ft = np.append(x_test[i], t)
    x_test_i.append(x_ft)

In [15]:
from sklearn.preprocessing import minmax_scale

In [16]:
x_ft_sp = minmax_scale(x_ft_i)
x_te_sp = minmax_scale(x_test_i)

In [17]:
model = LogisticRegression(solver='liblinear')
model.fit(x_ft_sp, y_train)

prob_tr = model.predict_proba(x_ft_sp)
# prob_va = model.predict_proba(x_va)

acc_tr = model.score(x_ft_sp, y_train)
# acc_va = model.score(x_va, y_va)

tr_log_loss = log_loss(y_train,prob_tr)
# va_log_loss = log_loss(y_va,prob_va)

print('train log loss', tr_log_loss)
# print('validation log loss', va_log_loss)

print('train acc', acc_tr)
# print('validation acc', acc_va)

train log loss 0.06565693354550449
train acc 0.9790833333333333


In [18]:
yproba1_test = model.predict_proba(x_te_sp)[:, 1] 
np.savetxt('yproba1_test.txt', yproba1_test)