In [1]:
import os
import numpy as np
import tensorflow as tf
 
from tensorflow_vgg import vgg16
from tensorflow_vgg import utils

In [2]:
data_dir = 'flower_photos/'
contents = os.listdir(data_dir)
classes = [each for each in contents if os.path.isdir(data_dir + each)]

In [4]:
# 首先设置计算batch的值，如果运算平台的内存越大，这个值可以设置得越高
batch_size = 20
# 用codes_list来存储特征值
codes_list = []
# 用labels来存储花的类别
labels = []
# batch数组用来临时存储图片数据
batch = []
 
codes = None
 
with tf.Session() as sess:
    # 构建VGG16模型对象
    vgg = vgg16.Vgg16()
    input_ = tf.placeholder(tf.float32, [None, 224, 224, 3])
    with tf.name_scope("content_vgg"):
        # 载入VGG16模型
        vgg.build(input_)
    
    # 对每个不同种类的花分别用VGG16计算特征值
    for each in classes:
        print("Starting {} images".format(each))
        class_path = data_dir + each
        files = os.listdir(class_path)
        for ii, file in enumerate(files, 1):
            # 载入图片并放入batch数组中
            img = utils.load_image(os.path.join(class_path, file))
            batch.append(img.reshape((1, 224, 224, 3)))
            labels.append(each)
            
            # 如果图片数量到了batch_size则开始具体的运算
            if ii % batch_size == 0 or ii == len(files):
                images = np.concatenate(batch)
 
                feed_dict = {input_: images}
                # 计算特征值
                codes_batch = sess.run(vgg.relu6, feed_dict=feed_dict)
                
                # 将结果放入到codes数组中
                if codes is None:
                    codes = codes_batch
                else:
                    codes = np.concatenate((codes, codes_batch))
                
                # 清空数组准备下一个batch的计算
                batch = []
                print('{} images processed'.format(ii))

C:\Users\HanY\Desktop\VGG_16\tensorflow_vgg\vgg16.npy
npy file loaded
build model started
build model finished: 1s
Starting ben images
20 images processed
40 images processed
60 images processed
80 images processed
100 images processed
120 images processed
140 images processed
160 images processed
180 images processed
200 images processed
220 images processed
240 images processed
260 images processed
280 images processed
300 images processed
320 images processed
340 images processed
360 images processed
380 images processed
400 images processed
420 images processed
440 images processed
460 images processed
480 images processed
500 images processed
520 images processed
540 images processed
560 images processed
580 images processed
600 images processed
620 images processed
640 images processed
660 images processed
680 images processed
700 images processed
720 images processed
740 images processed
760 images processed
780 images processed
800 images processed
820 images processed
840 imag

In [5]:
with open('codes', 'w') as f:
    codes.tofile(f)
    
import csv
with open('labels', 'w') as f:
    writer = csv.writer(f, delimiter='\n')
    writer.writerow(labels)

In [6]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

In [7]:
# data = ['北京', '上海']
label_encoder = LabelEncoder()
label_encoded = label_encoder.fit_transform(labels)
# print(label_encoded)
labels_vecs = OneHotEncoder()
labels_vecs = labels_vecs.fit_transform(label_encoded.reshape(-1, 1)).toarray()
print(labels_vecs)

[[1. 0.]
 [1. 0.]
 [1. 0.]
 ...
 [0. 1.]
 [0. 1.]
 [0. 1.]]


In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [8]:
codes.shape

(6337, 4096)

In [9]:
from sklearn.model_selection import StratifiedShuffleSplit
 
ss = StratifiedShuffleSplit(n_splits=1, test_size=0.2)
 
train_idx, val_idx = next(ss.split(codes, labels))
 
half_val_len = int(len(val_idx)/2)
val_idx, test_idx = val_idx[:half_val_len], val_idx[half_val_len:]
 
train_x, train_y = codes[train_idx], labels_vecs[train_idx]
val_x, val_y = codes[val_idx], labels_vecs[val_idx]
test_x, test_y = codes[test_idx], labels_vecs[test_idx]
 
print("Train shapes (x, y):", train_x.shape, train_y.shape)
print("Validation shapes (x, y):", val_x.shape, val_y.shape)
print("Test shapes (x, y):", test_x.shape, test_y.shape)

Train shapes (x, y): (5069, 4096) (5069, 2)
Validation shapes (x, y): (634, 4096) (634, 2)
Test shapes (x, y): (634, 4096) (634, 2)


In [10]:
# 输入数据的维度
inputs_ = tf.placeholder(tf.float32, shape=[None, codes.shape[1]])
# 标签数据的维度
labels_ = tf.placeholder(tf.int64, shape=[None, labels_vecs.shape[1]])
 
# 加入一个256维的全连接的层
fc = tf.contrib.layers.fully_connected(inputs_, 256)
 
# 加入一个5维的全连接层
logits = tf.contrib.layers.fully_connected(fc, labels_vecs.shape[1], activation_fn=None)
 
# 计算cross entropy值
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=labels_, logits=logits)
 
# 计算损失函数
cost = tf.reduce_mean(cross_entropy)
 
# 采用用得最广泛的AdamOptimizer优化器
optimizer = tf.train.AdamOptimizer().minimize(cost)
 
# 得到最后的预测分布
predicted = tf.nn.softmax(logits)
 
# 计算准确度
correct_pred = tf.equal(tf.argmax(predicted, 1), tf.argmax(labels_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
auc_value, auc_op = tf.metrics.auc(labels_, predicted)
confusion_matrix = tf.confusion_matrix(tf.argmax(labels_, 1), tf.argmax(predicted, 1))


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.


In [11]:
print (labels_)
print (predicted)

Tensor("Placeholder_3:0", shape=(?, 2), dtype=int64)
Tensor("Softmax:0", shape=(?, 2), dtype=float32)


In [12]:
def get_batches(x, y, n_batches=10):
    """ 这是一个生成器函数，按照n_batches的大小将数据划分了小块 """
    batch_size = len(x)//n_batches
    
    for ii in range(0, n_batches*batch_size, batch_size):
        # 如果不是最后一个batch，那么这个batch中应该有batch_size个数据
        if ii != (n_batches-1)*batch_size:
            X, Y = x[ii: ii+batch_size], y[ii: ii+batch_size] 
        # 否则的话，那剩余的不够batch_size的数据都凑入到一个batch中
        else:
            X, Y = x[ii:], y[ii:]
        # 生成器语法，返回X和Y
        yield X, Y

In [13]:
# 运行多少轮次
epochs = 40
# 统计训练效果的频率
iteration = 0
# 保存模型的保存器
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(epochs):
        for x, y in get_batches(train_x, train_y):
            feed = {inputs_: x,
                    labels_: y}
            # 训练模型
            loss, _ = sess.run([cost, optimizer], feed_dict=feed)
            print("Epoch: {}/{}".format(e+1, epochs),
                  "Iteration: {}".format(iteration),
                  "Training loss: {:.5f}".format(loss))
            iteration += 1
            
            if iteration % 5 == 0:
                feed = {inputs_: val_x,
                        labels_: val_y}
                val_acc = sess.run(accuracy, feed_dict=feed)
#                 val_auc = sess.run(auc_value, feed_dict=feed)
                # 输出用验证机验证训练进度
                print("Epoch: {}/{}".format(e, epochs),
                      "Iteration: {}".format(iteration),
                      "Validation Acc: {:.4f}".format(val_acc))
    # 保存模型
    saver.save(sess, "checkpoints/flowers.ckpt")

Epoch: 1/40 Iteration: 0 Training loss: 3.61856
Epoch: 1/40 Iteration: 1 Training loss: 23.95444
Epoch: 1/40 Iteration: 2 Training loss: 21.41974
Epoch: 1/40 Iteration: 3 Training loss: 12.44093
Epoch: 1/40 Iteration: 4 Training loss: 6.84237
Epoch: 0/40 Iteration: 5 Validation Acc: 0.6483
Epoch: 1/40 Iteration: 5 Training loss: 3.46621
Epoch: 1/40 Iteration: 6 Training loss: 1.17880
Epoch: 1/40 Iteration: 7 Training loss: 0.62202
Epoch: 1/40 Iteration: 8 Training loss: 0.69559
Epoch: 1/40 Iteration: 9 Training loss: 0.72284
Epoch: 0/40 Iteration: 10 Validation Acc: 0.3612
Epoch: 2/40 Iteration: 10 Training loss: 0.73161
Epoch: 2/40 Iteration: 11 Training loss: 0.71140
Epoch: 2/40 Iteration: 12 Training loss: 0.71059
Epoch: 2/40 Iteration: 13 Training loss: 0.69921
Epoch: 2/40 Iteration: 14 Training loss: 0.69676
Epoch: 1/40 Iteration: 15 Validation Acc: 0.3517
Epoch: 2/40 Iteration: 15 Training loss: 0.69639
Epoch: 2/40 Iteration: 16 Training loss: 0.69390
Epoch: 2/40 Iteration: 17 Tr

Epoch: 14/40 Iteration: 139 Training loss: 0.65522
Epoch: 13/40 Iteration: 140 Validation Acc: 0.6609
Epoch: 15/40 Iteration: 140 Training loss: 0.66299
Epoch: 15/40 Iteration: 141 Training loss: 0.65653
Epoch: 15/40 Iteration: 142 Training loss: 0.66492
Epoch: 15/40 Iteration: 143 Training loss: 0.65683
Epoch: 15/40 Iteration: 144 Training loss: 0.65556
Epoch: 14/40 Iteration: 145 Validation Acc: 0.6593
Epoch: 15/40 Iteration: 145 Training loss: 0.65046
Epoch: 15/40 Iteration: 146 Training loss: 0.65844
Epoch: 15/40 Iteration: 147 Training loss: 0.65788
Epoch: 15/40 Iteration: 148 Training loss: 0.66293
Epoch: 15/40 Iteration: 149 Training loss: 0.65207
Epoch: 14/40 Iteration: 150 Validation Acc: 0.6593
Epoch: 16/40 Iteration: 150 Training loss: 0.66043
Epoch: 16/40 Iteration: 151 Training loss: 0.65366
Epoch: 16/40 Iteration: 152 Training loss: 0.66261
Epoch: 16/40 Iteration: 153 Training loss: 0.65406
Epoch: 16/40 Iteration: 154 Training loss: 0.65285
Epoch: 15/40 Iteration: 155 Val

Epoch: 27/40 Iteration: 275 Validation Acc: 0.7382
Epoch: 28/40 Iteration: 275 Training loss: 0.57274
Epoch: 28/40 Iteration: 276 Training loss: 0.57032
Epoch: 28/40 Iteration: 277 Training loss: 0.57245
Epoch: 28/40 Iteration: 278 Training loss: 0.57988
Epoch: 28/40 Iteration: 279 Training loss: 0.57422
Epoch: 27/40 Iteration: 280 Validation Acc: 0.7287
Epoch: 29/40 Iteration: 280 Training loss: 0.59107
Epoch: 29/40 Iteration: 281 Training loss: 0.57426
Epoch: 29/40 Iteration: 282 Training loss: 0.56518
Epoch: 29/40 Iteration: 283 Training loss: 0.58896
Epoch: 29/40 Iteration: 284 Training loss: 0.57613
Epoch: 28/40 Iteration: 285 Validation Acc: 0.7382
Epoch: 29/40 Iteration: 285 Training loss: 0.56492
Epoch: 29/40 Iteration: 286 Training loss: 0.56043
Epoch: 29/40 Iteration: 287 Training loss: 0.56360
Epoch: 29/40 Iteration: 288 Training loss: 0.57315
Epoch: 29/40 Iteration: 289 Training loss: 0.56639
Epoch: 28/40 Iteration: 290 Validation Acc: 0.7303
Epoch: 30/40 Iteration: 290 Tra

In [14]:
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))
    sess.run(tf.local_variables_initializer())
    feed = {inputs_: test_x,
            labels_: test_y}
    test_acc = sess.run(accuracy, feed_dict=feed)
    sess.run(auc_op, feed_dict=feed)
    predict_test = sess.run(predicted, feed_dict=feed)
    test_auc = sess.run(auc_value, feed_dict=feed)
    matrix = sess.run(confusion_matrix, feed_dict = feed)
    print("Test accuracy: {:.4f}".format(test_acc))
    print("auc: {:.4f}".format(test_auc))
    print(matrix)
    ab = []
    for i in range (len(test_x)):
        xt = test_x[i][np.newaxis,:]
        yt = test_y[i][np.newaxis,:]
        predict_test = sess.run(predicted, feed_dict={inputs_: xt,labels_: yt})
        ab.append(np.hstack((predict_test[0], yt[0])))
#         print(np.hstack((predict_test[0], yt[0])))
        print(np.array(predict_test[0]))
        print(np.array(yt[0]))
    np.savetxt('new2.csv',ab,delimiter =',')

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from checkpoints\flowers.ckpt
Test accuracy: 0.8408
auc: 0.8728
[[ 887  774]
 [  33 3375]]
[0.6946249  0.30537507]
[1. 0.]
[0.6644181  0.33558196]
[1. 0.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.40308964 0.59691036]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.9967529  0.00324706]
[1. 0.]
[0.34824315 0.6517568 ]
[1. 0.]
[0.66714567 0.33285433]
[1. 0.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[1. 0.]
[0.8691951  0.13080488]
[1. 0.]
[0.91886437 0.08113564]
[1. 0.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34950525 0.650

[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[1. 0.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.9037107  0.09628927]
[1. 0.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.37198216 0.62801784]
[1. 0.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[1. 0.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[1. 0.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.99715734 0.00284263]
[1. 0.]
[0.6980456  0.30195442]
[1. 0.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.92235464 0.07764538]
[1. 0.]
[0.34824315 0.6517568 ]
[1. 0.]
[0.34824315 0.6517568 ]
[1. 0.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.7346317  0.26536828]
[1. 0.]
[0.34824315 0.6517568 ]
[0. 1.]
[0.34824

In [15]:
End

NameError: name 'End' is not defined

In [62]:
def get_batche(x, y, n_batche=66):
    """ 这是一个生成器函数，按照n_batches的大小将数据划分了小块 """
    batch_size = len(x)//n_batche
    
    for ii in range(0, n_batche*batch_size, batch_size):
        # 如果不是最后一个batch，那么这个batch中应该有batch_size个数据
        if ii != (n_batche-1)*batch_size:
            X, Y = x[ii: ii+batch_size], y[ii: ii+batch_size] 
        # 否则的话，那剩余的不够batch_size的数据都凑入到一个batch中
        else:
            X, Y = x[ii:], y[ii:]
        # 生成器语法，返回X和Y
        yield X, Y

In [68]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ab = []
    for x, y in get_batche(test_x, test_y):
            feed = {inputs_: x,
                    labels_: y}
            sess.run(tf.local_variables_initializer())
            predict_test = sess.run(predicted, feed_dict=feed)
            ab.append(np.hstack((predict_test[0], y[0])))
#             print(np.hstack((predict_test[0], y[0])))
            print(predict_test)
            print(np.array(y))

#     np.savetxt('new2.csv',ab,delimiter =',')

[[0.9971534  0.00284653]]
[[0. 1.]]
[[9.9987507e-01 1.2491515e-04]]
[[0. 1.]]
[[9.9998283e-01 1.7197459e-05]]
[[1. 0.]]
[[0.9918508 0.0081492]]
[[0. 1.]]
[[9.9997592e-01 2.4109277e-05]]
[[0. 1.]]
[[0.9295181  0.07048188]]
[[1. 0.]]
[[0.70195925 0.2980408 ]]
[[0. 1.]]
[[0.02803983 0.9719601 ]]
[[0. 1.]]
[[0.9425304 0.0574696]]
[[0. 1.]]
[[0.9598435  0.04015645]]
[[1. 0.]]
[[9.998512e-01 1.487142e-04]]
[[1. 0.]]
[[0.9317851  0.06821484]]
[[0. 1.]]
[[0.34041864 0.65958136]]
[[1. 0.]]
[[0.9114859  0.08851413]]
[[0. 1.]]
[[0.99607134 0.00392862]]
[[1. 0.]]
[[0.99667335 0.00332668]]
[[1. 0.]]
[[0.9922346  0.00776544]]
[[1. 0.]]
[[0.9984875  0.00151251]]
[[1. 0.]]
[[0.04986914 0.95013094]]
[[1. 0.]]
[[9.9988449e-01 1.1545216e-04]]
[[0. 1.]]
[[9.9928361e-01 7.1637915e-04]]
[[0. 1.]]
[[0.9953969  0.00460304]]
[[0. 1.]]
[[0.19074014 0.80925983]]
[[1. 0.]]
[[0.94884866 0.05115132]]
[[1. 0.]]
[[0.95150846 0.04849156]]
[[1. 0.]]
[[0.05749023 0.9425097 ]]
[[0. 1.]]
[[0.99823207 0.001768  ]]
[[1. 0.]

In [23]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
#     sess.run(tf.local_variables_initializer())
    ab = []
    for i in range (len(test_x)):
        xt = test_x[i][np.newaxis,:]
        yt = test_y[i][np.newaxis,:]
        predict_test = sess.run(predicted, feed_dict={inputs_: xt,labels_: yt})
        ab.append(np.hstack((predict_test[0], yt[0])))
#         print(np.hstack((predict_test[0], yt[0])))
        print(np.array(predict_test[0]))
        print(np.array(yt[0]))

#     np.savetxt('new2.csv',ab,delimiter =',')

[6.9715097e-05 9.9993026e-01]
[0. 1.]
[2.2344158e-05 9.9997771e-01]
[1. 0.]
[1.8254350e-05 9.9998176e-01]
[1. 0.]
[0.9185695 0.0814305]
[0. 1.]
[0.05671438 0.94328564]
[1. 0.]
[5.0058536e-04 9.9949944e-01]
[1. 0.]
[0.11746989 0.88253015]
[1. 0.]
[4.8438529e-04 9.9951565e-01]
[1. 0.]
[0.81241965 0.1875804 ]
[0. 1.]
[2.8460303e-07 9.9999976e-01]
[0. 1.]
[0.20918503 0.790815  ]
[0. 1.]
[8.779239e-08 9.999999e-01]
[0. 1.]
[4.856894e-06 9.999951e-01]
[1. 0.]
[3.5028432e-05 9.9996495e-01]
[0. 1.]
[0.95642155 0.04357842]
[0. 1.]
[0.00126926 0.9987307 ]
[0. 1.]
[0.43894693 0.5610531 ]
[0. 1.]
[0.07156464 0.9284353 ]
[1. 0.]
[0.00409978 0.9959002 ]
[0. 1.]
[0.00803164 0.99196833]
[0. 1.]
[0.00304954 0.99695045]
[0. 1.]
[0.31845787 0.68154216]
[1. 0.]
[0.0022448 0.9977552]
[0. 1.]
[0.4045453 0.5954547]
[1. 0.]
[0.748622 0.251378]
[0. 1.]
[2.3592493e-04 9.9976403e-01]
[0. 1.]
[0.00563512 0.99436486]
[1. 0.]
[0.53675324 0.46324673]
[0. 1.]
[6.741014e-04 9.993259e-01]
[0. 1.]
[0.11045805 0.8895419 

[0.46993035 0.53006965]
[1. 0.]
[0.02507903 0.974921  ]
[0. 1.]
[4.1096200e-06 9.9999595e-01]
[0. 1.]
[0.01796676 0.9820332 ]
[1. 0.]
[4.4478500e-05 9.9995553e-01]
[0. 1.]
[0.00374108 0.996259  ]
[0. 1.]
[0.43887064 0.5611294 ]
[0. 1.]
[0.00291158 0.9970885 ]
[1. 0.]
[0.938419   0.06158096]
[0. 1.]
[5.9418619e-04 9.9940586e-01]
[1. 0.]
[0.9317483  0.06825174]
[0. 1.]
[1.1550047e-06 9.9999881e-01]
[0. 1.]
[0.00437012 0.9956299 ]
[1. 0.]
[0.00453479 0.9954652 ]
[1. 0.]
[0.01724345 0.9827565 ]
[0. 1.]
[0.8228156  0.17718448]
[0. 1.]
[0.17195736 0.8280427 ]
[0. 1.]
[0.01441331 0.98558664]
[0. 1.]
[0.00308586 0.99691415]
[0. 1.]
[0.21808884 0.78191113]
[0. 1.]
[0.02263021 0.97736984]
[1. 0.]
[0.23644099 0.763559  ]
[1. 0.]
[0.03864479 0.96135527]
[1. 0.]
[1.538498e-04 9.998461e-01]
[1. 0.]
[0.19430983 0.8056901 ]
[1. 0.]
[0.01157933 0.9884207 ]
[0. 1.]
[0.03972233 0.9602776 ]
[1. 0.]
[0.00251645 0.99748355]
[0. 1.]
[0.56511813 0.43488184]
[1. 0.]
[2.4571772e-08 1.0000000e+00]
[1. 0.]
[0.001

In [7]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

In [10]:
data = ['北京', '上海', '广州', '成都', '杭州', '深圳']
label_encoder = LabelEncoder()
label_encoded = label_encoder.fit_transform(labels)
print(label_encoded)
one_hot_encoder = OneHotEncoder()
one_hot_encoded = one_hot_encoder.fit_transform(label_encoded.reshape(-1, 1)).toarray()
print(one_hot_encoded)

[0 0 0 ... 1 1 1]
[[1. 0.]
 [1. 0.]
 [1. 0.]
 ...
 [0. 1.]
 [0. 1.]
 [0. 1.]]


In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [None]:
predicted

In [None]:
a = np.array([1,1,1,1])
 
b = a[np.newaxis,:]
c = a[:,np.newaxis]

In [None]:
print(b)

In [None]:
b.shape