forked from dimitreOliveira/DogBreedKeras
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset.py
124 lines (98 loc) · 3.36 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import cv2
import h5py
import os.path
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
def load_train_dataset(df_train, img_size):
    """Load the resized training images, caching them in 'data/x_train.h5'.

    If the cache file exists and holds images of the requested size, its
    contents are returned. Otherwise every image listed in ``df_train`` is
    read from 'data/train/<id>.jpg', resized to ``img_size`` x ``img_size``
    and the result is written to the cache file (overwriting it).

    Args:
        df_train: table whose ``.values`` rows unpack into exactly two items,
            the image id (used as the file name stem) and the breed.
        img_size: width and height, in pixels, that each image is resized to.

    Returns:
        The array read back from the HDF5 dataset on a cache hit, or the
        freshly built list of resized images otherwise.

    Raises:
        IOError: if an image file is missing or cannot be decoded.
    """
    file_path = 'data/x_train.h5'
    if os.path.exists(file_path):
        with h5py.File(file_path, 'r') as hf:
            cached = hf['x_train']
            # Only trust the cache when it was built for the requested size;
            # otherwise img_size would be silently ignored and the caller
            # would receive images of the wrong shape.
            if tuple(cached.shape[1:3]) == (img_size, img_size):
                return cached[:]

    x_train = []
    # The breed column is not needed here, but unpacking two names keeps the
    # original requirement that each row has exactly two fields.
    for f, _breed in tqdm(df_train.values):
        img_path = 'data/train/{}.jpg'.format(f)
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv2.resize.
        if img is None:
            raise IOError('could not read image: {}'.format(img_path))
        x_train.append(cv2.resize(img, (img_size, img_size)))
    with h5py.File(file_path, 'w') as hf:
        hf.create_dataset("x_train", data=x_train)
    return x_train
def load_test_dataset(df_test, img_size):
    """Load the resized test images, caching them in 'data/x_test.h5'.

    If the cache file exists and holds images of the requested size, its
    contents are returned. Otherwise every image whose id appears in
    ``df_test['id']`` is read from 'data/test/<id>.jpg', resized to
    ``img_size`` x ``img_size`` and the result is written to the cache file
    (overwriting it).

    Args:
        df_test: table with an ``'id'`` column of image file name stems.
        img_size: width and height, in pixels, that each image is resized to.

    Returns:
        The array read back from the HDF5 dataset on a cache hit, or the
        freshly built list of resized images otherwise.

    Raises:
        IOError: if an image file is missing or cannot be decoded.
    """
    file_path = 'data/x_test.h5'
    if os.path.exists(file_path):
        with h5py.File(file_path, 'r') as hf:
            cached = hf['x_test']
            # Only trust the cache when it was built for the requested size;
            # otherwise img_size would be silently ignored and the caller
            # would receive images of the wrong shape.
            if tuple(cached.shape[1:3]) == (img_size, img_size):
                return cached[:]

    x_test = []
    for f in tqdm(df_test['id'].values):
        img_path = 'data/test/{}.jpg'.format(f)
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv2.resize.
        if img is None:
            raise IOError('could not read image: {}'.format(img_path))
        x_test.append(cv2.resize(img, (img_size, img_size)))
    with h5py.File(file_path, 'w') as hf:
        hf.create_dataset("x_test", data=x_test)
    return x_test
def load_train_labels(df_train, one_hot_labels):
    """Return the training labels, caching them in 'data/y_train.h5'.

    When the cache file already exists its 'y_train' dataset is read and
    returned. Otherwise the first ``len(df_train.values)`` entries of
    ``one_hot_labels`` are collected by integer index, written to the cache
    file, and returned as a list.
    """
    cache_path = 'data/y_train.h5'

    # Cache hit: hand back whatever was stored previously.
    if os.path.exists(cache_path):
        with h5py.File(cache_path, 'r') as cache:
            return cache['y_train'][:]

    # Cache miss: gather one label per training row, then persist them.
    row_count = len(df_train.values)
    labels = [one_hot_labels[idx] for idx in tqdm(range(row_count))]
    with h5py.File(cache_path, 'w') as cache:
        cache.create_dataset("y_train", data=labels)
    return labels
# def load_train_dataset(df_train, img_size):
# x_train = []
# for f, breed in tqdm(df_train.values):
# img = cv2.imread('data/train/{}.jpg'.format(f))
# x_train.append(cv2.resize(img, (img_size, img_size)))
#
# return x_train
#
#
# def load_test_dataset(df_test, img_size):
# x_test = []
# for f in tqdm(df_test['id'].values):
# img = cv2.imread('data/test/{}.jpg'.format(f))
# x_test.append(cv2.resize(img, (img_size, img_size)))
#
# return x_test
#
#
# def load_train_labels(df_train, one_hot_labels):
# y_train = []
# for i in tqdm(range(len(df_train.values))):
# label = one_hot_labels[i]
# y_train.append(label)
#
# return y_train
def plot_loss_accuracy(history):
    """Plot train/validation accuracy and loss per epoch and show the figure.

    Args:
        history: object whose ``.history`` mapping holds per-epoch value
            lists under 'loss' and 'val_loss', plus accuracy under either
            'acc'/'val_acc' or 'accuracy'/'val_accuracy'.

    Raises:
        KeyError: if a required series is missing from ``history.history``.
    """
    metrics = history.history
    # Keras before 2.3 logs accuracy as 'acc'; later releases log it under the
    # name given to compile(), commonly 'accuracy'. Accept either spelling.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'

    plt.figure(1)
    # Top panel (211): accuracy. Bottom panel (212): loss.
    panels = ((211, acc_key, 'accuracy'), (212, 'loss', 'loss'))
    for position, key, label in panels:
        plt.subplot(position)
        plt.plot(metrics[key])
        plt.plot(metrics['val_' + key])
        plt.title('model ' + label)
        plt.ylabel(label)
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
    plt.show()
def output_submission(preds, one_hot, df_test):
    """Write the predictions to 'submissions/submission.csv'.

    The CSV has an 'id' column followed by one column per entry of
    ``one_hot.columns``, with no index column. The 'submissions' directory is
    created if it does not exist yet.

    Args:
        preds: 2-D predictions, one row per test sample and one column per
            entry of ``one_hot.columns``.
        one_hot: table whose column names label the prediction columns.
        df_test: table with an ``'id'`` column, in the same row order as
            ``preds``.

    Raises:
        ValueError: if the number of ids or column names does not match
            the shape of ``preds``.
    """
    output_path = 'submissions/submission.csv'
    submission = pd.DataFrame(preds)
    # Set column names to those generated by the one-hot encoding earlier
    submission.columns = one_hot.columns.values
    # Insert the ids at the start of the data frame. Use the raw values so
    # ids are matched to predictions by position; inserting the Series itself
    # would align on index and yield NaN/misordered ids whenever df_test does
    # not carry a default 0..n-1 index.
    submission.insert(0, 'id', df_test['id'].values)
    # to_csv does not create missing parent directories.
    output_dir = os.path.dirname(output_path)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    submission.to_csv(output_path, encoding='utf-8', index=False)
    print('submission outputted')