**Network architecture**
- [ ] Number of hidden layers (network depth) 
- [ ] Number of neurons in each layer (layer width) 
- [ ] Activation type 

**Learning and optimization**
- [ ] Learning rate and decay schedule
- [ ] Mini-batch size
- [ ] Optimization algorithms
- [ ] Number of training iterations or epochs (and early stopping criteria)

**Regularization techniques to avoid overfitting** 
- [ ] L2 regularization
- [ ] Dropout layers
- [ ] Data augmentation
- [ ] Batch normalization
- [ ] Transfer learning


# 變數
- [ ] 圖片大小 600*600
- [ ] learning_rate 1e-5
- [ ] batchsize 10
- [ ] Class weight



# 紀錄
- [ ] val_loss: 0.4730 - val_accuracy: 0.8344(450*450,1e-4)
- [ ] val_loss: 0.4667 - val_accuracy: 0.8360(500*500,1e-5)
- [ ] loss: 0.3391 - accuracy: 0.8854 - val_loss: 0.4624 - val_accuracy: 0.8425
    (600*600, 1e-5)

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import os
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers

In [None]:
# Mount Google Drive into the Colab runtime at /content/gdrive. The data,
# checkpoint and output paths used throughout this notebook live under it.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Prerequisite: choose 'Add to my drive' on the shared folder so that it shows
# up under MyDrive.

root_path = 'gdrive/MyDrive/final_project'
train_dir = 'gdrive/MyDrive/final_project/train/train'  # folder holding the training JPGs
train_df = pd.read_csv(f'{root_path}/train_data.csv')

# Add one extra copy of the row labelled 1 at the end of the frame.
# Presumably this pads the row count to a multiple of the batch size (the
# generators report 7420 + 1860 = 9280 files) -- TODO confirm.
# NOTE(review): the copy sits at the end while the original row 1 stays at the
# start, so the two may land in different subsets of the validation split.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and keeps the column dtypes of the original frame.
train_df = pd.concat([train_df, train_df.loc[[1], :]], ignore_index=True)

# Labels are cast to strings: the original note says this is required when
# class_mode = 'sparse' (the generators in this notebook use 'categorical').
train_df['Type'] = train_df['Type'].astype('str')

## InceptionResNetV2

In [None]:
# Data augmentation and pre-processing with Keras' ImageDataGenerator.
# Both generators use validation_split=0.2005 so that they agree on which rows
# belong to which subset; the run recorded below reports 7420 training and
# 1860 validation files.


def _split_flow(datagen, subset):
    """Return the `subset` ("training" or "validation") iterator of `datagen` over train_df."""
    return datagen.flow_from_dataframe(
        train_df,                   # dataframe listing file names and labels
        directory=train_dir,        # folder that contains the image files
        x_col='Name',
        y_col='Type',
        subset=subset,
        color_mode="rgb",
        target_size=(600, 600),     # (image height, image width)
        class_mode="categorical",
        batch_size=10,
        shuffle=True,
        seed=42,
    )


# Training images: rescaled to [0, 1] and randomly augmented.
train_gen = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.2005,
    rotation_range=15,
    zoom_range=0.15,
    shear_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation images: rescaled only, no augmentation.
valid_gen = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.2005,
)

train_generator = _split_flow(train_gen, "training")
validation_generator = _split_flow(valid_gen, "validation")

Found 7420 validated image filenames belonging to 4 classes.
Found 1860 validated image filenames belonging to 4 classes.


In [None]:
# Disabled on purpose: a one-off pass over the training generator that collects
# the class index of every sample (argmax over axis 1 of the last element of
# each batch). The recorded run below needed about 27 minutes for 742 batches.
# train_labels=np.array([])
# for i in tqdm(range(train_generator.n//train_generator.batch_size)):
#   feat = train_generator[i][-1]
#   labels = np.argmax(feat, axis=1)
#   train_labels = np.append(train_labels,labels)


100%|██████████| 742/742 [27:00<00:00,  2.18s/it]


In [None]:
# Disabled on purpose: derives 'balanced' class weights from train_labels with
# scikit-learn and prints them. It depends on the (also disabled) label
# collection above; the printed result of the recorded run is kept below.
# from sklearn.utils import class_weight
# ClassWeights = dict(zip(np.unique(train_labels),
#                         class_weight.compute_class_weight('balanced',
#                                                 classes=np.unique(train_labels),y=train_labels)))
# print(ClassWeights) 

{0.0: 0.8672276764843385, 1.0: 1.3081805359661496, 2.0: 1.1906290115532734, 3.0: 0.8047722342733189}


In [None]:
# Per-class loss weights, pasted from the 'balanced' computation printed above
# so that the slow label-collection pass does not have to be repeated.
ClassWeights = {
    0.0: 0.8672276764843385,
    1.0: 1.3081805359661496,
    2.0: 1.1906290115532734,
    3.0: 0.8047722342733189,
}

In [None]:
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import CSVLogger


# Backbone: InceptionResNetV2 with ImageNet weights and without its own
# classification head, built for 600x600 RGB inputs.
base_model = tf.keras.applications.InceptionResNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(600, 600, 3),
)

# Freeze the backbone: with trainable=False its weights are not updated by
# fit() and its internal state does not change during training.
base_model.trainable = False

# Classification head stacked on the frozen backbone, ending in a 4-way softmax.
model = models.Sequential([
    base_model,
    layers.BatchNormalization(renorm=True),
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(4, activation='softmax'),
])

# Adam with a small learning rate; categorical cross-entropy is the loss for
# the multi-class labels produced by the generators (class_mode="categorical").
opt = optimizers.Adam(learning_rate=1e-5)
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

# Per-epoch metrics are written to a CSV file on Drive.
log = CSVLogger('gdrive/MyDrive/final_project/code/soil/log3.csv')

# Keep only the weights of the epoch with the highest validation accuracy.
mc = ModelCheckpoint(
    'gdrive/MyDrive/final_project/code/soil/resnetV2_augment_lr_600_3.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Stop once the monitored metric has not improved by at least 1e-4 for 20
# epochs. monitor/mode are not set here, so the callback's defaults apply
# (val_loss according to the Keras docs -- confirm), which differs from the
# val_accuracy used by the checkpoint. restore_best_weights is not enabled.
early = EarlyStopping(
    min_delta=0.0001,
    patience=20,
)

batch_size = 10
# Whole batches per epoch for each subset.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size
  


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Warm start: load previously saved weights into the freshly built model before
# training. The file must already exist on Drive (presumably written by an
# earlier session -- confirm).
model.load_weights('gdrive/MyDrive/final_project/code/resnetV2_augment_lr_600.h5')

In [None]:
# First training pass: at most 10 epochs on the augmented training generator,
# validated on the held-out subset after every epoch.
# NOTE(review): the output recorded for this cell shows checkpoints written to
# code/resnetV2_augment_lr_600.h5, which is not the path configured for `mc`
# in this notebook -- the callbacks cell appears to have been edited afterwards.
history2 = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_steps=STEP_SIZE_VALID,
    class_weight=ClassWeights,
    callbacks=[early, mc, log],
)

Epoch 1/10
Epoch 00001: val_accuracy improved from -inf to 0.83495, saving model to gdrive/MyDrive/final_project/code/resnetV2_augment_lr_600.h5
Epoch 2/10
Epoch 00002: val_accuracy improved from 0.83495 to 0.83548, saving model to gdrive/MyDrive/final_project/code/resnetV2_augment_lr_600.h5
Epoch 3/10
Epoch 00003: val_accuracy improved from 0.83548 to 0.83763, saving model to gdrive/MyDrive/final_project/code/resnetV2_augment_lr_600.h5
Epoch 4/10
Epoch 00004: val_accuracy did not improve from 0.83763
Epoch 5/10
Epoch 00005: val_accuracy did not improve from 0.83763
Epoch 6/10
Epoch 00006: val_accuracy did not improve from 0.83763
Epoch 7/10
Epoch 00007: val_accuracy did not improve from 0.83763
Epoch 8/10
Epoch 00008: val_accuracy did not improve from 0.83763
Epoch 9/10
Epoch 00009: val_accuracy improved from 0.83763 to 0.83925, saving model to gdrive/MyDrive/final_project/code/resnetV2_augment_lr_600.h5
Epoch 10/10
Epoch 00010: val_accuracy did not improve from 0.83925


In [None]:
# Reload weights before the longer training run below.
# NOTE(review): `mc` is configured to write to
# code/soil/resnetV2_augment_lr_600_3.h5, so on a clean top-to-bottom run this
# file is not the checkpoint produced by the preceding fit -- confirm that
# restarting from this file is intended.
model.load_weights('gdrive/MyDrive/final_project/code/resnetV2_augment_lr_600.h5')

In [None]:
# Main training pass: up to 50 epochs with the same data, class weights and
# callbacks as the shorter pass above; `early` may end it sooner.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=50,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_steps=STEP_SIZE_VALID,
    class_weight=ClassWeights,
    callbacks=[early, mc, log],
)

Epoch 1/50
Epoch 00001: val_accuracy improved from -inf to 0.83871, saving model to gdrive/MyDrive/final_project/code/soil/resnetV2_augment_lr_600_3.h5
Epoch 2/50
Epoch 00002: val_accuracy did not improve from 0.83871
Epoch 3/50
Epoch 00003: val_accuracy did not improve from 0.83871
Epoch 4/50
Epoch 00004: val_accuracy did not improve from 0.83871
Epoch 5/50
Epoch 00005: val_accuracy did not improve from 0.83871
Epoch 6/50
Epoch 00006: val_accuracy did not improve from 0.83871
Epoch 7/50
Epoch 00007: val_accuracy did not improve from 0.83871
Epoch 8/50
Epoch 00008: val_accuracy did not improve from 0.83871
Epoch 9/50
Epoch 00009: val_accuracy did not improve from 0.83871
Epoch 10/50
Epoch 00010: val_accuracy did not improve from 0.83871
Epoch 11/50
Epoch 00011: val_accuracy did not improve from 0.83871
Epoch 12/50
Epoch 00012: val_accuracy did not improve from 0.83871
Epoch 13/50
Epoch 00013: val_accuracy improved from 0.83871 to 0.83925, saving model to gdrive/MyDrive/final_project/co

In [None]:
# Restore the checkpoint file that `mc` writes to. Because the checkpoint uses
# save_best_only=True on val_accuracy, this replaces the last-epoch weights
# with those of the best validation epoch before predicting.
model.load_weights('gdrive/MyDrive/final_project/code/soil/resnetV2_augment_lr_600_3.h5')

In [None]:
import matplotlib.pyplot as plt


# Per-epoch metrics of the most recent training run (`history`).
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Plot styling is set before any figure exists so that it applies to this
# figure as well as to later ones (figure-level settings such as the face
# colour are only read when a figure is created).
plt.rcParams['figure.figsize'] = [16, 9]
plt.rcParams['font.size'] = 14
plt.rcParams['axes.grid'] = True
plt.rcParams['figure.facecolor'] = 'white'

# Accuracy curves on a figure of their own: a notebook's inline backend renders
# and closes the figure when the cell finishes, so a panel reserved here for a
# later cell would simply stay empty.
fig_acc, ax_acc = plt.subplots(figsize=(10, 8))
ax_acc.plot(acc, label='Training Accuracy')
ax_acc.plot(val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_xlabel('epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title(f'\nTraining and Validation Accuracy. \nTrain Accuracy: {acc[-1]}\nValidation Accuracy: {val_acc[-1]}')
plt.show()

In [None]:
# Loss curves.
# A notebook's inline backend closes every figure when its cell finishes, so
# this cell cannot add a second panel to a figure opened in an earlier cell
# (plt.subplot(2, 1, 2) would start a new, half-empty figure). The loss is
# therefore drawn on its own full-size axes.
fig_loss, ax_loss = plt.subplots(figsize=(10, 8))
ax_loss.plot(loss, label='Training Loss')
ax_loss.plot(val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_ylabel('Cross Entropy')
ax_loss.set_title(f'Training and Validation Loss. \nTrain Loss: {loss[-1]}\nValidation Loss: {val_loss[-1]}')
ax_loss.set_xlabel('epoch')
fig_loss.tight_layout(pad=3.0)
plt.show()

In [None]:
# Build an ordered (non-shuffled) image generator over the test files listed in
# the submission template, so predictions come back in the template's row order.
test_files = pd.read_csv('/content/gdrive/MyDrive/final_project/sample_output.csv')
test_files['Type'] = test_files['Type'].astype('str')
test_dir = 'gdrive/MyDrive/final_project/test/test'

# Same rescaling as for training/validation, without augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)
testing_generator = test_datagen.flow_from_dataframe(
    dataframe=test_files,
    directory=test_dir,
    # Must match the resolution the model was built and trained with
    # (input_shape=(600,600,3), generators at 600x600). This was (500,500),
    # which does not fit the model's declared input.
    target_size=(600, 600),
    x_col='Name',
    y_col='Type',
    batch_size=10,
    class_mode=None,   # images only, no labels
    shuffle=False,     # keep file order so predictions line up with `Name`
)

Found 2800 validated image filenames.


In [None]:
# Run the model over every batch of the test generator. The result is expected
# to hold one row of 4 softmax scores per test image (the model ends in
# Dense(4, softmax)); the next cell reduces it with argmax over axis 1.
pred = model.predict(testing_generator)

In [None]:
# Reduce the per-class scores to one class index per image.
# `pred` is overwritten in place, so without the dimensionality guard a second
# run of this cell would call argmax(axis=1) on the already 1-D label vector
# and raise an axis error; the guard makes the cell safe to re-run.
if np.ndim(pred) > 1:
    pred = np.argmax(pred, axis=1)
pd.DataFrame(pred)

Unnamed: 0,0
0,3
1,1
2,0
3,0
4,0
...,...
2795,3
2796,2
2797,3
2798,3


## export output csv

In [None]:
# Put the predictions into the submission template and save it as CSV.
output = pd.read_csv("gdrive/MyDrive/final_project/sample_output.csv")

# Assigning a DataFrame to a column aligns on the index and silently fills NaN
# when the lengths differ; check the length explicitly and assign the plain
# array so that a mismatch between predictions and template rows is an error.
if len(pred) != len(output):
    raise ValueError(
        f"got {len(pred)} predictions for {len(output)} rows in sample_output.csv"
    )
output["Type"] = np.asarray(pred)
output.to_csv('gdrive/MyDrive/final_project/output/resnetV2_augment_lr_600_3.csv', index=False)
output

Unnamed: 0,Name,Type
0,000d70d25191ad64f00ca88a227c5985.jpg,3
1,0030def9579a3cb2f5d334dee7a1fb78.jpg,1
2,00323f13dd4c931d2b98382318fff36f.jpg,0
3,0047247f1be33a5a0cee4470760dcdf7.jpg,0
4,006ef2a336bd15f53b50440837a847b2.jpg,0
...,...,...
2795,ffa9fa81f310f5127c638b614a5a6034.jpg,3
2796,ffc3fa52be8106b071dbb4d45dab5041.jpg,2
2797,ffdc0dbd42650ace9a1be2c0d11bfa73.jpg,3
2798,ffed19041a89c1e6d9551d43d4584bc1.jpg,3
