<a href="https://colab.research.google.com/github/YixinFan11/Machine-Learning-in-Science-II/blob/master/Transfer_Learning_Model(two_models).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Downloading Data

In [None]:
!pip install kaggle --upgrade
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c machine-learning-in-science-2022
!mkdir content/data
!unzip machine-learning-in-science-2022.zip -d data

machine-learning-in-science-2022.zip: Skipping, found more recently modified local copy (use --force to force download)
mkdir: cannot create directory ‘content/data’: No such file or directory
Archive:  machine-learning-in-science-2022.zip
replace data/sampleSubmission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


## Imports and Configuration

In [None]:

# python standard libraries
import os
import random
import fnmatch
import datetime
import pickle
import seaborn as sns

# data processing
import numpy as np
np.set_printoptions(formatter={'float_kind':lambda x: "%.4f" % x})

import pandas as pd
pd.set_option('display.width', 300)
pd.set_option('display.float_format', '{:,.4f}'.format)
pd.set_option('display.max_colwidth', 200)

# tensorflow
import tensorflow as tf
import keras
from keras.models import *
from keras.models import Sequential  # V2 is tensorflow.keras.xxxx, V1 is keras.xxx
from keras.layers import Conv2D, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam,SGD
from keras.models import load_model

print( f'tf.__version__: {tf.__version__}' )
print( f'keras.__version__: {keras.__version__}' )

# sklearn
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# imaging
import cv2
from imgaug import augmenters as img_aug
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
from PIL import Image
from tqdm import tqdm

tf.__version__: 2.8.0
keras.__version__: 2.8.0


## Normalizing the Data

In [None]:
# Load the training labels: one row per image_id with its angle and speed.
Data = pd.read_csv('/content/data/training_norm.csv')
print('Normalized Angle \n',Data.head())

# De-normalizing the angle is intentionally left disabled, so the values stay normalized.
# Data['angle'] = Data['angle'].apply(lambda ang: float(ang * 80) + 50)

# sort_values returns a new frame; assign it back, otherwise the sort has no effect.
Data = Data.sort_values(by=['image_id']).reset_index(drop=True)
print('Sorted by image_id \n',Data.head())

Normalized Angle 
    image_id  angle  speed
0         1 0.4375 0.0000
1         2 0.8125 1.0000
2         3 0.4375 1.0000
3         4 0.6250 1.0000
4         5 0.5000 0.0000
Denormalized Angle 
    image_id  angle  speed
0         1 0.4375 0.0000
1         2 0.8125 1.0000
2         3 0.4375 1.0000
3         4 0.6250 1.0000
4         5 0.5000 0.0000


## Missing Images

In [None]:
# Resize every training image to 224x224 in place (the original file is overwritten)
# and collect the files that cannot be read so later cells can exclude them.
# NOTE: this is slow - the recorded run took roughly 6 minutes.
import glob
import os, sys

from PIL import Image

path = '/content/data/training_data/training_data'
corrupted_images = []

# Build the pattern with explicit separators: '**' only recurses when it is a
# complete path component, which plain string concatenation did not guarantee.
png_pattern = os.path.join(path, '**', '*.png')

for filename in tqdm(glob.iglob(png_pattern, recursive=True)):
    try:
        # The context manager closes the file handle; resize() returns an
        # independent, fully loaded image, so saving after the block is safe.
        with Image.open(filename) as im:
            # LANCZOS is the filter formerly exposed as the deprecated ANTIALIAS alias.
            resized = im.resize((224, 224), Image.LANCZOS)
        resized.save(filename, 'png')
    except Exception:
        # Best effort: any unreadable file is recorded rather than aborting the run.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        corrupted_images.append(filename)

13798it [05:47, 39.73it/s]


In [None]:
# Show the paths of the training images that could not be opened or resized.
print(corrupted_images)

['/content/data/training_data/training_data/8285.png', '/content/data/training_data/training_data/3141.png', '/content/data/training_data/training_data/4895.png', '/content/data/training_data/training_data/10171.png', '/content/data/training_data/training_data/3999.png']


## Data Split

In [None]:
def get_label(file_path, labels=None):
    '''Return the (angle, speed) labels of the image stored at `file_path`.

    The image id is the file name up to its first dot, e.g. `.../123.png` -> 123.

    Args:
        file_path: Path (or path-like) of the image file.
        labels: Optional DataFrame with `image_id`, `angle` and `speed` columns.
            Defaults to the notebook-level `Data` frame.

    Returns:
        Tuple `(angle, speed)` of floats.

    Raises:
        ValueError: If the file name is not an integer id, or if the id does not
            match exactly one row of the label table.
    '''
    table = Data if labels is None else labels
    file_id = int(os.path.basename(str(file_path)).split('.')[0])
    matches = table[table['image_id'] == file_id]
    # Fail with an explicit message instead of an opaque tuple-unpacking error.
    if len(matches) != 1:
        raise ValueError(
            f'expected exactly one label row for image_id {file_id}, found {len(matches)}'
        )
    row = matches.iloc[0]
    # Select by column name so the result does not depend on column order.
    return float(row['angle']), float(row['speed'])

In [None]:
# Collect the readable training images, look up their labels and split them
# into training and validation sets for the angle and the speed model.
import glob
import os, sys

# Fixed seed so the split is reproducible across kernel restarts. Both calls
# below use the same seed and the same number of samples, so the angle and
# speed models are validated on the same held-out images.
SPLIT_SEED = 42

# Explicit separators so that '**' is a complete (recursive) path component.
png_pattern = os.path.join(path, '**', '*.png')
corrupted_lookup = set(corrupted_images)
img_path = [
    filename
    for filename in glob.iglob(png_pattern, recursive=True)
    if filename not in corrupted_lookup
]
print(len(img_path))

Y_angle = []
Y_speed = []
for i in img_path:
    a, s = get_label(i)
    Y_angle.append(a)
    Y_speed.append(s)

x_train_angle, x_valid_angle, y_train_angle, y_valid_angle = train_test_split(
    img_path, Y_angle, test_size=0.25, random_state=SPLIT_SEED)
x_train_speed, x_valid_speed, y_train_speed, y_valid_speed = train_test_split(
    img_path, Y_speed, test_size=0.25, random_state=SPLIT_SEED)
print(len(x_train_angle),len(x_valid_angle),len(x_train_speed),len(x_valid_speed))
print(len(y_train_angle),len(y_valid_angle),len(y_train_speed),len(y_valid_speed))

13793
10344 3449 10344 3449
10344 3449 10344 3449


In [None]:
# Show the label count and a short preview instead of dumping every value
# into the notebook output.
print(len(Y_angle), Y_angle[:10])
print(len(Y_speed), Y_speed[:10])

[0.8125, 0.25, 0.625, 0.6875, 0.5625, 0.75, 0.4375, 0.4375, 0.6875, 0.5, 0.625, 0.5, 0.75, 0.6875, 0.8125, 0.4375, 0.6875, 0.75, 0.5, 0.375, 0.5625, 0.6875, 0.5, 0.5625, 0.5625, 0.4375, 0.5, 0.75, 0.6875, 0.4375, 0.4375, 0.75, 0.8125, 0.75, 0.75, 0.4375, 0.6875, 0.5625, 0.75, 0.6875, 0.5, 0.6875, 0.5, 0.5, 0.5625, 0.4375, 0.6875, 0.4375, 0.5, 0.625, 0.75, 0.5625, 0.4375, 0.75, 0.4375, 0.4375, 0.75, 0.5625, 0.75, 0.8125, 0.75, 0.75, 0.6875, 0.6875, 0.625, 0.5625, 0.625, 0.6875, 0.625, 0.625, 0.75, 0.75, 0.75, 0.75, 0.625, 0.4375, 0.625, 0.5625, 0.625, 0.1875, 0.5625, 0.625, 0.5, 0.5, 0.8125, 0.75, 0.4375, 0.75, 0.4375, 0.75, 0.6875, 0.4375, 0.5, 0.8125, 0.75, 0.125, 0.375, 0.5, 0.5, 0.8125, 0.6875, 0.4375, 0.6875, 0.25, 0.8125, 0.4375, 0.625, 0.5, 0.75, 0.625, 0.75, 0.5, 0.6875, 0.625, 0.625, 0.4375, 0.6875, 0.5625, 0.6875, 0.75, 0.625, 0.5, 0.4375, 0.5625, 0.5, 0.5, 0.6875, 0.5, 0.4375, 0.875, 0.6875, 0.6875, 0.75, 0.625, 0.6875, 0.6875, 0.5, 0.4375, 0.5, 0.6875, 0.75, 0.5, 0.5, 0.5625

In [None]:
# x_train, x_valid, y_train, y_valid = train_test_split(img_path, Y, test_size=0.30)
# print(len(x_train),len(x_valid))
# print(len(y_train),len(y_valid))

In [None]:
# Peek at the first ten angle labels of the training and validation splits.
for angle_labels in (y_train_angle, y_valid_angle):
    print(angle_labels[:10])

[0.75, 0.6875, 0.1875, 0.5625, 0.75, 0.6875, 0.4375, 0.5, 0.8125, 0.5]
[0.5, 0.6875, 0.4375, 0.5, 0.75, 0.6875, 0.6875, 0.1875, 0.6875, 0.75]


In [None]:
def process_image(path):
    '''Load the PNG file at `path` and return it as a 3-channel image tensor divided by 255.'''
    raw_bytes = tf.io.read_file(path)
    pixels = tf.image.decode_png(raw_bytes, channels=3)
    # No reshape happens here; the image keeps whatever size it has on disk.
    return pixels / 255

## Image Data Generator

In [None]:
'''
IDEAs 
    1 - Get the paths of all the images if the image is not corrupt then generate its label and then make pd dataframe with - content , angle , speed
    2 - Send the pd data frame to image data generator - get the image from the first column and second column will have the labels
'''

'\nIDEAs \n    1 - Get the paths of all the images if the image is not corrupt then generate its label and then make pd dataframe with - content , angle , speed\n    2 - Send the pd data frame to image data generator - get the image from the first column and second column will have the labels\n'

In [None]:
def image_data_generator(image_paths, steering_angles, batch_size):
    '''Yield an endless stream of `(images, targets)` batches.

    Each batch holds `batch_size` samples drawn uniformly at random, with
    replacement, from `image_paths` and the matching entries of
    `steering_angles`. Both elements of the yielded tuple are numpy arrays.
    '''
    while True:
        # Draw all indices for this batch first, then load the images.
        picks = [random.randint(0, len(image_paths) - 1) for _ in range(batch_size)]
        samples = [(image_paths[k], steering_angles[k]) for k in picks]
        images = [process_image(sample_path) for sample_path, _ in samples]
        targets = [target for _, target in samples]
        yield np.asarray(images), np.asarray(targets)

## Transfer Learning Code

### Transfer Learning (Angle)

In [None]:
#Angle model
from keras.applications.inception_v3 import InceptionV3
incept_angle= InceptionV3(input_shape = (224,224,3),weights='imagenet',include_top=False)
incept_angle.trainable = False

In [None]:
from keras.layers import Dense, GlobalAveragePooling2D

# Make the backbone layers from index 290 onward trainable again.
for backbone_layer in incept_angle.layers[290:]:
    backbone_layer.trainable = True

# Regression head: pooled backbone features -> stack of ReLU Dense layers -> one linear output.
x = GlobalAveragePooling2D()(incept_angle.output)
x = Flatten()(x)
for units in (1024, 256, 64, 32):
    x = Dense(units, activation='relu')(x)
pred_angle = Dense(1)(x)

In [None]:
from keras.models import Model

# Full angle network: backbone input through to the single-unit regression output.
my_inc_angle = Model(inputs=incept_angle.input, outputs=pred_angle)
my_inc_angle.summary()
# Adam with the learning rate set up front, MSE as both loss and metric.
my_inc_angle.compile(
    optimizer=Adam(learning_rate=0.000005),
    loss='mse',
    metrics=['mse'],
)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 111, 111, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization (BatchNorm  (None, 111, 111, 32  96         ['conv2d[0][0]']                 
 alization)                     )                                                             

In [None]:
# Train the angle model from the random-batch generators.
# Model.fit accepts Python generators directly; fit_generator is deprecated.
# The former `shuffle` argument is dropped because it is ignored for generator input.
history = my_inc_angle.fit(
    image_data_generator(x_train_angle, y_train_angle, batch_size=50),
    steps_per_epoch=500,
    epochs=20,
    validation_data=image_data_generator(x_valid_angle, y_valid_angle, batch_size=50),
    validation_steps=500,
    verbose=1,
)

  import sys


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# First ten paths and labels of the angle training split, then of the validation split.
for split_part in (x_train_angle, y_train_angle, x_valid_angle, y_valid_angle):
    print(split_part[:10])

['/content/data/training_data/training_data/2616.png', '/content/data/training_data/training_data/3282.png', '/content/data/training_data/training_data/11990.png', '/content/data/training_data/training_data/1201.png', '/content/data/training_data/training_data/600.png', '/content/data/training_data/training_data/5760.png', '/content/data/training_data/training_data/13.png', '/content/data/training_data/training_data/6294.png', '/content/data/training_data/training_data/11677.png', '/content/data/training_data/training_data/4667.png']
[0.75, 0.6875, 0.1875, 0.5625, 0.75, 0.6875, 0.4375, 0.5, 0.8125, 0.5]
['/content/data/training_data/training_data/11160.png', '/content/data/training_data/training_data/9088.png', '/content/data/training_data/training_data/30.png', '/content/data/training_data/training_data/10779.png', '/content/data/training_data/training_data/6607.png', '/content/data/training_data/training_data/3225.png', '/content/data/training_data/training_data/6825.png', '/content/

### Transfer Learning (Speed)

In [None]:
# Speed model: a second InceptionV3 backbone with ImageNet weights and no classification top.
from keras.applications.inception_v3 import InceptionV3

incept_speed = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
# Start with the whole backbone frozen.
incept_speed.trainable = False

In [None]:
from keras.layers import Dense, GlobalAveragePooling2D
from keras.regularizers import l1,l2

# Make the backbone layers from index 290 onward trainable again.
for backbone_layer in incept_speed.layers[290:]:
    backbone_layer.trainable = True

# Head: pooled backbone features -> stack of ReLU Dense layers -> one sigmoid output.
y = GlobalAveragePooling2D()(incept_speed.output)
for units in (1024, 256, 64, 32):
    y = Dense(units, activation='relu')(y)
pred_speed = Dense(1, activation='sigmoid')(y)

In [None]:
from keras.models import Model

# Full speed network: backbone input through to the single sigmoid output.
my_inc_speed = Model(inputs=incept_speed.input, outputs=pred_speed)
my_inc_speed.summary()
# The unused SparseCategoricalCrossentropy(from_logits=True) object was removed:
# it was never passed to compile() and does not fit a one-unit sigmoid output.
# The model is trained with MSE, and the learning rate is given to Adam directly.
my_inc_speed.compile(
    optimizer=Adam(learning_rate=0.000002),
    loss="mse",
    metrics=["mse"],
)

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_94 (Conv2D)             (None, 111, 111, 32  864         ['input_2[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization_94 (BatchN  (None, 111, 111, 32  96         ['conv2d_94[0][0]']              
 ormalization)                  )                                                           

In [None]:
# Train the speed model from the random-batch generators.
# Model.fit accepts Python generators directly; fit_generator is deprecated.
# The former `shuffle` argument is dropped because it is ignored for generator input.
history = my_inc_speed.fit(
    image_data_generator(x_train_speed, y_train_speed, batch_size=50),
    steps_per_epoch=500,
    epochs=20,
    validation_data=image_data_generator(x_valid_speed, y_valid_speed, batch_size=50),
    validation_steps=500,
    verbose=1,
)

  """


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# First ten paths and labels of the speed training split, then of the validation split.
for split_part in (x_train_speed, y_train_speed, x_valid_speed, y_valid_speed):
    print(split_part[:10])

['/content/data/training_data/training_data/5600.png', '/content/data/training_data/training_data/11472.png', '/content/data/training_data/training_data/1128.png', '/content/data/training_data/training_data/13058.png', '/content/data/training_data/training_data/8571.png', '/content/data/training_data/training_data/11016.png', '/content/data/training_data/training_data/12178.png', '/content/data/training_data/training_data/6116.png', '/content/data/training_data/training_data/6253.png', '/content/data/training_data/training_data/2403.png']
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0]
['/content/data/training_data/training_data/4304.png', '/content/data/training_data/training_data/11321.png', '/content/data/training_data/training_data/7256.png', '/content/data/training_data/training_data/2099.png', '/content/data/training_data/training_data/7138.png', '/content/data/training_data/training_data/9053.png', '/content/data/training_data/training_data/4043.png', '/content/data/training_

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

# `my_inc` is not defined anywhere in this notebook; the trained models are
# `my_inc_angle` and `my_inc_speed`. Save each one to its own file on Drive.
my_inc_angle.save('/content/gdrive/My Drive/my_inc_angle.h5')
my_inc_speed.save('/content/gdrive/My Drive/my_inc_speed.h5')

In [None]:
# Directory that is searched for the test-set PNG images.
testing_path = '/content/data/test_data/test_data'

In [None]:
# Resize every test image to 224x224 in place (the original file is overwritten)
# and collect the files that cannot be read.
import glob
import os

corrupted_image_test = []

# Explicit separators so that '**' is a complete (recursive) path component.
test_png_pattern = os.path.join(testing_path, '**', '*.png')

for filename in glob.iglob(test_png_pattern, recursive=True):
    try:
        # The context manager closes the file handle; resize() returns an
        # independent, fully loaded image, so saving after the block is safe.
        with Image.open(filename) as im:
            # LANCZOS is the filter formerly exposed as the deprecated ANTIALIAS alias.
            resized = im.resize((224, 224), Image.LANCZOS)
        resized.save(filename, 'png')
    except Exception:
        # Best effort: record unreadable files; `Exception` keeps interrupts working.
        corrupted_image_test.append(filename)


In [None]:
# Show the paths of the test images that could not be opened or resized.
print(corrupted_image_test)

[]


In [None]:
# Collect the paths of all readable test images.
import os, sys
import glob

# Explicit separators so that '**' is a complete (recursive) path component.
test_png_pattern = os.path.join(testing_path, '**', '*.png')
corrupted_test_lookup = set(corrupted_image_test)
img_path_test = [
    filename
    for filename in glob.iglob(test_png_pattern, recursive=True)
    if filename not in corrupted_test_lookup
]
print(len(img_path_test))

1020


In [None]:
# Predict the speed for every test image.
# Images are sent to the model in chunks rather than one predict() call per
# image, and np.stack adds the batch axis, so the private
# tensorflow.python numpy_ops switch is no longer needed for `.reshape`.
PREDICT_CHUNK_SIZE = 64

result_speed_1 = []
for chunk_start in range(0, len(img_path_test), PREDICT_CHUNK_SIZE):
    chunk_paths = img_path_test[chunk_start:chunk_start + PREDICT_CHUNK_SIZE]
    batch = np.stack([np.asarray(process_image(p)) for p in chunk_paths])
    chunk_preds = my_inc_speed.predict(batch)
    # Keep one (1, n_outputs) array per image so later cells can flatten
    # the results exactly as before.
    result_speed_1.extend(pred.reshape(1, -1) for pred in chunk_preds)

In [None]:
# Turn the per-image prediction arrays into one list with a single-row entry per image.
final_result_speed = [
    row
    for prediction in result_speed_1
    for row in prediction.tolist()
]
print(final_result_speed)

[[0.9908283948898315], [0.9999920129776001], [0.9799683094024658], [0.9988290667533875], [0.9706987738609314], [0.998714804649353], [0.9994538426399231], [0.18334242701530457], [0.9979166388511658], [0.008086180314421654], [0.060349080711603165], [0.9986863732337952], [0.003532665316015482], [0.002943455008789897], [0.7315493822097778], [0.9546142816543579], [0.5762920379638672], [0.23579847812652588], [0.0013195887440815568], [0.0033307704143226147], [0.99250727891922], [0.22234539687633514], [0.0007638867828063667], [0.9882943630218506], [0.9956953525543213], [0.003460899693891406], [0.9998883008956909], [0.0006355594377964735], [0.0009612221037968993], [0.01993577368557453], [0.99958735704422], [0.08928424119949341], [0.9653856754302979], [0.0026106154546141624], [0.025209877640008926], [0.0005579603603109717], [0.013730266131460667], [0.02096942812204361], [0.00364180956967175], [0.8326734304428101], [0.030933517962694168], [0.0017451229505240917], [0.00684500252828002], [0.0021171

In [None]:
# Predict angle and speed for every test image and remember each image id.
# Images are sent to the models in chunks rather than one predict() call per
# image, and np.stack adds the batch axis, so the private
# tensorflow.python numpy_ops switch is no longer needed for `.reshape`.
import os

PREDICT_CHUNK_SIZE = 64

image_ids = []
result_angle = []
result_speed = []
for chunk_start in range(0, len(img_path_test), PREDICT_CHUNK_SIZE):
    chunk_paths = img_path_test[chunk_start:chunk_start + PREDICT_CHUNK_SIZE]
    batch = np.stack([np.asarray(process_image(p)) for p in chunk_paths])
    angle_preds = my_inc_angle.predict(batch)
    speed_preds = my_inc_speed.predict(batch)
    # Keep one (1, n_outputs) array per image so the flattening cell works unchanged.
    result_angle.extend(pred.reshape(1, -1) for pred in angle_preds)
    result_speed.extend(pred.reshape(1, -1) for pred in speed_preds)
    # The image id is the file name up to its first dot, e.g. '.../180.png' -> 180.
    image_ids.extend(
        int(os.path.basename(str(p)).split('.')[0]) for p in chunk_paths
    )


In [None]:
# Flatten the per-image prediction arrays into plain lists of Python floats.
final_result_angle = [
    value
    for prediction in result_angle
    for row in prediction.tolist()
    for value in row
]
print(final_result_angle)
final_result_speed = [
    value
    for prediction in result_speed
    for row in prediction.tolist()
    for value in row
]
print(final_result_speed)
print(image_ids)

[0.3408866226673126, 0.6262978315353394, 0.15645389258861542, 0.5725411176681519, 0.42492151260375977, 0.5881374478340149, 0.7401219010353088, 0.711682915687561, 0.4291408956050873, 0.6880265474319458, 0.6716923713684082, 0.6695213913917542, 0.7004832029342651, 0.5666773915290833, 0.6837525963783264, 0.44735288619995117, 0.6923030614852905, 0.638121485710144, 0.7840225696563721, 0.7023571133613586, 0.2243773192167282, 0.7049775123596191, 0.7131773233413696, 0.48401129245758057, 0.4850512146949768, 0.529827892780304, 0.4897163510322571, 0.7369695901870728, 0.6334044337272644, 0.5008676052093506, 0.7917647361755371, 0.5836697220802307, 0.7544696927070618, 0.462216317653656, 0.8158219456672668, 0.628065824508667, 0.5483233332633972, 0.41509580612182617, 0.4787461757659912, 0.4963187873363495, 0.5884669423103333, 0.7090204954147339, 0.6830455660820007, 0.6109650135040283, 0.4531325399875641, 0.5117426514625549, 0.6047192215919495, 0.4145357012748718, 0.5294594764709473, 0.5062023997306824,

In [None]:
# Import pandas library
import pandas as pd

# Create the prediction table, one row per test image.
# The column is spelled 'angle' (it was 'angel') to match the label column of
# the training CSV.
# NOTE(review): presumably the submission format uses the same header - confirm
# against data/sampleSubmission.csv.
test_pred = pd.DataFrame({'image_id': image_ids, 'angle': final_result_angle, 'speed': final_result_speed})

test_pred.head(5)

Unnamed: 0,image_id,angel,speed
0,180,0.3409,0.9859
1,40,0.6263,0.9997
2,160,0.1565,0.9675
3,554,0.5725,0.9967
4,683,0.4249,0.9804


In [None]:
# Order the predictions by the first column, ascending, and renumber the rows from zero.
first_column = test_pred.columns[0]
sort_result = test_pred.sort_values(by=first_column).reset_index(drop=True)
sort_result.head(5)

Unnamed: 0,image_id,angel,speed
0,1,0.5976,0.0071
1,2,0.69,0.997
2,3,0.2182,0.9785
3,4,0.0284,0.9757
4,5,0.3291,0.9991


In [None]:
from google.colab import drive
# Mount at the absolute path that the CSV is written to below, so the cell
# does not depend on the current working directory.
drive.mount('/content/drive')

sort_result.to_csv('/content/drive/My Drive/tesla_submission_2models.csv', encoding='utf-8', index=False)

Drive already mounted at drive; to attempt to forcibly remount, call drive.mount("drive", force_remount=True).


In [None]:
!kaggle competitions submit -c machine-learning-in-science-2022 -f submission.csv -m "Message"