<a href="https://colab.research.google.com/github/YixinFan11/Machine-Learning-in-Science-II/blob/master/Transfer_Learning_Model_1(two_models).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Downloading Data

In [3]:
!pip install kaggle --upgrade
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c machine-learning-in-science-2022
!mkdir content/data
!unzip machine-learning-in-science-2022.zip -d data

machine-learning-in-science-2022.zip: Skipping, found more recently modified local copy (use --force to force download)
mkdir: cannot create directory ‘content/data’: No such file or directory
Archive:  machine-learning-in-science-2022.zip
replace data/sampleSubmission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


## Header Files

In [4]:

# python standard libraries
import os
import random
import fnmatch
import datetime
import pickle
import seaborn as sns

# data processing
import numpy as np
np.set_printoptions(formatter={'float_kind':lambda x: "%.4f" % x})

import pandas as pd
pd.set_option('display.width', 300)
pd.set_option('display.float_format', '{:,.4f}'.format)
pd.set_option('display.max_colwidth', 200)

# tensorflow
import tensorflow as tf
import keras
from keras.models import *
from keras.models import Sequential  # V2 is tensorflow.keras.xxxx, V1 is keras.xxx
from keras.layers import Conv2D, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam,SGD
from keras.models import load_model

print( f'tf.__version__: {tf.__version__}' )
print( f'keras.__version__: {keras.__version__}' )

# sklearn
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# imaging
import cv2
from imgaug import augmenters as img_aug
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
from PIL import Image
from tqdm import tqdm

tf.__version__: 2.8.0
keras.__version__: 2.8.0


## Normalizing the Data

In [5]:
# Loading the Data
Data = pd.read_csv('/content/data/training_norm.csv')
print('Normalized Angle \n',Data.head())

# De normalizing the angle
# Data['angle'] = Data['angle'].apply(lambda ang: float(ang * 80) + 50)
Data[:10]
Data.sort_values(by=['image_id'])
print('Denormalized Angle \n',Data.head())

Normalized Angle 
    image_id  angle  speed
0         1 0.4375 0.0000
1         2 0.8125 1.0000
2         3 0.4375 1.0000
3         4 0.6250 1.0000
4         5 0.5000 0.0000
Denormalized Angle 
    image_id  angle  speed
0         1 0.4375 0.0000
1         2 0.8125 1.0000
2         3 0.4375 1.0000
3         4 0.6250 1.0000
4         5 0.5000 0.0000


## Missing Images

In [6]:
# I took the opprotunity to resize the images anyways
path = '/content/data/training_data/training_data'

## Takes around 2 minutes remember
corrupted_images = [] 

# Resizing the images and the getting the corrupt images
# It takes 6min 10sec
from PIL import Image
import os, sys
import glob
for filename in tqdm(glob.iglob(path + '**/*.png', recursive=True)):
    #print(filename)
    try:
        im = Image.open(filename)
        im = im.resize((224,224), Image.ANTIALIAS)
        im.save(filename , 'png', quality=90)
    except:
        corrupted_images.append(filename)

13798it [06:45, 34.04it/s]


In [7]:
print(corrupted_images)

['/content/data/training_data/training_data/8285.png', '/content/data/training_data/training_data/10171.png', '/content/data/training_data/training_data/3999.png', '/content/data/training_data/training_data/3141.png', '/content/data/training_data/training_data/4895.png']


## Data Split

In [8]:
def get_label(file_path):
    file_id = int(str(file_path).split('/')[-1].split('.')[0])
    img_id, angle, speed = Data[Data['image_id'] == int(file_id)].to_numpy().squeeze()
    return angle,speed

In [9]:
img_path = []

import os, sys
import glob
for filename in glob.iglob(path + '**/*.png', recursive=True):
    if filename not in corrupted_images:
        img_path.append(filename)
print(len(img_path))

Y_angle = []
Y_speed = []
# for i in img_path:
#     vals = []
#     a,s = get_label(i)
#     vals.append(a) ; vals.append(s);
#     Y.append(vals)
for i in img_path:
    vals = []
    a,s = get_label(i)
    Y_angle.append(a)
    Y_speed.append(s)


x_train_angle, x_valid_angle, y_train_angle, y_valid_angle = train_test_split(img_path , Y_angle , test_size = 0.25)
x_train_speed, x_valid_speed, y_train_speed, y_valid_speed = train_test_split(img_path , Y_speed , test_size = 0.25)
print(len(x_train_angle),len(x_valid_angle),len(x_train_speed),len(x_valid_speed))
print(len(y_train_angle),len(y_valid_angle),len(y_train_speed),len(y_valid_speed))

13793
10344 3449 10344 3449
10344 3449 10344 3449


In [10]:
print(Y_angle[0:10])
print(Y_speed[0:10])

[0.5625, 0.875, 0.8125, 0.5, 0.8125, 0.625, 0.6875, 0.75, 0.8125, 0.6875]
[1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]


In [None]:
# x_train, x_valid, y_train, y_valid = train_test_split(img_path, Y, test_size=0.30)
# print(len(x_train),len(x_valid))
# print(len(y_train),len(y_valid))

In [11]:
print(y_train_angle[:10])
print(y_valid_angle[:10])

[0.6875, 0.8125, 0.5, 0.5, 0.75, 0.625, 0.625, 0.4375, 0.4375, 0.5625]
[0.5625, 0.6875, 0.5, 0.6875, 0.8125, 0.5625, 0.4375, 0.6875, 0.625, 0.375]


In [12]:
def process_image(path):
    '''Read the image's path and return the image'''
    image = tf.io.read_file(path)
    image = tf.image.decode_png(image, channels=3)
    image = image/255
    #tf.reshape(image,(224,224,3))
    return image

## Image Data Generator

In [None]:
'''
IDEAs 
    1 - Get the paths of all the images if the image is not corrupt then generate its label and then make pd dataframe with - content , angle , speed
    2 - Send the pd data frame to image data generator - get the image from the first column and second column will have the labels
'''

'\nIDEAs \n    1 - Get the paths of all the images if the image is not corrupt then generate its label and then make pd dataframe with - content , angle , speed\n    2 - Send the pd data frame to image data generator - get the image from the first column and second column will have the labels\n'

In [13]:
def image_data_generator(image_paths, steering_angles, batch_size):
    while True:
        batch_images = []
        batch_steering_angles = []
        for i in range(batch_size):
            random_index = random.randint(0, len(image_paths) - 1)
            image_path = image_paths[random_index]
            steering_angle = steering_angles[random_index]
            image = process_image(image_path)
            batch_images.append(image)
            batch_steering_angles.append(steering_angle)
        yield( np.asarray(batch_images), np.asarray(batch_steering_angles))

## Transfer Learning Code

### Transfer Learning(Angel)

In [14]:
#Angle model
from keras.applications.inception_v3 import InceptionV3
incept_angle= InceptionV3(input_shape = (224,224,3),weights='imagenet',include_top=False)
incept_angle.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [15]:
from keras.layers import Dense, GlobalAveragePooling2D 

for layer in incept_angle.layers[290:]: 
    layer.trainable = True 
x = GlobalAveragePooling2D()(incept_angle.output)
x = Flatten()(x)
x = Dense(1024, activation ='relu')(x)#,kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))(x)
# let's add a fully-connected layer as first layer
x = Dense(256, activation ='relu')(x)#,kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))(x)
x = Dense(64, activation ='relu')(x)#,kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))(x)
x = Dense(32, activation ='relu')(x)#,kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))(x)
# x = Dense( 32, activation ='relu')(x)
pred_angle = Dense(1)(x)

In [16]:
from keras.models import Model
my_inc_angle = Model(inputs = incept_angle.input,outputs=pred_angle)
my_inc_angle.summary()
my_inc_angle.compile(optimizer = 'adam',loss='mse',metrics=['mse'])
my_inc_angle.optimizer.learning_rate = 0.000005

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 111, 111, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization (BatchNorm  (None, 111, 111, 32  96         ['conv2d[0][0]']                 
 alization)                     )                                                             

In [17]:
history = my_inc_angle.fit_generator(image_data_generator(x_train_angle, y_train_angle, batch_size=50),
                              steps_per_epoch=500,
                              epochs=20,
                              validation_data = image_data_generator(x_valid_angle,y_valid_angle, batch_size=50),
                              validation_steps=500,
                              verbose=1,
                              shuffle=1)

  import sys


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [18]:
print(x_train_angle[:10])
print(y_train_angle[:10])
print(x_valid_angle[:10])
print(y_valid_angle[:10])

['/content/data/training_data/training_data/9082.png', '/content/data/training_data/training_data/2894.png', '/content/data/training_data/training_data/2753.png', '/content/data/training_data/training_data/9152.png', '/content/data/training_data/training_data/13036.png', '/content/data/training_data/training_data/13014.png', '/content/data/training_data/training_data/8020.png', '/content/data/training_data/training_data/10971.png', '/content/data/training_data/training_data/10789.png', '/content/data/training_data/training_data/11981.png']
[0.6875, 0.8125, 0.5, 0.5, 0.75, 0.625, 0.625, 0.4375, 0.4375, 0.5625]
['/content/data/training_data/training_data/636.png', '/content/data/training_data/training_data/8102.png', '/content/data/training_data/training_data/4970.png', '/content/data/training_data/training_data/5088.png', '/content/data/training_data/training_data/5638.png', '/content/data/training_data/training_data/9612.png', '/content/data/training_data/training_data/6120.png', '/con

### Transfer Learning(Speed)

In [19]:
from keras.applications.inception_v3 import InceptionV3
incept_speed= InceptionV3(input_shape = (224,224,3),weights='imagenet',include_top=False)
incept_speed.trainable = False

In [20]:
from keras.layers import Dense, GlobalAveragePooling2D 
from keras.regularizers import l1,l2
for layer in incept_speed.layers[290:]: 
    layer.trainable = True 
y = GlobalAveragePooling2D()(incept_speed.output)
# y = Flatten()(incept_speed.output)
y = Dense(1024, activation ='relu')(y)
# let's add a fully-connected layer as first layer
y = Dense(256, activation ='relu')(y)
y = Dense(64, activation ='relu')(y)
y = Dense(32, activation ='relu')(y)
# y = Dense(1024, activation ='relu')#,kernel_regularizer=l1(0.0001), bias_regularizer=l1(0.0001))(y)
# y = Dense(256, activation ='relu')#,kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))(y)
# y = Dense(64, activation ='relu')#,kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))(y)
# y = Dense(32, activation ='relu')#,kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))(y)
# y = Dense( 32, activation ='relu')(y)
pred_speed = Dense(1,activation ='sigmoid')(y)

In [21]:
from keras.models import Model
my_inc_speed = Model(inputs = incept_speed.input,outputs=pred_speed)
my_inc_speed.summary()
loss_function = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
my_inc_speed.compile(optimizer = 'adam',loss="mse",metrics=["mse"])
my_inc_speed.optimizer.learning_rate = 0.000002

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_94 (Conv2D)             (None, 111, 111, 32  864         ['input_2[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization_94 (BatchN  (None, 111, 111, 32  96         ['conv2d_94[0][0]']              
 ormalization)                  )                                                           

In [22]:
history = my_inc_speed.fit_generator(image_data_generator(x_train_speed, y_train_speed, batch_size=50),
                              steps_per_epoch=500, epochs=20,
                              validation_data = image_data_generator(x_valid_speed,y_valid_speed, batch_size=50),
                              validation_steps=500,
                              verbose=1, shuffle=1)

  """


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [23]:
print(x_train_speed[:10])
print(y_train_speed[:10])
print(x_valid_speed[:10])
print(y_valid_speed[:10])

['/content/data/training_data/training_data/9449.png', '/content/data/training_data/training_data/4705.png', '/content/data/training_data/training_data/638.png', '/content/data/training_data/training_data/13471.png', '/content/data/training_data/training_data/6402.png', '/content/data/training_data/training_data/7008.png', '/content/data/training_data/training_data/13381.png', '/content/data/training_data/training_data/7693.png', '/content/data/training_data/training_data/12905.png', '/content/data/training_data/training_data/1101.png']
[1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0]
['/content/data/training_data/training_data/12968.png', '/content/data/training_data/training_data/7927.png', '/content/data/training_data/training_data/109.png', '/content/data/training_data/training_data/13151.png', '/content/data/training_data/training_data/499.png', '/content/data/training_data/training_data/2483.png', '/content/data/training_data/training_data/12242.png', '/content/data/training_da

In [25]:
testing_path = '/content/data/test_data/test_data'

In [26]:
corrupted_image_test = []

for filename in glob.iglob(testing_path + '**/*.png', recursive=True):
    try:
        im = Image.open(filename)
        im = im.resize((224,224), Image.ANTIALIAS)
        im.save(filename , 'png', quality=90)
    except:
        corrupted_image_test.append(filename)


In [27]:
print(corrupted_image_test)

[]


In [28]:
img_path_test = []
import os, sys
import glob
for filename in glob.iglob(testing_path + '**/*.png', recursive=True):
    if filename not in corrupted_image_test:
        img_path_test.append(filename)
print(len(img_path_test))

1020


In [29]:
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()
image_ids = []
result_angle = []
result_speed = []
for i in img_path_test:
    img = process_image(i);
    i_id = int(str(i).split('/')[-1].split('.')[0])
    img = img.reshape(1,224,224,3)
    res_angle = my_inc_angle.predict(img)
    res_speed = my_inc_speed.predict(img)
    result_angle.append(res_angle)
    result_speed.append(res_speed)
    image_ids.append(i_id)


In [30]:
final_result_angle = list(map(lambda x: x.tolist(), result_angle))
final_result_angle = [item for sublist in final_result_angle for item in sublist]
final_result_angle = [item for sublist in final_result_angle for item in sublist]
print(final_result_angle[:5])
final_result_speed = list(map(lambda x: x.tolist(), result_speed))
final_result_speed = [item for sublist in final_result_speed for item in sublist]
final_result_speed = [item for sublist in final_result_speed for item in sublist]
print(final_result_speed[:5])
print(image_ids[:5])

[0.7001897692680359, 0.09602244198322296, 0.6054126024246216, 0.6015574932098389, 0.7169943451881409]
[0.006520142313092947, 0.8983640074729919, 0.001038573682308197, 0.9949659705162048, 0.8707300424575806]
[767, 4, 796, 535, 256]


In [31]:
# Import pandas library
import pandas as pd
 
# Create the pandas DataFrame
test_pred = pd.DataFrame({'image_id': image_ids, 'angel': final_result_angle, 'speed': final_result_speed})

test_pred.head(5)

Unnamed: 0,image_id,angel,speed
0,767,0.7002,0.0065
1,4,0.096,0.8984
2,796,0.6054,0.001
3,535,0.6016,0.995
4,256,0.717,0.8707


In [32]:
sort_result = test_pred.sort_values(test_pred.columns[0], ascending = True).reset_index(drop=True)
sort_result.head(5)

Unnamed: 0,image_id,angel,speed
0,1,0.5606,0.0092
1,2,0.6827,0.9979
2,3,0.3042,0.9614
3,4,0.096,0.8984
4,5,0.311,0.9987


In [33]:
from google.colab import drive
drive.mount('drive')

sort_result.to_csv('/content/drive/My Drive/tesla_submission_2models.csv', encoding='utf-8', index=False)

Mounted at drive


In [None]:
!kaggle competitions submit -c machine-learning-in-science-2022 -f submission.csv -m "Message"