# Load Images

In [1]:
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
from google.cloud import storage
from google.oauth2 import service_account

In [3]:
def get_images_to_folder(bucket_name="mvp_youtube_optimizer",
                         storage_dir="lunch3",
                         local_dir="bucket_data_2/",
                         credentials_path="massive-pen-365111-8eaed18fb748.json"):
    """Download every blob under a bucket prefix into one flat local folder.

    Each blob is written to ``local_dir`` under its blob name with every
    ``/`` replaced by ``_`` (so ``lunch3/abc_123`` becomes ``lunch3_abc_123``).

    Parameters
    ----------
    bucket_name : str
        Name of the Google Cloud Storage bucket to read from.
    storage_dir : str
        Prefix used to select the blobs to download.
    local_dir : str
        Local folder that receives the files; created if it does not exist.
    credentials_path : str
        Path to the service-account JSON key file.
        NOTE(review): keep this key file out of version control.

    Returns
    -------
    None
    """
    # Create the destination up front so the download works on a fresh checkout.
    os.makedirs(local_dir, exist_ok=True)

    my_credentials = service_account.Credentials.from_service_account_file(credentials_path)
    client = storage.Client(credentials=my_credentials)
    bucket = client.bucket(bucket_name)

    for blob in bucket.list_blobs(prefix=storage_dir):
        # Names ending in "/" are zero-byte "folder" placeholder objects, not images.
        if blob.name.endswith('/'):
            continue
        filename = blob.name.replace('/', '_')
        blob.download_to_filename(os.path.join(local_dir, filename))

In [4]:
def load_images_from_folder(folder='/Users/nicolafriedrich/code/jacksharples1/youtube_optimizer/bucket_data_2',
                            expected_shape=(180, 320, 3)):
    """Read the images in ``folder`` and parse their targets from the file names.

    The target of a file is the integer that follows the last underscore in
    its name. Files whose image shape differs from ``expected_shape`` are
    skipped and their shape is printed.

    Parameters
    ----------
    folder : str
        Directory containing the downloaded image files.
    expected_shape : tuple of int
        Shape an image must have to be kept.

    Returns
    -------
    images : list
        One entry per kept image; each entry is the image converted with
        ``list(img)`` (a list of its row arrays).
    views : list of int
        Integer parsed from each kept file name, aligned with ``images``.

    Raises
    ------
    ValueError
        If the text after the last underscore of a kept file is not an integer.
    """
    expected_shape = tuple(expected_shape)
    images = []
    views = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) reproducible.
    for filename in sorted(os.listdir(folder)):
        # Skip hidden files (e.g. .DS_Store) and names too short to carry a
        # "lunch3" prefix plus anything else.
        if filename.startswith('.') or len(filename) < len('lunch3') + 1:
            continue

        img = plt.imread(os.path.join(folder, filename))
        if img.shape != expected_shape:
            print(img.shape)
            continue

        last_underscore = filename.rfind('_')
        y = int(filename[last_underscore + 1:])
        images.append(list(img))
        views.append(y)
    return images, views


In [5]:
def dataloading():
    """Download the images, load them and return a 70/30 train/test split.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, X_test, y_train, y_test)``; the split uses
        ``test_size=0.3`` and ``random_state=0``.
    """
    get_images_to_folder()
    images, targets = load_images_from_folder()
    # train_test_split yields the four parts in the order train/test images,
    # then train/test targets; convert each part to an array in one pass.
    parts = train_test_split(images, targets, test_size=0.3, random_state=0)
    X_train, X_test, y_train, y_test = (np.array(part) for part in parts)
    return X_train, X_test, y_train, y_test

In [6]:
# Run the full loading pipeline. Any shapes printed below belong to images
# that load_images_from_folder skipped because of an unexpected size.
X_train, X_test, y_train, y_test = dataloading()

(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)
(90, 120, 3)


# Model

In [7]:
from keras.applications.xception import Xception
from keras.layers import GlobalAveragePooling2D, Dense, Flatten
from keras.models import Sequential
#from keras.optimizers import SGD
#import numpy as np

2022-11-29 17:11:50.867568: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
def base_model():
    """Return a frozen Xception feature extractor.

    The network is created with ImageNet weights, without its top, for
    inputs of shape (180, 320, 3), and is marked non-trainable.
    """
    backbone = Xception(
        weights="imagenet",
        input_shape=(180, 320, 3),
        include_top=False,
    )
    # Freeze the pretrained weights.
    backbone.trainable = False
    return backbone

In [9]:
def complete_model():
    """Build and compile the regression model on top of the frozen backbone.

    Layers: base_model() -> global average pooling -> Flatten -> Dense(100,
    relu) -> Dense(50, relu) -> Dense(1, linear). Compiled with MSE loss,
    the Adam optimizer and MAE as metric.
    """
    # Create the backbone first so layers are instantiated in stacking order.
    backbone = base_model()
    stack = [
        backbone,
        GlobalAveragePooling2D(),
        # NOTE(review): in the recorded summary the pooled output is already
        # (None, 2048), so this Flatten leaves the shape unchanged.
        Flatten(),
        Dense(100, activation='relu'),
        Dense(50, activation='relu'),
        Dense(1, activation='linear'),
    ]
    model = Sequential(stack)

    model.compile(optimizer='adam', loss="mse", metrics=["mae"])
    return model

In [10]:
# Build the compiled model and print its layer-by-layer summary.
model = complete_model()
model.summary()

2022-11-29 17:12:01.055741: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 xception (Functional)       (None, 6, 10, 2048)       20861480  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
 flatten (Flatten)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 100)               204900    
                                                                 
 dense_1 (Dense)             (None, 50)                5050      
                                                                 
 dense_2 (Dense)             (None, 1)                 51        
                                                        

In [11]:
# Sanity check: the trailing dimensions should be (180, 320, 3), the input
# shape the model was built for.
X_train.shape

(4334, 180, 320, 3)

In [12]:
from tensorflow.keras.callbacks import EarlyStopping
# Early-stopping callback: patience of 15 epochs, restoring the best weights
# when training stops. It only has an effect when handed to model.fit via
# the `callbacks` argument.
es = EarlyStopping(patience = 15, restore_best_weights = True)

In [None]:
# Train with 20% of the training data held out for validation. The
# early-stopping callback `es` is passed so training halts once the monitored
# metric stops improving, instead of always running all 1000 epochs.
history = model.fit(X_train, y_train, epochs=1000, batch_size=16,
                    validation_split=0.2, callbacks=[es])

Epoch 1/1000

# Baseline model

In [None]:
from sklearn.metrics import mean_absolute_error

In [None]:
# Baseline: mean absolute error obtained by always predicting the mean of the
# training targets. A useful reference value for the model's "mae" metric.
np.abs(y_train - y_train.mean()).mean()