# Load Images

In [10]:
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np

In [11]:
from google.cloud import storage
from google.oauth2 import service_account

In [12]:
def get_images_to_folder(
    bucket_name="youtube_optimizer_data",
    storage_dir="thumbnail_images",
    local_dir="bucket_data/",
    credentials_file="massive-pen-365111-8eaed18fb748.json",
):
    """Download every blob under ``storage_dir`` of a GCS bucket into ``local_dir``.

    Each blob is saved under its full blob name with every ``/`` replaced by
    ``_`` (e.g. ``thumbnail_images/abc`` is written as ``thumbnail_images_abc``),
    so all downloads end up flat inside ``local_dir``.

    Args:
        bucket_name: Name of the Cloud Storage bucket to read from.
        storage_dir: Blob-name prefix (folder inside the bucket) whose blobs
            are downloaded.
        local_dir: Local target folder. It is created if it does not exist.
        credentials_file: Path to the service-account JSON key used to
            authenticate the storage client.

    Returns:
        None. The function is called for its side effect of writing files.
    """
    # Create the target folder up front so the first download does not fail on
    # a missing directory (an empty string means "current directory").
    if local_dir:
        os.makedirs(local_dir, exist_ok=True)

    my_credentials = service_account.Credentials.from_service_account_file(credentials_file)
    client = storage.Client(credentials=my_credentials)
    bucket = client.bucket(bucket_name)

    for blob in bucket.list_blobs(prefix=storage_dir):
        # Flatten the blob's path into a single file name.
        filename = blob.name.replace('/', '_')
        blob.download_to_filename(os.path.join(local_dir, filename))

In [13]:
#Specify the path to the folder in which you just stored the images
def load_images_from_folder(
    folder='/Users/nicolafriedrich/code/jacksharples1/youtube_optimizer/bucket_data',
    prefix='thumbnail_images_',
    image_shape=(180, 320, 3),
):
    """Load thumbnails and their view counts from the files in ``folder``.

    The text after the last underscore of each file name is parsed as the
    integer view count, and the text between ``prefix`` and that underscore is
    used to detect repeated videos. Only the first file seen per video id is
    considered, and only images whose array shape equals ``image_shape`` are
    kept.

    Args:
        folder: Directory containing the downloaded thumbnail files.
        prefix: Leading part of every file name (the bucket folder name
            followed by ``_``). Names not longer than the prefix are skipped.
        image_shape: Required ``img.shape`` of a thumbnail; others are dropped.

    Returns:
        tuple: ``(images, views)`` where ``images`` is a list with one entry
        per kept thumbnail (each entry is the image array converted to a list
        of its rows) and ``views`` is the list of matching integer view counts.

    Raises:
        ValueError: If the text after the last underscore is not an integer.
    """
    images = []
    views = []
    seen_video_ids = set()  # set membership keeps the duplicate check O(1)
    required_shape = tuple(image_shape)

    # os.listdir returns entries in arbitrary order. Sorting makes the result
    # (and which duplicate is kept) reproducible, so a seeded train/test split
    # downstream sees the same samples on every run.
    for filename in sorted(os.listdir(folder)):
        # A name no longer than the prefix carries neither an id nor a count.
        if len(filename) < len(prefix) + 1:
            continue

        last_underscore = filename.rfind('_')
        # NOTE(review): the slice ends one character *before* the last
        # underscore, so the final character of the id is not part of the
        # de-duplication key. Kept as in the original; confirm against the
        # actual file-name format.
        video_id = filename[len(prefix):last_underscore - 1]
        if video_id in seen_video_ids:
            continue
        seen_video_ids.add(video_id)

        img = plt.imread(os.path.join(folder, filename))
        if img.shape != required_shape:
            continue

        view_count = int(filename[last_underscore + 1:])
        images.append(list(img))
        views.append(view_count)
    return images, views


In [None]:
# Smoke-test the loader. Unpack the result and display only the counts so the
# cell output is not flooded with every pixel array.
images_preview, views_preview = load_images_from_folder()
len(images_preview), len(views_preview)

(90, 120, 3)
(90, 120, 3)
(90, 120, 3)


In [25]:
def dataloading():
    """Download the thumbnails, load them, and split them into train and test sets.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``, each converted to a
        NumPy array. 30% of the samples go to the test set.
    """
    # Fetch the bucket contents first so the loader reads the downloaded files.
    get_images_to_folder()
    features, targets = load_images_from_folder()

    # random_state=0 seeds the shuffle performed by train_test_split.
    parts = train_test_split(features, targets, test_size=0.3, random_state=0)
    X_train, X_test, y_train, y_test = (np.array(part) for part in parts)
    return X_train, X_test, y_train, y_test

In [26]:
# Run the whole data pipeline (download, load, split) and keep the four
# resulting arrays as notebook-level variables for the cells below.
X_train, X_test, y_train, y_test = dataloading()

(90, 120, 3)
(90, 120, 3)
(90, 120, 3)


# Model

In [17]:
from keras.applications.xception import Xception
from keras.layers import GlobalAveragePooling2D, Dense, Flatten
from keras.models import Sequential

2022-11-29 16:52:20.999286: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [18]:
def base_model(input_shape=(180, 320, 3)):
    """Build the frozen, ImageNet-pretrained Xception feature extractor.

    Args:
        input_shape: ``(height, width, channels)`` of the images fed to the
            network. Defaults to the ``(180, 320, 3)`` thumbnails used in this
            notebook.

    Returns:
        The Xception model without its classification head
        (``include_top=False``) and with ``trainable`` set to ``False``.
    """
    # Use a distinct local name so the function does not shadow itself.
    backbone = Xception(weights="imagenet", input_shape=input_shape, include_top=False)
    # Freeze the pretrained weights; only layers stacked on top get trained.
    backbone.trainable = False
    return backbone

In [19]:
def complete_model():
    """Assemble and compile the regression network on top of ``base_model()``.

    The stack is: the base model, global average pooling, a flatten layer, a
    50-unit ReLU dense layer and a single linear output unit. The model is
    compiled with MSE loss, the Adam optimizer and MAE as a metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    layer_stack = [
        base_model(),
        GlobalAveragePooling2D(),
        # The model summary in this notebook shows the pooled output is already
        # (None, 2048), so this layer leaves the shape unchanged.
        Flatten(),
        Dense(50, activation='relu'),
        Dense(1, activation='linear'),
    ]
    regressor = Sequential(layer_stack)

    regressor.compile(optimizer='adam', loss="mse", metrics=["mae"])
    return regressor

In [20]:
# Build the compiled regression model and print its layer / parameter summary.
model = complete_model()
model.summary()

2022-11-29 16:52:29.805221: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 xception (Functional)       (None, 6, 10, 2048)       20861480  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
 flatten (Flatten)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 50)                102450    
                                                                 
 dense_1 (Dense)             (None, 1)                 51        
                                                                 
Total params: 20,963,981
Trainable params: 102,501
Non-trainable params: 20,861,480
______________________________________

In [None]:
# Train for 5 epochs; validation_split=0.2 reserves 20% of the training data
# for validation. The returned History object is kept in `history`.
# NOTE(review): X_train is passed exactly as loaded - no rescaling or Xception
# preprocess_input step is visible in this notebook; confirm the pixel range
# matches what the pretrained weights expect.
history = model.fit(X_train,y_train, epochs = 5,validation_split=0.2)

Epoch 1/5


# Baseline model

In [None]:
from sklearn.metrics import mean_absolute_error

In [None]:
# Baseline: mean absolute error of always predicting the mean of y_train,
# measured on y_train itself. Compare it with the model's "mae" metric.
np.mean(np.abs(y_train - np.mean(y_train)))