created on: Tue Jan 14 09:44:36 2020
<br>
Group 7
<br>
@authors: V.B., E.G.

# Environment

# Libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import h5py
import warnings
warnings.filterwarnings('ignore')
import datetime

# Deep learning
import keras
from keras.models import load_model
from keras.applications.imagenet_utils import decode_predictions

from keras.applications.vgg16 import preprocess_input

Using TensorFlow backend.


In [2]:
# Record the start time; the elapsed run time is printed from it before the results are saved.
time_s = datetime.datetime.now()

In [3]:
# --- Locations ---------------------------------------------------------------
# Project root, relative to the notebook's working directory.
project_path = './../'
# Folder the trained models are loaded from (sits under the project root).
models_path = '{}Models_F/'.format(project_path)
# Image data folder; appended to project_path when the images are listed.
data_path = 'Interpromo2020/All Data/ANALYSE IMAGE/'
# Folder the prediction CSV is written to.
path_out = './'

# --- Image source ------------------------------------------------------------
# Supported values: 'SEATGURU' or 'INSTAGRAM'.
social_net = 'SEATGURU'
# Hashtag sub-folder, only read when social_net == 'INSTAGRAM'.
insta_hashtag = 'boeing'

# --- Image parameters --------------------------------------------------------
# Target size used for the image resize and for the batch reshape.
size = (224, 224)
# False -> arrays are reshaped with 3 channels, True -> with 1 channel.
greys = False

In [4]:
# Read the g7_SEATGURU_annotate.csv file and rename its 'name' column to 'img',
# the column name used by the prediction table built later in this notebook.
path_real = project_path + 'ImagesStats/'
df_real = (
    pd.read_csv(path_real + 'g7_SEATGURU_annotate.csv', sep=';')
    .rename(columns={"name": "img"})
)

In [5]:
##Functions

In [6]:
# Execute the helper notebook so the names it defines become available here.
# NOTE(review): presumably this is what provides load_files_model, which
# predict_from_model calls below -- confirm against g7_pickle_save_load.ipynb.
%run g7_pickle_save_load.ipynb

# Read images to predict

In [7]:
def read_img(img_path: str, size: tuple, greys: bool=False) -> np.ndarray:
    """Load one image file and return it as a preprocessed single-image batch.

    Parameters
    ----------
    img_path : str
        Path of the image file to open.
    size : tuple
        Two-element target size. It is passed to PIL's ``resize`` and also
        used for the two spatial axes of the reshape.
        NOTE: PIL's ``resize`` expects (width, height) while the reshape uses
        ``size`` as (rows, cols); the two agree only for square sizes such as
        the (224, 224) used in this notebook.
    greys : bool, default False
        If True the image is converted to single-channel greyscale, otherwise
        to 3-channel RGB.
        NOTE(review): ``preprocess_input`` is imported from the VGG16
        application -- confirm it accepts 1-channel input before using
        ``greys=True``.

    Returns
    -------
    np.ndarray
        The output of ``preprocess_input`` applied to an array of shape
        ``(1, size[0], size[1], channels)``.
    """
    # Force a known channel layout: without this a greyscale / RGBA / CMYK /
    # palette file has a different number of channels and the reshape fails.
    mode = 'L' if greys else 'RGB'
    n_channels = 1 if greys else 3

    # The context manager closes the underlying file once the pixels are read,
    # so handles are not leaked when thousands of images are loaded.
    with Image.open(img_path) as img_img:
        img_arr = np.array(img_img.convert(mode).resize(size))

    # Add the leading batch axis expected by the model
    img_arr = img_arr.reshape(1, size[0], size[1], n_channels)

    return preprocess_input(img_arr)

In [8]:
# Path: pick the image sub-folder matching the chosen social network
if social_net == 'SEATGURU':
    folder = 'IMG ' + social_net + '/'

elif social_net == 'INSTAGRAM':
    folder = social_net + '/' + insta_hashtag + '/'

else:
    # Fail here with a clear message rather than with a NameError on `folder` below
    raise ValueError("social_net must be 'SEATGURU' or 'INSTAGRAM', got {!r}".format(social_net))

path_pred = project_path + data_path + folder

# Keep only files whose name ends with '.jpg' (a substring test would also accept
# names such as 'x.jpg.txt'). Sort the names because os.listdir returns entries in
# arbitrary order, which would make the row order of the results non-reproducible.
imgs_names = sorted(img for img in os.listdir(path_pred) if img.endswith('.jpg'))

# Init results DataFrame: one row per image, prediction columns filled in later
df = pd.DataFrame(columns=['img', 
                           'view', 
                           'view_proba', 
                           'manufacturer', 
                           'manufacturer_proba', 
                           'type', 
                           'type_proba'])
df['img'] = imgs_names

In [9]:
# Load every file as a preprocessed single-image batch and collect them in one array
all_imgs_arr = np.array([
    read_img(path_pred + name, size=tuple(size[:2]), greys=greys)
    for name in imgs_names
])

# Collapse the per-image batch axis: one row per image, ready for prediction
all_imgs_arr = all_imgs_arr.reshape(
    (len(imgs_names), size[0], size[1], 1 if greys else 3)
)
all_imgs_arr.shape

(2556, 224, 224, 3)

# Models pipeline

#### Functions

In [10]:
def predict_from_model(img_arr: np.array, models_path: str, model_name: str) -> (list, list):
    """Predict a class label and its probability for every image of a batch.

    Parameters
    ----------
    img_arr : np.array
        Batch of images, passed unchanged to ``model.predict``.
    models_path : str
        Forwarded to ``load_files_model`` as ``path_mod``.
    model_name : str
        Forwarded to ``load_files_model`` as ``mod_name``.

    Returns
    -------
    (list, list)
        ``labels``: for each image, the key of the loaded class dictionary
        whose value equals the arg-max of the prediction row.
        ``proba_labels``: for each image, the maximum of the prediction row.
    """
    # Load the model together with its {class name: class index} dictionary
    model, dic_class = load_files_model(path_mod=models_path, mod_name=model_name)
    print(dic_class)

    # NOTE(review): assumes predict returns one row of class scores per image -- confirm
    preds = np.asarray(model.predict(img_arr))

    # Invert the dictionary once instead of rebuilding the key/value lists and
    # scanning them for every prediction. setdefault keeps the first class name
    # for a repeated index, which is what list.index returned before.
    index_to_class = {}
    for class_name, class_ix in dic_class.items():
        index_to_class.setdefault(class_ix, class_name)

    # Row-wise arg-max / max over the class axis
    labels = [index_to_class[int(ix)] for ix in preds.argmax(axis=1)]
    proba_labels = list(preds.max(axis=1))

    del model, dic_class  # remove model and dict from environment

    return labels, proba_labels

In [11]:
def predict_save(df: pd.DataFrame, all_imgs_arr: np.array, filter_: dict, models_path: str, 
                 model_name: str, to_fill: str) -> (pd.DataFrame, list):
    """Predict labels for the rows of ``df`` matching ``filter_`` and store them.

    Parameters
    ----------
    df : pd.DataFrame
        Results table. It is modified in place and also returned.
    all_imgs_arr : np.array
        Image batch whose first axis is aligned, position by position, with
        the rows of ``df``.
    filter_ : dict
        ``{column: value}`` pairs. A row is selected when it matches *all*
        of them; an empty dict selects every row.
    models_path : str
        Forwarded to ``predict_from_model``.
    model_name : str
        Forwarded to ``predict_from_model``.
    to_fill : str
        Column receiving the labels; the probabilities go to
        ``to_fill + '_proba'``.

    Returns
    -------
    (pd.DataFrame, list)
        ``df`` and the index labels of the selected rows, in row order.
    """
    # Apply filter(s): AND together any number of conditions
    mask = np.ones(len(df), dtype=bool)
    for column, wanted in filter_.items():
        mask &= (df[column] == wanted).values

    ind = df.index[mask].tolist()

    # Nothing selected: skip loading the model and predicting on an empty batch
    if not ind:
        return df, ind

    # Select images by row position so this also works when df's index is not 0..n-1
    imgs = all_imgs_arr[np.flatnonzero(mask)]

    # Get labels with probabilities
    labels, proba_labels = predict_from_model(img_arr=imgs, models_path=models_path,
                                              model_name=model_name)

    # Fill DataFrame through a single .loc call: chained df[col].loc[rows] = ...
    # may write to a temporary copy and leave df untouched
    df.loc[mask, to_fill] = labels
    df.loc[mask, to_fill + '_proba'] = proba_labels

    return df, ind

## First step: view

In [12]:
# Run the 'View' model on every image
labels, proba_labels = predict_from_model(all_imgs_arr, models_path, 'View')

# Store the predicted view and its probability for all rows
df['view'], df['view_proba'] = labels, proba_labels

{'Ext': 0, 'Ext_Int': 1, 'Int': 2, 'Meal': 3}


## Exterior

### Manufacturer and type

In [16]:
# If View == 'Ext', predict aircraft type and fill DataFrame
filter_ = {'view': 'Ext'}
df, ind = predict_save(df, all_imgs_arr, filter_, models_path, model_name='Ext_typ', to_fill='type')

# Deduce manufacturer from aircraft type: types containing 'A' are Airbus, the others Boeing.
# Assign through a single .loc call; the chained form df['manufacturer'].loc[ind] = ...
# may write to a temporary copy and leave df unchanged.
df.loc[ind, 'manufacturer'] = ['Airbus' if 'A' in typ else 'Boeing' for typ in df.loc[ind, 'type']]

{'737': 0, '747': 1, '757': 2, '777': 3, '787': 4, 'A320': 5, 'A321': 6, 'A330': 7, 'A340': 8, 'A350': 9, 'A380': 10}


## Interior

### Manufacturer

In [19]:
# Interior images only: predict the manufacturer
filter_ = {'view': 'Int'}
df, ind = predict_save(
    df, all_imgs_arr, filter_, models_path,
    model_name='Int_man', to_fill='manufacturer',
)

{'Airbus': 0, 'Boeing': 1}


### Airbus and Boeing types

In [20]:
# Interior images: predict the type with the model dedicated to each
# manufacturer value ('Int_Airbus' first, then 'Int_Boeing')
for man in ('Airbus', 'Boeing'):
    filter_ = {'view': 'Int', 'manufacturer': man}
    df, ind = predict_save(df, all_imgs_arr, filter_, models_path,
                           model_name='Int_' + man, to_fill='type')



{'A320': 0, 'A321': 1, 'A330': 2, 'A350': 3, 'A380': 4}
{'737': 0, '747': 1, '757': 2, '777': 3}


# Save results as csv

In [22]:
# Elapsed time since time_s was recorded near the top of the notebook
print(datetime.datetime.now() - time_s)

0:17:41.900967


In [23]:
# Path to save: the Instagram file name also carries the hashtag.
# os.path.join avoids the doubled separator ('.//pred_...') that plain
# concatenation produced when path_out already ends with '/'.
if social_net != 'INSTAGRAM':
    path_save = os.path.join(path_out, 'pred_' + social_net + '_4.csv')
    
else:
    path_save = os.path.join(path_out, 'pred_' + social_net + '_' + insta_hashtag + '_4.csv')

# Save as a ';'-separated UTF-8 CSV without the index column
df.to_csv(path_or_buf=path_save, sep=';', encoding='utf-8', index=False)