created on: Tue Jan 14 09:44:36 2020
<br>
Group 7
<br>
@authors: V.B., E.G.

<h1>Group 7 - Images sociales<span class="tocSkip"></span></h1>
    
<br>  
<center>Pipeline</center>

# Introduction
This notebook runs all our models on folders of images. Before launching the pipeline, you only have to set `social_net` (the name of the desired social network) and make sure the corresponding folder in your `data_path` is filled with images. If `social_net` is Instagram, also set `insta_hashtag`.


For the moment there are two folders, one for Seatguru and one for Instagram. The latter contains four subfolders, one per hashtag: airbus, aircraftinterior, aircraftseat, and boeing. You can add new images to any folder and relaunch the pipeline, or create folders for new hashtags and/or social media (a new social network also needs a matching branch in the cell that builds the images path).


**Out**<span class="tocSkip"></span><br>
After each pipeline run, a CSV file containing the predictions is written to your `Results` folder (the location is controlled by `path_out`).

# Environment
This notebook requires `python 3.6` or later to run properly.

## Libraries

In [1]:
import warnings
warnings.filterwarnings('ignore')
from keras import models
from keras.applications.vgg16 import preprocess_input
from keras.applications.imagenet_utils import decode_predictions
from keras.models import load_model
import keras
import datetime
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import pickle
import os

Using TensorFlow backend.


In [2]:
%load_ext watermark
%watermark -p keras,tensorflow,PIL

keras 2.3.1
tensorflow 1.13.1
PIL 6.2.0


## Parameters

In [2]:
# project_path, models_path and data_path are concatenated as plain strings
# further down, so each of them must end with '/'.
project_path = './../'
models_path = project_path + 'Models_F/'
data_path = 'Interpromo2020/All Data/ANALYSE IMAGE/'  # appended to project_path to locate the images
path_out = './'  # folder where the predictions CSV is written

# Choose social network: SEATGURU, INSTAGRAM
social_net = 'INSTAGRAM'
insta_hashtag = 'boeing'  # if social_net == 'INSTAGRAM'

# Choose images parameters
size = (224, 224)  # (width, height) every image is resized to
greys = False  # False for colour images, True for greyscale

## Functions

In [3]:
def load_files_model(path_mod: str, mod_name: str):
    """Load a trained model and its class-label dictionary.

    Both files are read from ``<path_mod><mod_name>/`` and are named
    ``model_<mod_name>.h5`` and ``model_<mod_name>.pkl``.

    Parameters:
        path_mod: path to models folders (used as a plain string prefix, so it
                  must end with a path separator)
        mod_name: model name

    Out:
        model: model loaded from the h5 file
        dic_class: dict with classes labels (keys), and corresponding integers returned by the model (values)

    """

    base_name = path_mod + mod_name + '/' + 'model_' + mod_name

    # `load_model` is the function imported from `keras.models` at the top of the
    # notebook; the top-level `keras` package does not expose `keras.load_model`.
    model = load_model(base_name + '.h5')

    # NOTE: unpickling can execute arbitrary code; only load .pkl files that come
    # from a trusted models folder.
    with open(base_name + '.pkl', "rb") as f:
        dic_class = pickle.load(f)

    return model, dic_class


def read_img(img_path: str, size: tuple, greys: bool = False) -> np.array:
    """Read an image, resize it and return it as a preprocessed batch of one.

    Parameters:
        img_path: path to desired image
        size: tuple in (width, height) format
        greys: False for colour (3 channels), True for greyscale (1 channel)

    Out:
        img_arr: numpy array of shape (1, height, width, channels), after `preprocess_input`

    """

    n_channels = 1 if greys else 3

    # The context manager closes the underlying file once the pixels are loaded.
    with Image.open(img_path) as raw_img:
        # Force the channel count: palette, RGBA, CMYK or greyscale files would
        # otherwise produce arrays that cannot be reshaped to `n_channels`.
        converted = raw_img.convert('L' if greys else 'RGB')
        img_arr = np.array(converted.resize(size))

    # PIL sizes are (width, height) whereas the pixel array is (height, width[, channels]).
    img_arr = img_arr.reshape(1, size[1], size[0], n_channels)

    # NOTE(review): `preprocess_input` is the VGG16 one and presumably expects
    # 3 channels - confirm the greys=True path before relying on it.
    return preprocess_input(img_arr)


def predict_from_model(img_arr: np.array, models_path: str, model_name: str) -> (list, list):
    """Run one model over a batch of images and keep the best class of each image.

    The model and its class dictionary are loaded on every call, and the class
    dictionary is printed.

    Parameters:
        img_arr: set of images in numpy array format
        models_path: path to models
        model_name: name of the model to apply

    Out:
       labels: list with, for each image, the class label of highest probability
       proba_labels: list with the probability of each predicted label

    """

    classifier, class_to_id = load_files_model(path_mod=models_path, mod_name=model_name)
    print(class_to_id)

    scores = classifier.predict(img_arr)

    # Best class index and its score, image by image
    best_ids = [np.argmax(row) for row in scores]
    proba_labels = [np.max(row) for row in scores]

    # Map each predicted integer back to its class label (first key holding that value)
    class_names = list(class_to_id.keys())
    class_ids = list(class_to_id.values())
    labels = [class_names[class_ids.index(best)] for best in best_ids]

    return labels, proba_labels


def predict_save(df: pd.DataFrame, all_imgs_arr: np.array, filter_: dict, models_path: str,
                 model_name: str, to_fill: str) -> (pd.DataFrame, list):
    """Apply a model to the images selected by a filter and store the predictions in df.

    Parameters:
        df: DataFrame aimed at containing prediction results
            Lines: images; columns: items to predict.
            Row i of df must describe image i of all_imgs_arr.

        all_imgs_arr: numpy array containing all images
        filter_: dict describing the images on which the model will be applied.
                 Key: column name in the DataFrame; value: value to filter in this column.
                 e.g. {'view': 'Ext'}
                 All conditions must hold together; an empty dict selects every image.

        models_path: path to models
        model_name: name of the model to apply
        to_fill: name of the df column to fill with results ('type', 'manufacturer');
                 probabilities go to the column named to_fill + '_proba'

    Out:
       df: DataFrame filled with predicted label and probability (modified in place)
       ind: list of index labels of the filtered images (might be useful if you want
            to apply further transformation to these lines in the DataFrame)

    """

    # Combine every condition of the filter into a single boolean mask
    mask = pd.Series(True, index=df.index)
    for column, value in filter_.items():
        mask = mask & (df[column] == value)
    selected = mask.values
    ind = df.index[selected].tolist()

    # Nothing matches the filter: skip loading the model and leave df untouched
    if not ind:
        return df, ind

    # Get labels with probabilities (the mask selects images by position)
    labels, proba_labels = predict_from_model(img_arr=all_imgs_arr[selected],
                                              models_path=models_path,
                                              model_name=model_name)

    # Fill DataFrame with a single .loc[rows, column] assignment; the chained
    # df[col].loc[rows] = ... form may write to a temporary copy instead of df.
    df.loc[ind, to_fill] = labels
    df.loc[ind, to_fill + '_proba'] = proba_labels

    return df, ind

# Read images to predict

In [4]:
# Start of the timed section; the elapsed time is printed once all predictions are done
time_s = datetime.datetime.now()

In [5]:
# Folder holding the images of the chosen social network
if social_net == 'SEATGURU':
    folder = 'IMG ' + social_net + '/'

elif social_net == 'INSTAGRAM':
    folder = social_net + '/' + insta_hashtag + '/'

else:
    # Fail early with a clear message instead of a NameError on `folder` below
    raise ValueError(
        "Unknown social_net %r: expected 'SEATGURU' or 'INSTAGRAM'" % social_net)

path_pred = project_path + data_path + folder

# Keep the JPEG files only (case-insensitive match); sorted because os.listdir
# returns names in arbitrary order and the results should be reproducible
imgs_names = sorted(img for img in os.listdir(path_pred) if '.jpg' in img.lower())

# Init results DataFrame: one row per image, one label/probability pair per prediction
df = pd.DataFrame(columns=['img',
                           'view',
                           'view_proba',
                           'manufacturer',
                           'manufacturer_proba',
                           'type',
                           'type_proba'])
df['img'] = imgs_names

In [6]:
# Read all images; each `read_img` call returns a batch holding a single image
n_channels = 1 if greys else 3
img_batches = [read_img(path_pred + img_name, size=(size[0], size[1]), greys=greys)
               for img_name in imgs_names]

# Stack the single-image batches along the first axis for prediction
# (no reshape needed, so the axis order produced by `read_img` is kept as is)
if img_batches:
    all_imgs_arr = np.concatenate(img_batches, axis=0)
else:
    # No image found: keep an empty batch with the expected number of dimensions
    all_imgs_arr = np.empty((0, size[1], size[0], n_channels))
all_imgs_arr.shape

(1934, 224, 224, 3)

# Models pipeline

## First step: view

In [7]:
# Predict the view of every image with the 'View' model
labels, proba_labels = predict_from_model(img_arr=all_imgs_arr, models_path=models_path,
                                          model_name='View')

# Fill DataFrame with View labels and the probability of each predicted label
df['view'] = labels
df['view_proba'] = proba_labels

{'Ext': 0, 'Ext_Int': 1, 'Int': 2, 'Meal': 3}


## Exterior

### Manufacturer and type

In [8]:
# If View == 'Ext', predict aircraft type and fill DataFrame
filter_ = {'view': 'Ext'}
df, ind = predict_save(df, all_imgs_arr, filter_,
                       models_path, model_name='Ext_typ', to_fill='type')

# Deduce manufacturer from aircraft type: Airbus types start with 'A' (e.g. 'A320'),
# every other type is treated as a Boeing one (e.g. '737').
# A single .loc[rows, column] assignment is used because the chained
# df[col].loc[rows] = ... form may write to a temporary copy instead of df.
if len(ind) > 0:
    df.loc[ind, 'manufacturer'] = [
        'Airbus' if typ.startswith('A') else 'Boeing' for typ in df.loc[ind, 'type']]

{'737': 0, '747': 1, '757': 2, '777': 3, '787': 4, 'A320': 5, 'A321': 6, 'A330': 7, 'A340': 8, 'A350': 9, 'A380': 10}


## Interior

### Manufacturer

In [9]:
# If View == 'Int', predict manufacturer
# Fills the 'manufacturer' and 'manufacturer_proba' columns of the interior images,
# which the interior type models filter on afterwards
filter_ = dict({'view': 'Int'})
df, ind = predict_save(df, all_imgs_arr, filter_, models_path,
                       model_name='Int_man', to_fill='manufacturer')

{'Airbus': 0, 'Boeing': 1}


### Airbus and Boeing types

In [10]:
# Each manufacturer has its own interior type model ('Int_Airbus', 'Int_Boeing'):
# if View == 'Int' and manufacturer matches, predict type with that model
for int_manufacturer in ('Airbus', 'Boeing'):
    filter_ = {'view': 'Int', 'manufacturer': int_manufacturer}
    df, ind = predict_save(df, all_imgs_arr, filter_, models_path,
                           model_name='Int_' + int_manufacturer, to_fill='type')

{'A320': 0, 'A321': 1, 'A330': 2, 'A350': 3, 'A380': 4}
{'737': 0, '747': 1, '757': 2, '777': 3}


# Save results as csv

In [11]:
# Time elapsed since `time_s` was set, i.e. image reading plus all predictions
print(datetime.datetime.now() - time_s)

0:11:00.246725


In [12]:
# File name: g7_pred_<social network>[_<hashtag>]_4.csv (hashtag for Instagram only)
name_parts = ['g7_pred', social_net]
if social_net == 'INSTAGRAM':
    name_parts.append(insta_hashtag)
name_parts.append('4')

# Make sure the output folder exists; os.path.join avoids the doubled separator
# that `path_out + '/...'` produces when path_out already ends with '/'
if path_out:
    os.makedirs(path_out, exist_ok=True)
path_save = os.path.join(path_out, '_'.join(name_parts) + '.csv')

# Save
df.to_csv(path_or_buf=path_save, sep=';', encoding='utf-8', index=False)