Created on: Tue Jan 14 09:44:36 2020
<br>
Group 7
<br>
@authors: V.B., E.G.

In [35]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from PIL import Image
import h5py
import warnings
warnings.filterwarnings('ignore')
import datetime

# Deep learning
import keras
from keras.models import load_model
from keras.applications.imagenet_utils import decode_predictions

In [36]:
# Start-of-run timestamp; the last cell prints the time elapsed since this point
time_s = datetime.datetime.now()

In [37]:
# NOTE(review): machine-specific absolute path; adapt it before running elsewhere
project_path = 'C:/Users/emma.grandgirard/Documents/B - Projet Interpromo/'
models_path = f'{project_path}Models/'  # folder holding the trained models
data_path = 'Data/data_Interpromo2020/Interpromo2020/All Data/ANALYSE IMAGE/'  # relative to project_path
path_out = f'{project_path}pred'  # folder receiving the prediction CSV

# Source of the images to classify: 'SEATGURU', 'SEATMAESTRO' or 'INSTAGRAM'
social_net = 'INSTAGRAM'
insta_hashtag = 'airbus'  # only used when social_net == 'INSTAGRAM'

# Image parameters applied when reading the pictures
size = (224, 224)
greys = False

# Read images to predict

In [38]:
def read_img(img_path: str, size: tuple, greys: bool=False) -> np.ndarray:
    """Read an image file and return it as a single-image batch array.

    Parameters
    ----------
    img_path : str
        Path of the image file to open.
    size : tuple
        Target spatial size; the returned array has shape
        (1, size[0], size[1], channels).
    greys : bool, default False
        If True the image is converted to single-channel greyscale,
        otherwise to 3-channel RGB.

    Returns
    -------
    np.ndarray
        Array of shape (1, size[0], size[1], 1 if greys else 3).
    """
    n_channels = 1 if greys else 3

    # The context manager closes the underlying file once the pixels are copied
    with Image.open(img_path) as img_img:
        # Force a known channel count: RGBA, palette or greyscale files would
        # otherwise not fit the reshape below
        img_img = img_img.convert('L' if greys else 'RGB')
        # PIL expects (width, height) while the resulting array is (height, width, ...),
        # so swap the axes to obtain an array of shape (size[0], size[1], ...)
        img_arr = np.array(img_img.resize((size[1], size[0])))

    # Add the leading batch axis (and the channel axis for greyscale images)
    img_arr = img_arr.reshape(1, size[0], size[1], n_channels)

    return img_arr

In [40]:
# Folder holding the images of the selected social network
if social_net in ('SEATGURU', 'SEATMAESTRO'):
    folder = 'IMG ' + social_net + '/'
elif social_net == 'INSTAGRAM':
    folder = social_net + '/' + insta_hashtag + '/'
else:
    # Fail early with a clear message instead of a NameError on `folder` below
    raise ValueError("Unknown social_net %r: expected 'SEATGURU', 'SEATMAESTRO' or 'INSTAGRAM'"
                     % (social_net,))

path_pred = project_path + data_path + folder

# Keep every entry whose name does not contain 'xlsx'; sort the listing because
# os.listdir returns entries in arbitrary order and the row order should be reproducible
imgs_names = sorted(img for img in os.listdir(path_pred) if 'xlsx' not in img)

# Init results DataFrame: one row per image, prediction columns filled in later cells
df = pd.DataFrame(columns=['img',
                           'view',
                           'view_proba',
                           'manufacturer',
                           'manufacturer_proba',
                           'type',
                           'type_proba'])
df['img'] = imgs_names

In [41]:
# Load each picture as a (1, H, W, C) array and stack the results into one array
all_imgs_arr = np.array([
    read_img(path_pred + img_name, size=(size[0], size[1]), greys=greys)
    for img_name in imgs_names
])

# Drop the per-image singleton axis so the array is (n_images, H, W, C) for prediction
all_imgs_arr = all_imgs_arr.reshape(len(imgs_names), size[0], size[1], 1 if greys else 3)
all_imgs_arr.shape

(1976, 224, 224, 3)

In [42]:
# Execute the helper notebook inside this kernel.
# NOTE(review): presumably this is what defines load_files_model, which
# predict_from_model calls but which is not defined in this notebook - confirm.
%run g7_pickle_save_load.ipynb

# Models pipeline

#### Functions

In [43]:
def predict_from_model(img_arr: np.ndarray, models_path: str, model_name: str) -> tuple:
    """Predict a label and its probability for every image of a batch.

    Parameters
    ----------
    img_arr : np.ndarray
        Batch of images, first axis = image.
    models_path : str
        Folder passed to ``load_files_model`` as ``path_mod``.
    model_name : str
        Model name passed to ``load_files_model`` as ``mod_name``.

    Returns
    -------
    tuple
        ``(labels, proba_labels)``: two lists with one entry per image, the
        predicted class label and the highest predicted score.

    Raises
    ------
    ValueError
        If a predicted class index has no label in the model's class dictionary.
    """
    # Nothing to classify: skip the model loading and the prediction
    if len(img_arr) == 0:
        return [], []

    # Load model and predict
    model, dic_class = load_files_model(path_mod=models_path, mod_name=model_name)
    preds = model.predict(img_arr)

    # Invert the {label: class index} dictionary once instead of rebuilding two
    # lists per image; setdefault keeps the first label when an index is duplicated
    index_to_label = {}
    for label, index in dic_class.items():
        index_to_label.setdefault(index, label)

    class_indices = [int(np.argmax(row)) for row in preds]
    proba_labels = [np.max(row) for row in preds]

    try:
        labels = [index_to_label[index] for index in class_indices]
    except KeyError as err:
        raise ValueError('Predicted class index %s has no label in the class dictionary of model %r'
                         % (err, model_name)) from err

    return labels, proba_labels

In [44]:
def predict_save(df: pd.DataFrame, imgs_arr: np.ndarray, filter_: dict, models_path: str, model_name: str, to_fill: str) -> tuple:
    """Predict labels for the rows matching ``filter_`` and store them in ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Results table with one row per image, in the same order as ``imgs_arr``.
        It is modified in place.
    imgs_arr : np.ndarray
        Batch of images, first axis = image (same length as ``df``).
    filter_ : dict
        ``{column: value}`` conditions; a row is selected only when ALL of them
        hold. An empty dict selects every row.
    models_path : str
        Folder containing the models, forwarded to ``predict_from_model``.
    model_name : str
        Name of the model to use, forwarded to ``predict_from_model``.
    to_fill : str
        Column receiving the labels; probabilities go to ``to_fill + '_proba'``.

    Returns
    -------
    tuple
        ``(df, i)`` where ``i`` is the index of the selected rows.
    """
    # Combine every condition with a logical AND (positional boolean mask, so the
    # selection does not depend on the DataFrame having a default integer index)
    mask = np.ones(len(df), dtype=bool)
    for col, val in filter_.items():
        mask &= np.asarray(df[col] == val, dtype=bool)
    i = df.index[mask]

    # No matching row: nothing to predict, leave the DataFrame untouched
    if not mask.any():
        return df, i

    # Get labels with probabilities for the selected images only
    labels, proba_labels = predict_from_model(img_arr=imgs_arr[mask], models_path=models_path,
                                              model_name=model_name)

    # Fill DataFrame with a single .loc[rows, column] assignment so the write
    # always reaches df itself (chained indexing may write to a temporary copy)
    df.loc[mask, to_fill] = labels
    df.loc[mask, to_fill + '_proba'] = proba_labels

    return df, i

## First step: view

In [None]:
# Run the 'View' model on every image of the batch
labels, proba_labels = predict_from_model(all_imgs_arr, models_path, 'View')

# Store the predicted view and its probability in the results table
df['view'], df['view_proba'] = labels, proba_labels

## Exterior

### Manufacturer and type

In [None]:
# If View == 'Ext', predict aircraft type and fill DataFrame
filter_ = {'view': 'Ext'}
df, ind = predict_save(df, all_imgs_arr, filter_, models_path, model_name='Ext_typ', to_fill='type')

# Deduce manufacturer from aircraft type: a type label containing 'A' is mapped to
# Airbus, anything else to Boeing. A single .loc[rows, column] assignment is used so
# the write reaches df itself (chained indexing may write to a temporary copy).
if len(ind) > 0:
    df.loc[ind, 'manufacturer'] = ['Airbus' if 'A' in typ else 'Boeing'
                                   for typ in df.loc[ind, 'type']]

## Interior

### Manufacturer

In [None]:
# Interior pictures (view == 'Int'): predict the manufacturer with the 'Int_man' model
filter_ = {'view': 'Int'}
df, ind = predict_save(df=df,
                       imgs_arr=all_imgs_arr,
                       filter_=filter_,
                       models_path=models_path,
                       model_name='Int_man',
                       to_fill='manufacturer')

### Airbus and Boeing types

In [None]:
# Interior pictures: predict the aircraft type with one model per manufacturer
# ('Int_Airbus' first, then 'Int_Boeing'), on the rows selected by filter_
for maker in ('Airbus', 'Boeing'):
    filter_ = {'view': 'Int',
               'manufacturer': maker}
    df, ind = predict_save(df, all_imgs_arr, filter_, models_path,
                           model_name='Int_' + maker, to_fill='type')

# Save results as CSV

In [None]:
# Make sure the output folder exists, otherwise to_csv fails on a fresh checkout
os.makedirs(path_out, exist_ok=True)

# Path to save: the Instagram file name also carries the hashtag
if social_net != 'INSTAGRAM':
    path_save = path_out + '/pred_' + social_net + '.csv'

else:
    path_save = path_out + '/pred_' + social_net + '_' + insta_hashtag + '.csv'

# Save (semicolon-separated, UTF-8)
df.to_csv(path_or_buf=path_save, sep=';', encoding='utf-8')

In [None]:
# Report the wall-clock time elapsed since time_s was recorded
elapsed = datetime.datetime.now() - time_s
print(elapsed)