# Steps
- Process all unprocessed pictures (this notebook, also a cronjob)
- Train model (done only once with notebook)
- Run predictions for all unprocessed files (notebook 3, also cronjob)
- Get predictions from database (telegram bot)

In [2]:
import utils
import cv2
import matplotlib.pyplot as plt
import time
import pandas as pd

from sklearn import linear_model
from sklearn.model_selection import train_test_split
import datetime

import os

# Open the database connection via the project-local `utils` helper; every
# read_sql / to_sql call in this notebook goes through `conn`.
conn = utils.get_conn()

In [2]:
# Sanity check: list the tables reachable through `conn` (shown as cell output).
pd.read_sql('SHOW tables', conn)

Unnamed: 0,Tables_in_dc
0,pictures
1,predictions
2,processed
3,scores


# Update pictures table

In [3]:
# Snapshot the current contents of the 'pics' directory and overwrite the
# `pictures` table with it (one row per directory entry, column `file`).
pictures = os.listdir('pics')
pics = pd.DataFrame(data=pictures, columns=['file'])
pics.to_sql(name='pictures', con=conn, if_exists='replace')

In [7]:
# Report how many directory entries were collected into `pics`.
print("Found {} pictures".format(len(pics)))

Found 8791 pictures


## Get list of unprocessed files

In [4]:
# Anti-join: keep every file listed in `pictures` that has no matching row
# in `processed`, i.e. the pictures that still need feature extraction.
sql = """
SELECT a.file
FROM (SELECT DISTINCT file FROM pictures) a
LEFT JOIN (SELECT DISTINCT file FROM processed) b
    ON a.file = b.file
WHERE b.file IS NULL
"""
unprocessed_files = pd.read_sql(sql=sql, con=conn)
print("Found {} unprocessed files.".format(len(unprocessed_files)))

Found 1836 unprocessed files.


## Load pretrained model

In [8]:
from keras.preprocessing import image
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array

from keras.applications import MobileNet
from keras.applications.mobilenet import preprocess_input
from keras.applications.mobilenet import decode_predictions

# Tag used for the output directory (data/<model_name>/) and for the `model`
# column written to the `processed` table.
model_name = 'mobilenet_2'

# MobileNet with ImageNet weights; get_features() calls predict() on it.
model = MobileNet(weights='imagenet')
# NOTE(review): this pops the last entry from the model's layer list. In some
# Keras versions that does not rewire the model's outputs, so predict() may
# still return the original final-layer output — confirm the feature shape.
model.layers.pop()
model.summary()

Using TensorFlow backend.
  return f(*args, **kwds)
  return f(*args, **kwds)


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 225, 225, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (ReLU)            (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseC

In [9]:
def get_features(filename):
    """Return the model's output for one image file as a flat 1-D array.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis of size 1, run through MobileNet's `preprocess_input`, and
    passed to `model.predict`; the prediction is flattened with `ravel()`.
    """
    pixels = img_to_array(load_img(filename, target_size=(224, 224)))
    batch = pixels.reshape((1,) + pixels.shape[:3])
    return model.predict(preprocess_input(batch)).ravel()

# Process files

In [10]:
# Single timestamp for this run (naive local time, ISO-8601 string); later
# cells write it to the `time` column of the processed-files records.
current_time = datetime.datetime.now().isoformat()

In [None]:
# Extract features for every file that has no entry in `processed` yet and
# write them to data/<model_name>/<filename>.csv as one line:
# "<filename>,<feature_0>,<feature_1>,...".
output_dir = "data/{}".format(model_name)
# Make sure the target directory exists before the first write.
os.makedirs(output_dir, exist_ok=True)

failed_files = []
for filename in unprocessed_files['file']:
    try:
        representation = get_features(utils.img_folder + filename)
    except Exception as exc:
        # Skip this file entirely: falling through would write the previous
        # file's features under this name (or raise NameError on the first
        # iteration, when `representation` is still undefined).
        print('error processing file', filename, repr(exc))
        failed_files.append(filename)
        continue

    with open("{}/{}.csv".format(output_dir, filename), "w") as f:
        f.write(filename + "," + ",".join([str(a) for a in representation]) + "\n")

if failed_files:
    print("{} file(s) could not be processed".format(len(failed_files)))

In [None]:
# Tag the first record with the model name and run timestamp. Guarded because
# `.loc[0, ...]` on an empty frame enlarges it, creating a spurious row whose
# `file` is NaN.
if not unprocessed_files.empty:
    unprocessed_files.loc[0, 'model'] = model_name
    unprocessed_files.loc[0, 'time'] = current_time

In [None]:
# Record every file from this run in `processed`, tagged with the model name
# and the run timestamp.
unprocessed_files['model'] = model_name
# Use the `current_time` variable, not a quoted literal, so the rows carry
# the real timestamp instead of the text "current_time".
unprocessed_files['time'] = current_time
unprocessed_files.to_sql('processed', conn, if_exists='append')

In [None]:
# Read back the `file` column of the `pictures` table.
sql = """
SELECT a.file
FROM pictures a
"""
df = pd.read_sql(sql, conn)

# Register the files from a single feature CSV in the `processed` table (one-off, disabled)

In [None]:
# One-off backfill, disabled on purpose (`if False`): register the files
# listed in an existing combined feature CSV in the `processed` table.
if False:
    # The CSV is read without a header row; column 0 is used as the file name.
    df = pd.read_csv('data/mobilenet_2/2019-03-03T18:45:56.800656.csv', header=None)

    processed_files = pd.DataFrame()

    processed_files['file'] = df[0]
    processed_files['time'] = '2019-03-03T18:45:56.800656'
    # Tag every row with the model, not just row 0, so no record is stored
    # with a NULL model.
    processed_files['model'] = 'mobilenet_2'

    processed_files.to_sql('processed', conn, if_exists='append')

# Split a combined prediction CSV into one file per picture (one-off, disabled)

In [None]:
# One-off migration, disabled on purpose (`if False`): split the combined CSV
# into one CSV per picture under data/<model_name>/.
if False:
    df = pd.read_csv('data/mobilenet_2/2019-03-03T18:45:56.800656.csv', header=None)

    for ind, row in df.iterrows():
        # Column 0 is used as the output file name.
        filename = row[0]
        with open("data/{}/{}.csv".format(model_name, filename), "w" ) as f:
            # Writes the whole row (including column 0) as one comma-separated
            # line; no trailing newline is written here.
            f.write(",".join([str(a) for a in row.values]))

    # NOTE(review): reads back the first per-picture file; read_csv is called
    # without header=None, so the single line would be parsed as the header,
    # and the result is discarded inside this `if` — confirm intent.
    pd.read_csv('data/mobilenet_2/' + df.loc[0, 0] + ".csv")