# Estimating parameters from bean images

## Libraries required

In [1]:
!pip install ktrain

Collecting ktrain
  Downloading ktrain-0.41.2.tar.gz (25.3 MB)
     ---------------------------------------- 0.0/25.3 MB ? eta -:--:--
     ---------------------------------------- 0.1/25.3 MB 3.5 MB/s eta 0:00:08
      --------------------------------------- 0.4/25.3 MB 5.5 MB/s eta 0:00:05
     - -------------------------------------- 0.7/25.3 MB 6.7 MB/s eta 0:00:04
     - -------------------------------------- 0.9/25.3 MB 8.0 MB/s eta 0:00:04
     -- ------------------------------------- 1.6/25.3 MB 9.2 MB/s eta 0:00:03
     --- ------------------------------------ 2.0/25.3 MB 9.2 MB/s eta 0:00:03
     --- ------------------------------------ 2.4/25.3 MB 9.7 MB/s eta 0:00:03
     ---- ----------------------------------- 2.9/25.3 MB 10.1 MB/s eta 0:00:03
     ----- ---------------------------------- 3.3/25.3 MB 10.4 MB/s eta 0:00:03
     ----- ---------------------------------- 3.7/25.3 MB 10.2 MB/s eta 0:00:03
     ------ --------------------------------- 4.1/25.3 MB 10.5 MB/s eta 


[notice] A new release of pip is available: 23.0.1 -> 24.0
[notice] To update, run: C:\Users\ogabr\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import os
# Set before `import ktrain` (next lines). Presumably makes ktrain disable TensorFlow 2
# behavior, which `predictor.explain` relies on — TODO confirm against the ktrain docs.
os.environ['DISABLE_V2_BEHAVIOR'] = '1'
# Number GPUs by PCI bus id and expose only device 0 to this process.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import ktrain
from ktrain import vision as vis
import numpy as np
import pandas as pd
import shutil
import re
import random

# Unzip images

In [None]:
import zipfile

# Unpack the image archive into the current working directory.
with zipfile.ZipFile("beans.zip", mode="r") as archive:
    archive.extractall(path="./")

# Rename files according to parameter

In [None]:
# One output folder per parameter. exist_ok=True lets the cell be re-run without
# failing on folders created by a previous run.
for param_dir in ('/content/a/', '/content/b/', '/content/L/'):
    os.makedirs(param_dir, exist_ok=True)

In [None]:
# Load the measurement table. Later cells rely on its 'filename' column and on the
# 'L', 'a' and 'b' columns.
dt = pd.read_csv('/content/beans.csv')
dt.head()

In [None]:
# Show the rows recorded for images 'e41' and 'e58'.
dt[dt['filename'].isin(['e41', 'e58'])]

In [None]:
# Column means of two 3-row slices, restricted to columns 2-4
# (presumably L, a, b — TODO confirm the column order of beans.csv).
print(dt.iloc[123:126,[2,3,4]].mean())
print(dt.iloc[174:177,[2,3,4]].mean())

### Selecting the parameter (L, a, or b)

In [None]:
# Parameter to model: 'L', 'a' or 'b'.
parameter = 'a'
# One row per image holding the per-image maximum, reduced to the label column.
dtP = dt.groupby(['filename'], as_index=False).max()[['filename', parameter]]
dtP.head()

### Renaming image files according to the selected parameter

In [None]:
import shutil

In [None]:
# Copy each image into the parameter folder, using its label rounded to 2 decimals
# as the new file name.
# NOTE(review): images whose rounded labels coincide are written to the same path,
# so the later copy overwrites the earlier one.
for row in range(len(dtP)):
  source_path = '/content/beans/' + dtP.loc[row, 'filename'] + '.jpg'
  label = str(np.round(dtP.loc[row, parameter], 2))
  shutil.copy2(source_path, '/content/' + parameter + '/' + label + '.jpg')

### Removing directory

In [None]:
#import shutil
#shutil.rmtree('/content/beans/', ignore_errors=True)

### Defining a regular expression to read the parameter value from image file names

In [None]:
# Build a regular expression whose first group is the label stored in the file name,
# i.e. the whole file stem before ".jpg".
# The previous pattern, r'([^/]+)\d+.jpg$', forced `\d+` to consume the last digit of
# the value and left the dot before "jpg" unescaped, so '52.87.jpg' was read as 52.8
# and single-digit names did not match at all.
PATTERN = r'([^/]+)\.jpg$'
import re
p = re.compile(PATTERN)
r = p.search('pictures/52.87.jpg')
print("Extracted L:%s" % (float(r.group(1))))

### Setting up the default directory

Set `DATADIR` to the folder that holds the renamed images for the selected parameter.

In [None]:
# Relative folder name. It points at the '/content/<parameter>/' folder filled earlier
# only when the working directory is /content — NOTE(review): confirm, or switch to
# the absolute path.
DATADIR=parameter

# Building training and validation sets

In [None]:
# Augmentation settings: horizontal and vertical flips enabled.
data_aug = vis.get_data_aug(horizontal_flip=True, vertical_flip=True)
# Labels are parsed from the file names with PATTERN and treated as regression targets;
# 10% of the images are held out for validation, with a fixed random_state.
(train_data, val_data, preproc) = vis.images_from_fname(DATADIR, pattern = PATTERN, data_aug = data_aug, val_pct=0.1,
                                                        is_regression=True, random_state=42)

# Creating a Model and Wrapping It in a `Learner`

In [None]:
# Print the image-regression model names accepted by vis.image_regression_model.
vis.print_image_regression_models()

In [None]:
# Build the regression model from the 'pretrained_resnet50' architecture.
model = vis.image_regression_model('pretrained_resnet50', train_data, val_data)

## Wrapping model and data in Learner object

In [None]:
# Wrap model and data in a ktrain Learner; batches of 16, 8 workers,
# multiprocessing switched off.
learner = ktrain.get_learner(model=model, train_data=train_data, val_data=val_data,
                             workers=8, use_multiprocessing=False, batch_size=16)

In [None]:
# Print the layer-by-layer summary of the wrapped model.
learner.model.summary()

## Estimating the Learning Rate

We will select a learning rate associated with falling loss from the plot displayed.

In [None]:
# Run the learning-rate finder for at most 2 epochs, then plot its result.
learner.lr_find(max_epochs=2)
learner.lr_plot()

## Training Model

In [None]:
# Alternative that was tried: learner.fit_onecycle(0.001, 15)
# Train with learning rate 0.005 for 20 epochs; reduce_on_plateau=3 presumably lowers
# the learning rate after 3 epochs without improvement — confirm in the ktrain docs.
learner.autofit(0.005, 20, reduce_on_plateau=3)

## Make Predictions

Let's make predictions on individual photos. We could either randomly select from the entire image directory or select just from the validation images.

In [None]:
# Get a Predictor instance that wraps the trained model together with the
# Preprocessor object, so it can be applied to raw image files.
predictor = ktrain.get_predictor(learner.model, preproc)

In [None]:
# File names of the validation images; later cells prepend DATADIR + '/' to them.
val_data.filenames

In [None]:
def img_prediction(predictor, fname):
    """Return ``[predicted, actual]`` for one image inside ``DATADIR``.

    ``fname`` is a path relative to ``DATADIR``. The prediction comes from
    ``predictor.predict_filename``; the actual value is parsed from the file
    name with the notebook-level compiled pattern ``p``.
    """
    image_path = '/'.join([DATADIR, fname])
    predicted = float(predictor.predict_filename(image_path)[0])
    actual = float(p.search(image_path).group(1))
    return [predicted, actual]

In [None]:

re = []
k = 0
for i in val_data.filenames:
  pred, act = img_prediction(predictor, i)
  err = np.abs(pred - act)
  print(k, "-" ,i, err, "-", pred)
  k = k + 1
  re.append(err)

In [None]:
np.mean(re)

In [None]:
def show_prediction(predictor, fname):
    """Display one image together with its predicted and actual value.

    ``fname`` is the full path of the image. The actual value is parsed from
    that path with the notebook-level compiled pattern ``p``.
    """
    print(fname)
    raw_prediction = predictor.predict_filename(fname)
    predicted = float(raw_prediction[0])
    actual = float(p.search(fname).group(1))
    vis.show_image(fname)
    summary = 'predicted:%s | actual: %s' % (predicted, actual)
    print(summary)

In [None]:
# Show the prediction for the validation image at position 8.
show_prediction(predictor, DATADIR +'/'+ val_data.filenames[8])

In [None]:
# Find the source image(s) whose label, rounded to 2 decimals as in the file names,
# equals 24.01. The column follows `parameter` (dtP only holds that column, so a
# hard-coded 'a' fails for 'L' or 'b'), and the comparison is tolerance-based because
# exact float equality on unrounded values rarely matches.
dtP[np.isclose(np.round(dtP[parameter], 2), 24.01)]


In [None]:
# Install an eli5 fork; presumably required by `predictor.explain` in the next cell
# — TODO confirm against the ktrain documentation.
!pip install git+https://github.com/amaiya/eli5@tfkeras_0_10_1

In [None]:
# Ask the predictor to explain its output for the validation image at position 8.
predictor.explain( DATADIR +'/'+ val_data.filenames[8])

In [None]:
# NOTE(review): a second eli5 fork, installed after `predictor.explain` has already
# been called — confirm which of the two installs is actually needed.
!pip install https://github.com/amaiya/eli5-tf/archive/refs/heads/master.zip

# Ensemble

In [None]:
import re  # re-import: guarantees `re` is the regex module even if a cell rebound the name

dt = pd.read_csv('/content/beans.csv')

# Parameters to estimate and the per-image aggregations combined in the ensemble.
parameters = ['L','a', 'b']
operators = ['min', 'max']

# The label is the whole file stem before ".jpg".
# The previous pattern, r'([^/]+)\d+.jpg$', dropped the last digit of every label
# ('52.87.jpg' -> 52.8) and did not escape the dot before "jpg".
PATTERN = r'([^/]+)\.jpg$'

p = re.compile(PATTERN)

import pdb

def buildModel(bean_p, operation, data, PATTERN):
  """Train an image-regression model for one parameter and return its predictor.

  The rows of ``data`` are reduced to one label per image with ``operation``,
  the images are copied into '/content/<bean_p>_<operation>/' under names that
  encode the label rounded to 2 decimals, and a ktrain 'default_cnn' regression
  model is trained on that folder.

  Args:
    bean_p: column of ``data`` to predict (e.g. 'L', 'a' or 'b').
    operation: aggregation applied per image: 'max', 'min' or 'mean'.
    data: DataFrame with a 'filename' column and a ``bean_p`` column.
    PATTERN: regular expression whose first group extracts the label from a
      file name.

  Returns:
    The ktrain predictor wrapping the trained model and its preprocessor.

  Raises:
    ValueError: if ``operation`` is not a supported aggregation.
  """
  supported_operations = ('max', 'min', 'mean')
  if operation not in supported_operations:
    # An unknown operation used to fall through every `if` and surface later as a
    # NameError on `dts`, after the folder had already been created.
    raise ValueError('Unsupported operation %r; expected one of %s'
                     % (operation, ', '.join(supported_operations)))

  DATADIR = bean_p+'_'+operation+'/'
  target_dir = '/content/'+DATADIR
  # Start from an empty folder: the call can be repeated, and images left over from a
  # previous run cannot leak into this training set.
  shutil.rmtree(target_dir, ignore_errors=True)
  os.makedirs(target_dir)
  print('Estimating '+bean_p+' using '+operation)
  print(operation.upper())

  # Aggregate only the label column, so non-numeric columns of `data` cannot break 'mean'.
  dts = data.groupby('filename')[bean_p].agg(operation).reset_index()
  # NOTE(review): images whose rounded labels coincide map to the same file name,
  # so the later copy overwrites the earlier one.
  for filename, value in zip(dts['filename'], dts[bean_p]):
    shutil.copy2('/content/beans/'+filename+'.jpg', target_dir+str(np.round(value,2))+'.jpg')

  data_aug = vis.get_data_aug(horizontal_flip=True, vertical_flip=True)
  # Read from the same absolute folder that was just written, so the result does not
  # depend on the current working directory.
  (train_data, val_data, preproc) = vis.images_from_fname(target_dir, pattern = PATTERN, data_aug = data_aug, val_pct=0.1, is_regression=True, random_state=42)

  model = vis.image_regression_model('default_cnn', train_data, val_data)

  learner = ktrain.get_learner(model=model, train_data=train_data, val_data=val_data, workers=8, use_multiprocessing=False, batch_size=16)
  learner.autofit(0.05, 30, reduce_on_plateau=3)

  predictor = ktrain.get_predictor(learner.model, preproc)

  return predictor


### Removing testing samples

In [None]:
def img_prediction(predictor, fname):
    """Return the predictor's first output for the image at ``fname`` as a float."""
    return float(predictor.predict_filename(fname)[0])

In [None]:

def _remove_model_dirs():
  """Delete every '/content/<parameter>_<operator>/' folder written by buildModel."""
  for pi in parameters:
    for opi in operators:
      shutil.rmtree('/content/'+pi+'_'+opi+'/', ignore_errors=True)


_remove_model_dirs()

v_ensaios = list(dt['filename'].unique())
# Fixed seed so the folds, and therefore the reported errors, are reproducible.
random.seed(42)
random.shuffle(v_ensaios)
partitions = [10,10,10,10,10,10,10,10,10,5]
fold_results = []

for k in partitions:
  test = v_ensaios[:k]
  if not test:
    # Fewer images than the partition sizes assume: nothing left to evaluate.
    break
  print(test)
  del v_ensaios[:k]
  # `isin(test)`, not `isin([test])`: the nested list never matched a file name, so the
  # held-out images stayed in the training data.
  dtTr = dt[~dt['filename'].isin(test)]

  # Actual per-image means, indexed by file name in the same order as `test`.
  # groupby sorts by file name while `test` is shuffled, so without the reindex the
  # predictions would be subtracted from the wrong images.
  x_test = dt[dt['filename'].isin(test)]
  x_test = x_test.groupby('filename')[parameters].mean().reindex(test).rename_axis('filename')

  re_pi = pd.DataFrame(index=x_test.index)
  for pi in parameters:
    op_preds = []
    for opi in operators:
      aux = []
      print('Parameter --> '+pi, ' and operator --> ',opi)
      model = buildModel(pi, opi, dtTr, PATTERN)
      for ei in test:
        aux.append(img_prediction(model, '/content/'+'beans'+'/'+ ei+'.jpg'))
      op_preds.append(aux)
    # Ensemble prediction: average over the operator models, one value per test image.
    re_opi = pd.Series(np.mean(op_preds, axis=0), index=x_test.index)
    re_pi[pi] = (x_test[pi] - re_opi).abs()
    print(re_pi)
  fold_results.append(re_pi.reset_index())
  _remove_model_dirs()

# Absolute error per image and parameter over all folds. Built with a single concat
# because DataFrame.append was removed in pandas 2.0.
result = pd.concat(fold_results, ignore_index=True) if fold_results else pd.DataFrame()
result

In [None]:
import random
# Rebuild the full list of distinct image names found in `dt`.
v_ensaios = list(dt['filename'].unique())

In [None]:
# Remove the per-parameter/per-operator image folders, ignoring those that are missing.
for stale_dir in ('/content/a_min/', '/content/L_min/', '/content/b_min/',
                  '/content/L_max/', '/content/a_max/', '/content/b_max/'):
    shutil.rmtree(stale_dir, ignore_errors=True)

In [None]:
# NOTE(review): scratch cell. It relies on `re`, `ei`, `pi` and `aux` left over from
# other cells and discards the value returned by `append` — confirm whether it is
# still needed.
re.append(pd.DataFrame([ei, pi, np.mean(aux), dt[dt['filename'] == ei][pi].mean(),
                            np.abs(np.mean(aux)- dt[dt['filename'] == ei][pi].mean())]).T)


In [None]:
def show_prediction(predictor, actual, fname):
    """Display the image at ``fname`` with its predicted value and the given ``actual``.

    Unlike the earlier two-argument version, the actual value is supplied by the
    caller instead of being parsed from the file name.
    """
    print(fname)
    raw_prediction = predictor.predict_filename(fname)
    predicted = float(raw_prediction[0])
    vis.show_image(fname)
    summary = 'predicted:%s | actual: %s' % (predicted, actual)
    print(summary)

In [None]:
# Remove two of the model image folders, ignoring them if they do not exist.
shutil.rmtree('/content/a_min/', ignore_errors=True)
shutil.rmtree('/content/L_max/', ignore_errors=True)


In [None]:
# NOTE(review): `val_ens` is not defined in any visible cell, and `models` is only ever
# assigned an empty list — this cell appears to depend on state from cells that no
# longer exist. `id` also shadows the Python builtin of the same name.
id = 0
show_prediction(models[0]['model'], dt[dt['filename'] == val_ens[id]]['L'].mean(), '/content/'+'beans'+'/'+ val_ens[id]+'.jpg')

In [None]:
# Same image, second entry of `models`, compared against the mean 'L' value.
show_prediction(models[1]['model'], dt[dt['filename'] == val_ens[id]]['L'].mean(), '/content/'+'beans'+'/'+ val_ens[id]+'.jpg')

In [None]:
# Same image, third entry of `models`, compared against the mean 'L' value.
show_prediction(models[2]['model'], dt[dt['filename'] == val_ens[id]]['L'].mean(), '/content/'+'beans'+'/'+ val_ens[id]+'.jpg')

In [None]:
re = pd.DataFrame()
for im in range(0,len(val_ens)):

  for pi in parameters:
    aux = []
    l_i_p = [x['parameter']==pi for x in models]
    l_models_i = np.where(np.array(l_i_p)== True)[0]

    param_models = models[l_models_i[0]:l_models_i[2]+1]
    param_models = param_models[0:2]

    for opi in param_models:
      pred = img_prediction(opi['model'], '/content/'+'beans'+'/'+ val_ens[im]+'.jpg')
      aux.append(pred)
    re = re.append(pd.DataFrame([val_ens[im], pi, np.mean(aux), dt[dt['filename'] == val_ens[im]][opi['parameter']].mean(),
                            np.abs(np.mean(aux)- dt[dt['filename'] == val_ens[im]][opi['parameter']].mean())]).T)



In [None]:
re.columns = ['filename', 'parameter', 'pred', 'actual', 'error']
re

In [None]:
# Show the first two entries of `models`.
models[0:2]

In [None]:
# NOTE(review): `val_ens` is not assigned in any visible cell — confirm where it comes from.
val_ens