# SIFT init in Colab

In [None]:
!pip uninstall opencv-python
!pip install -U opencv-contrib-python==3.4.0.12

In [3]:
import sys
if 'google.colab' in sys.modules:
    # Colab-only bootstrap: install subversion, export the two sample images
    # from the AI_is_Math repository, and swap in the pinned contrib OpenCV.
    import subprocess
    # -y pre-answers the confirmation prompts of apt-get and pip; these
    # commands are launched from code with nobody at a prompt to confirm, so
    # without the flag they can block or abort.
    # NOTE(review): return codes are ignored, so a failed step goes unnoticed.
    subprocess.call('apt-get install -y subversion'.split())
    subprocess.call('svn export https://github.com/YoniChechik/AI_is_Math/trunk/c_08_features/left.jpg'.split())
    subprocess.call('svn export https://github.com/YoniChechik/AI_is_Math/trunk/c_08_features/right.jpg'.split())
    subprocess.call('pip uninstall -y opencv-python'.split())
    subprocess.call('pip install -U opencv-contrib-python==3.4.0.12'.split())

# Init

In [4]:
import numpy as np
import cv2 as cv
import glob
import os
from sklearn.cluster import MiniBatchKMeans
from numpy import genfromtxt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV

In [5]:
# Mount Google Drive at /content/drive; the dataset and LF-Net paths used in
# the cells below all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


# LF-Net

In [None]:
import os
# Switch to the project's lf-net folder on the mounted Drive (it must already exist).
os.chdir("/content/drive/My Drive/Height-regression/height-estimation/lf-net")
# NOTE(review): cloning from inside .../lf-net puts the repository in a nested
# lf-net/ subfolder, while the following cells run from .../lf-net itself —
# confirm the intended directory layout.
!git clone https://github.com/alekseychuiko/lf-net.git

In [None]:
# Install LF-Net's Python dependencies from the requirements.txt found in the
# lf-net folder (relies on `os` having been imported by an earlier cell).
os.chdir("/content/drive/My Drive/Height-regression/height-estimation/lf-net")
!pip install -r requirements.txt

In [None]:
import os
# run_lfnet.py is invoked with paths relative to the lf-net folder, so change
# into it first.
os.chdir("/content/drive/My Drive/Height-regression/height-estimation/lf-net")
# Process the training images in ../data/train/mask2/cg_25m and write the
# script's output to ../data/train/mask2/cg_25m_lfnet.
!python run_lfnet.py --in_dir=../data/train/mask2/cg_25m --out_dir=../data/train/mask2/cg_25m_lfnet

# Global vars

In [6]:
# Root folder of the dataset on the mounted Drive.
BASE_DIR = "/content/drive/My Drive/Height-regression/height-estimation/data"

# Training images: every *.png under <BASE_DIR>/train/mask2/cg_25m.
# glob returns them in no particular order; sort_filename() reorders them by ID.
BASE_TRAIN_IMG_DIR = os.path.join(BASE_DIR, "{}/mask2/cg_25m".format("train"))
TRAIN_IMG_LIST = glob.glob(os.path.join(BASE_TRAIN_IMG_DIR, "*.png"))

# Test images: same layout under <BASE_DIR>/test/mask2/cg_25m.
BASE_TEST_IMG_DIR = os.path.join(BASE_DIR, "{}/mask2/cg_25m".format("test"))
TEST_IMG_LIST = glob.glob(os.path.join(BASE_TEST_IMG_DIR, "*.png"))

# Regression targets read from the comma-separated height.csv of each split.
# NOTE(review): presumably one value per image, in ascending image-ID order to
# match sort_filename() — confirm against the CSV files.
y_train = genfromtxt(os.path.join(BASE_DIR, "{}/height.csv".format("train")), delimiter=',')
y_test = genfromtxt(os.path.join(BASE_DIR, "{}/height.csv".format("test")), delimiter=',')
# Number of k-means clusters (visual words); also the histogram length in histogram().
K = 1500

# Pipeline: file-name sorting - SIFT extraction - clustering - histogram - regression model - training - prediction

In [7]:
def sort_filename (img_list, base_img_dir):
    """Return the image paths rebuilt under ``base_img_dir`` in ascending numeric-ID order.

    Each entry of ``img_list`` is reduced to its last "/"-separated component,
    the final four characters (the extension, e.g. ".png") are dropped, and
    what remains is parsed as an integer ID. The IDs are sorted numerically
    and turned back into ``<base_img_dir>/<id>.png`` paths.

    Args:
        img_list: iterable of image paths whose file names are integers
            followed by a four-character extension.
        base_img_dir: directory used to rebuild the returned paths.

    Returns:
        A list of paths, one per input entry, ordered by integer ID.

    Raises:
        ValueError: if a file name (minus its last four characters) is not an integer.
    """
    numeric_ids = []
    for path in img_list:
        file_name = path.split("/")[-1]
        numeric_ids.append(int(file_name[:-4]))
    numeric_ids.sort()

    ordered_paths = []
    for numeric_id in numeric_ids:
        ordered_paths.append(os.path.join(base_img_dir, "{}.png".format(str(numeric_id))))
    return ordered_paths

In [8]:
def sift_extraction (img_list):
    """Collect the SIFT descriptors of every image into one flat list.

    Args:
        img_list: list of image file paths, read with ``cv.imread``.

    Returns:
        A list of 1-D descriptor vectors. An image for which SIFT returns no
        descriptors contributes a single all-zero float32 vector of
        ``sift.descriptorSize()`` elements, so it is still represented.
    """
    dico = []
    if not img_list:
        # Nothing to process; skip building a detector.
        return dico

    # One detector serves every image; creating it inside the loop repeated
    # loop-invariant work.
    sift = cv.xfeatures2d.SIFT_create()

    for imgfile in img_list:
        img = cv.imread(imgfile)
        _, des = sift.detectAndCompute(img, None)

        if des is None:
            # No keypoint found: stand in with a zero descriptor.
            dico.append(np.zeros(sift.descriptorSize(), np.float32))
        else:
            # Append each descriptor row individually.
            dico.extend(des)

    return dico

In [9]:
def clustering (dico):
    """Fit a MiniBatchKMeans with ``K`` clusters on the descriptor list.

    Args:
        dico: sequence of descriptor vectors to cluster.

    Returns:
        The fitted MiniBatchKMeans estimator (batch size 100, ``init_size``
        of three times the cluster count, verbose logging enabled).
    """
    model = MiniBatchKMeans(
        n_clusters=K,
        batch_size=100,
        init_size=3 * K,
        verbose=1,
    )
    model.fit(dico)
    return model

In [10]:
def histogram (img_list, kmeans):
    """Build one normalized cluster histogram per image.

    For every image the SIFT descriptors are assigned to their nearest
    k-means cluster and the per-cluster counts are divided by the number of
    keypoints. An image with no descriptors gets a single count in the
    cluster that the all-zero descriptor falls into.

    Args:
        img_list: list of image file paths, read with ``cv.imread``.
        kmeans: fitted clustering model exposing ``predict`` and ``n_clusters``.

    Returns:
        A list with one 1-D float array of length ``kmeans.n_clusters`` per image.
    """
    histo_list = []
    if not img_list:
        # Nothing to process; skip building a detector.
        return histo_list

    # Loop-invariant: one detector for all images, and the histogram length is
    # taken from the model itself so it always matches the cluster labels.
    sift = cv.xfeatures2d.SIFT_create()
    n_bins = kmeans.n_clusters

    for imgfile in img_list:
        img = cv.imread(imgfile)
        kp, des = sift.detectAndCompute(img, None)

        histo = np.zeros(n_bins)

        if des is None:
            # No keypoint: count the zero descriptor once (weight 1).
            placeholder = np.zeros((1, sift.descriptorSize()), np.float32)
            histo[kmeans.predict(placeholder)] += 1
        elif len(des) > 0:
            # One batched predict instead of one call per descriptor, then
            # count labels per cluster and normalize by the keypoint count.
            labels = kmeans.predict(des)
            histo += np.bincount(labels, minlength=n_bins) / len(kp)

        histo_list.append(histo)

    return histo_list

In [11]:
def regression_model (histo_list, y):
    """Fit a random-forest regressor on the image histograms.

    Args:
        histo_list: sequence of per-image histograms; converted to one array.
        y: target values passed to ``fit`` alongside the histograms.

    Returns:
        The fitted RandomForestRegressor (max_depth=500, random_state=50).
    """
    features = np.array(histo_list)

    forest = RandomForestRegressor(max_depth=500, random_state=50)
    forest.fit(features, y)
    return forest

In [12]:
def prediction (img_list, base_img_dir, kmeans, regr, y_test):
    """Predict a target value for every image in ``img_list``.

    The paths are put in numeric-ID order with ``sort_filename``, turned into
    cluster histograms with ``histogram`` and fed to ``regr.predict``.

    Args:
        img_list: image paths to predict on.
        base_img_dir: directory used by ``sort_filename`` to rebuild the paths.
        kmeans: fitted clustering model used to build the histograms.
        regr: fitted regressor exposing ``predict``.
        y_test: accepted but not used by this function.

    Returns:
        Whatever ``regr.predict`` returns for the histograms, in sorted-ID order.
    """
    ordered_paths = sort_filename (img_list, base_img_dir)
    features = histogram (ordered_paths, kmeans)
    return regr.predict(features)

In [13]:
def training (img_list, base_img_dir, y_train):
    """Run the full training pipeline and return the fitted models.

    Steps: order the image paths by numeric ID, extract SIFT descriptors,
    cluster them, build one histogram per image and fit the regressor on
    those histograms.

    Args:
        img_list: training image paths.
        base_img_dir: directory used by ``sort_filename`` to rebuild the paths.
        y_train: target values passed to ``regression_model``.

    Returns:
        A ``(kmeans, regr)`` tuple: the fitted clustering model and the
        fitted regressor.
    """
    sorted_filename = sort_filename (img_list, base_img_dir)
    dico = sift_extraction(sorted_filename)
    kmeans = clustering(dico)
    histo_list = histogram (sorted_filename, kmeans)
    regr = regression_model (histo_list, y_train)

    # The previous third return value referenced an undefined name `y`
    # (NameError); the caller unpacks exactly these two models.
    return kmeans, regr

In [15]:
def tunning (img_list, base_img_dir, y_train):
    """Grid-search RandomForestRegressor hyper-parameters and print the best set.

    The features are rebuilt from scratch (sorted paths, SIFT descriptors,
    k-means, histograms), then a 3-fold ``GridSearchCV`` is run over the
    parameter grid below using all available cores.

    Args:
        img_list: training image paths.
        base_img_dir: directory used by ``sort_filename`` to rebuild the paths.
        y_train: target values passed to the grid search.

    Returns:
        None; the best parameter combination is printed.
    """
    sorted_filename = sort_filename (img_list, base_img_dir)
    dico = sift_extraction(sorted_filename)
    kmeans = clustering(dico)
    histo_list = histogram (sorted_filename, kmeans)

    params = {
        'bootstrap': [True, False],
        'max_depth': [25, 50, 75, 100, None],
        # 1.0 (use all features) is what 'auto' meant for a forest regressor;
        # 'auto' itself is deprecated and rejected by newer scikit-learn.
        'max_features': [1.0, 'sqrt'],
        'min_samples_leaf': [1, 2, 4],
        'min_samples_split': [2, 5, 10],
        'n_estimators': [200, 500, 800, 1000, 1500, 2000],
    }

    model = RandomForestRegressor()

    grid_search = GridSearchCV(estimator = model, param_grid = params,
                               cv = 3, n_jobs = -1, verbose = 2)
    grid_search.fit(histo_list, y_train)
    print (grid_search.best_params_)




# Main

In [None]:
# Fit the clustering model and the regressor on the training images; the call
# result is unpacked into exactly two values.
kmeans, regr = training (TRAIN_IMG_LIST, BASE_TRAIN_IMG_DIR, y_train)
# Predict on the test images (processed in ascending image-ID order).
y_reg = prediction (TEST_IMG_LIST, BASE_TEST_IMG_DIR, kmeans, regr, y_test)
# Report the mean absolute error between the test targets and the predictions.
print ("MAE: {}".format(mean_absolute_error(y_test, y_reg)))

# Tuning

In [None]:

# Hyper-parameter grid search on the training set; long-running (the captured
# log below shows several hours of elapsed time).
tunning (TRAIN_IMG_LIST, BASE_TRAIN_IMG_DIR, y_train)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and total inertia
Computing label assignment and tota

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 22.3min
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed: 87.4min
[Parallel(n_jobs=-1)]: Done 361 tasks      | elapsed: 127.0min
[Parallel(n_jobs=-1)]: Done 644 tasks      | elapsed: 224.7min
[Parallel(n_jobs=-1)]: Done 1009 tasks      | elapsed: 405.3min
