In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
from sewar.full_ref import vifp

import PIL.Image
import cv2
from os import listdir
from tqdm import tqdm
import warnings
import multiprocessing as mp
import logging

In [2]:
# Send root-logger records to execution_log.log (append mode) using a
# timestamped format.
logger = logging.getLogger()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(lineno)s - %(levelname)s - %(message)s')

# Re-running this cell used to attach one more FileHandler to the root logger
# each time, so every message ended up in the file once per execution of the
# cell. Reuse the handler that already targets the log file when there is one.
_log_target = os.path.abspath('execution_log.log')
fhandler = next(
    (
        handler for handler in logger.handlers
        if isinstance(handler, logging.FileHandler)
        and handler.baseFilename == _log_target
    ),
    None,
)
if fhandler is None:
    fhandler = logging.FileHandler(filename='execution_log.log', mode='a')
    logger.addHandler(fhandler)
fhandler.setFormatter(formatter)

In [3]:
# Display settings: widen the console output and raise the row/column limits
# so large frames are shown instead of being truncated.
display_options = {
    'display.width': 1000,
    'display.max_rows': 500,
    'display.max_columns': 500,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

In [4]:
# All input files are read from the `input/` folder below the current working
# directory; the trailing slash lets callers append file names directly.
path = os.getcwd() + '/input/'


In [5]:
# Load the metadata table (one row per image) from <cwd>/input/public.csv.
df = pd.read_csv(path+'public.csv')

In [6]:
# Build the full image path for every row in a single pass. Rows whose `North`
# value is missing are read from the test folder; every other row is read from
# the train folder.
df['path'] = [
    path + ('test/test/' if north_missing else 'train/train/') + filename
    for filename, north_missing in zip(df['Filename'], df.North.isnull())
]

In [7]:
# Preview the first rows, including the derived `path` column.
df.head()

Unnamed: 0,Filename,Altitude,Delta,North,East,path
0,00003e3b9e5336685200ae85d21b4f5e.jpg,178.829834,-0.065231,-0.386045,0.929772,/home/ec2-user/SageMaker/k_project/input/train...
1,0001261e2060303a06ba6c64d676d639.jpg,207.921478,-0.080688,0.635584,0.152819,/home/ec2-user/SageMaker/k_project/input/train...
2,0002ac0d783338cfeab0b2bdbd872cda.jpg,178.048431,0.021576,-1.228229,-0.499388,/home/ec2-user/SageMaker/k_project/input/train...
3,0004289ee1c7b8b08c77e19878106ae3.jpg,201.084625,0.505981,-1.739709,-0.699928,/home/ec2-user/SageMaker/k_project/input/train...
4,0004d0b59e19461ff126e3a08a814c33.jpg,187.550201,-0.328156,-0.169798,2.828752,/home/ec2-user/SageMaker/k_project/input/train...


In [8]:
# Order the rows by ascending altitude and renumber the index 0..n-1 so that
# index positions follow the altitude order.
df = df.sort_values(by='Altitude').reset_index(drop=True)

In [9]:
# Altitude used to look up the follow-up image: current altitude plus Delta.
# NOTE(review): assumes `Delta` is the altitude change towards the next image
# — confirm against the dataset description.
df['next_altitude'] = df['Altitude'] + df['Delta']

In [10]:
def select_candidates(df, altitude, var=0.001):
    """Return the sorted image paths whose altitude matches `altitude`.

    Rows whose `Altitude` equals `altitude` exactly take precedence. Only when
    no row matches exactly are rows inside the inclusive band
    [altitude - var, altitude + var] returned instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns `Altitude` and `path`.
    altitude : float
        Altitude to look for.
    var : float, default 0.001
        Half-width of the tolerance band used when there is no exact match.

    Returns
    -------
    numpy.ndarray
        Values of `path` for the selected rows, sorted ascending. Empty when
        nothing matches.
    """
    exact_match = df.Altitude == altitude
    if exact_match.any():
        selected = exact_match
    else:
        lower_bound = altitude - var
        upper_bound = altitude + var
        selected = (df.Altitude >= lower_bound) & (df.Altitude <= upper_bound)
    return df.loc[selected, 'path'].sort_values().values

In [11]:
# Placeholder column for the matched follow-up image; `run_parallel` sets it
# per row and leaves NaN when no candidate matches.
df['next_path'] = np.nan

In [12]:
def run_parallel(df_line, split_col=120, threshold=0.5):
    """Find the image that continues `df_line`'s image and record its path.

    The part of the current image from column `split_col` onwards is compared,
    using `vifp`, with the first `split_col` columns of every candidate image
    returned by `select_candidates(df, df_line.next_altitude)`. The first
    candidate whose score reaches `threshold` is stored in
    `df_line['next_path']`; when none qualifies the value stays NaN.

    Unlike a bare `cv2.imread(...)[...]`, unreadable images do not raise here:
    a single bad file would otherwise abort the whole `pool.map` run and
    discard every result computed so far.

    Parameters
    ----------
    df_line : pandas.Series
        One row of the module-level `df`; must provide `path` and
        `next_altitude`. It is modified in place and returned.
    split_col : int, default 120
        Column index at which the images are split into the two strips that
        are compared.
    threshold : float, default 0.5
        Minimum `vifp` score for a candidate to be accepted.

    Returns
    -------
    pandas.Series
        `df_line` with `next_path` set to the matching path or NaN.
    """
    df_line.loc['next_path'] = np.nan

    img_now = cv2.imread(df_line.path)
    if img_now is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising, so slicing the result would fail with TypeError.
        logging.warning('Could not read image %s', df_line.path)
        return df_line
    img_now_2 = img_now[:, split_col:]

    candidate_filenames = select_candidates(df, df_line.next_altitude)
    for next_path in candidate_filenames:
        if next_path == df_line.path:
            # The tolerance band can contain the row itself (Delta close to
            # zero); an image cannot be its own successor.
            continue
        img_next = cv2.imread(next_path)
        if img_next is None:
            logging.warning('Could not read candidate image %s', next_path)
            continue
        img_next_1 = img_next[:, :split_col]
        if img_next_1.shape != img_now_2.shape:
            # NOTE(review): vifp is expected to reject inputs of different
            # shapes; skip such candidates instead of failing the whole run.
            continue
        if vifp(img_now_2, img_next_1) >= threshold:
            df_line.loc['next_path'] = next_path
            break
    return df_line

In [13]:
# Reproducible (fixed seed) sample of 1000 rows, previewed for inspection.
# NOTE(review): the parallel search further down is mapped over the full `df`,
# not over `df_to_study` — confirm which of the two was intended.
df_to_study = df.sample(1000, random_state=123)
df_to_study.head(10)

Unnamed: 0,Filename,Altitude,Delta,North,East,path,next_altitude,next_path
56891,a8ae6106b51fa41d5f17865c0d958263.jpg,180.903137,-0.151062,0.332506,2.193729,/home/ec2-user/SageMaker/k_project/input/train...,180.752075,
125633,96ade60d0d172493eb7aa33f70114378.jpg,209.406189,-0.096405,,,/home/ec2-user/SageMaker/k_project/input/test/...,209.309784,
18370,453f20d0233ae9781c3b2371c31d2bec.jpg,166.134323,0.041748,,,/home/ec2-user/SageMaker/k_project/input/test/...,166.176071,
66373,12401c7cd993a9a0ad39d0fb95e46877.jpg,184.051636,-0.068466,,,/home/ec2-user/SageMaker/k_project/input/test/...,183.98317,
57529,5b6f6f2df2bd1f461a19c19f65161223.jpg,181.098526,0.043961,0.441885,-0.118098,/home/ec2-user/SageMaker/k_project/input/train...,181.142487,
18455,98272a513b7e9c569b6f3640d5407d8c.jpg,166.182999,-0.005417,1.546243,1.187768,/home/ec2-user/SageMaker/k_project/input/train...,166.177582,
88745,eed77a7948ecd4087ef931b0faac64cb.jpg,191.792328,-0.172638,0.402565,2.559834,/home/ec2-user/SageMaker/k_project/input/train...,191.61969,
121215,42bd928eb7f73ba1a4ffee5e96616e93.jpg,206.915375,-0.29306,-0.213397,-2.433524,/home/ec2-user/SageMaker/k_project/input/train...,206.622314,
109391,60df7a6c1b259a7353138e3b6f62383f.jpg,200.772095,0.470276,0.691502,-2.514604,/home/ec2-user/SageMaker/k_project/input/train...,201.242371,
139297,123c5d6ad36b988ccdf849fdb679705a.jpg,219.497696,-0.020035,,,/home/ec2-user/SageMaker/k_project/input/test/...,219.477661,


In [14]:
# One worker process per CPU core.
# NOTE(review): `run_parallel` reads the module-level `df` and calls
# `select_candidates`; this presumably relies on the workers being forked
# after those names are defined — confirm the start method if this is run on
# a platform that spawns instead of forking.
pool = mp.Pool(mp.cpu_count())

In [15]:
# Progress marker in the log. WARNING level is used for a purely informational
# message; the root logger's level is never changed in this notebook, so
# presumably this is what lets the record through — TODO confirm.
logging.warning('Starting search in parallel')

In [None]:
%%time
# Run the search for every row of `df` on the worker pool; `results` holds one
# returned row (Series) per input row, in input order.
# The try/finally makes sure the pool is shut down even when a worker raises:
# previously a failing map skipped pool.close() and left the worker processes
# alive in the kernel.
try:
    results = pool.map(run_parallel, [row for _, row in df.iterrows()])
finally:
    pool.close()  # no further tasks will be submitted
    pool.join()   # wait for the worker processes to exit

In [None]:
# Progress markers in the log: the parallel search is done and the collection
# of the per-row results begins.
logging.warning('Ending search in parallel')

logging.warning('Starting concat of all results')

In [None]:
# Assemble the per-row results (one Series per input row) into a single frame
# with one constructor call. Growing the frame with pd.concat inside a loop
# copies all rows accumulated so far on every iteration, which is quadratic in
# the number of results.
df_final = pd.DataFrame(results)

In [None]:
# Progress marker in the log: all results are collected in `df_final`.
logging.warning('Ending concat of all results')

In [None]:
# Preview the first rows of the assembled result, including `next_path`.
df_final.head()

In [None]:
# Progress marker in the log: writing the result to disk begins.
logging.warning('Starting save of dataframe')

In [None]:
# Write the result to df_with_next.csv in the working directory; the index is
# not written, so only the columns are stored.
df_final.to_csv('df_with_next.csv', index=False)

In [None]:
# Progress marker in the log: the CSV file has been written.
logging.warning('Ending save of dataframe')