### Imports

In [67]:
import pandas as pd
import cv2
from matplotlib import pyplot as plt
import numpy as np  
import os

from requests import post
import json
import base64
import re
from PIL import Image
import shutil

### Defs

In [68]:
def get_iam_token(iam_url, oauth_token):
    """Exchange a Yandex Passport OAuth token for an IAM token.

    POSTs ``{"yandexPassportOauthToken": oauth_token}`` as JSON to ``iam_url``
    and parses the response body as JSON.

    Args:
        iam_url: URL of the token endpoint.
        oauth_token: OAuth token placed in the request body.

    Returns:
        The value stored under ``'iamToken'`` in the parsed response, or
        ``None`` when the body parses to ``None`` or has no such key.
    """
    reply = post(iam_url, json={"yandexPassportOauthToken": oauth_token})
    payload = json.loads(reply.text)
    # Guard clause: anything without an 'iamToken' entry counts as "no token".
    if payload is None or 'iamToken' not in payload:
        return None
    return payload['iamToken']

def request_analyze(vision_url, iam_token, folder_id, image_data):
    """Send an image to the server for recognition and return the server's response.

    Args:
        vision_url: URL the request is POSTed to.
        iam_token: token sent as ``Authorization: Bearer <iam_token>``.
        folder_id: value sent as ``folderId`` in the request body.
        image_data: value sent as ``content`` of the single analyze spec
            (the callers in this notebook pass a base64-encoded string).

    Returns:
        The response object returned by ``post``; no status check or parsing
        is performed here.
    """
    # One TEXT_DETECTION feature, configured for English and Russian.
    text_detection = {
        'type': 'TEXT_DETECTION',
        'textDetectionConfig': {'languageCodes': ['en', 'ru']},
    }
    analyze_spec = {'content': image_data, 'features': [text_detection]}
    auth_headers = {'Authorization': 'Bearer ' + iam_token}
    request_body = {'folderId': folder_id, 'analyzeSpecs': [analyze_spec]}
    return post(vision_url, headers=auth_headers, json=request_body)

def encode_file(file):
    """Read all remaining content from ``file`` and return it base64-encoded.

    Args:
        file: an object with a ``read()`` method returning bytes
            (e.g. a file opened in binary mode).

    Returns:
        The base64 encoding of the content, as ``bytes``.
    """
    return base64.b64encode(file.read())

In [69]:
# Based on: displaying-different-images-with-actual-size-in-matplotlib-subplot
def display(im_path):
    """Show the image stored at ``im_path`` at its native pixel size.

    The image is read with ``plt.imread`` and drawn on a single axes that
    covers the whole figure, with spines and ticks hidden. The figure is
    sized so that one image pixel corresponds to one figure pixel.

    Args:
        im_path: path of the image file to display.
    """
    dpi = 80
    im_data = plt.imread(im_path)

    height, width = im_data.shape[:2]

    # Figure size in inches such that, at `dpi` dots per inch, the figure has
    # exactly width x height pixels.
    figsize = width / float(dpi), height / float(dpi)

    # dpi must be passed explicitly: figsize was computed for `dpi`, and
    # without it the figure would use the default figure dpi and the image
    # would be rescaled whenever that default is not 80.
    fig = plt.figure(figsize=figsize, dpi=dpi)

    # One axes spanning the full figure, with no margins.
    ax = fig.add_axes([0, 0, 1, 1])

    # Hide spines, ticks, etc.
    ax.axis('off')

    # The colormap only applies to single-channel images.
    ax.imshow(im_data, cmap='gray')

    plt.show()
    

def noise_removal(image):
    """Run a fixed dilate / erode / close / median-blur pipeline on ``image``.

    All three morphological steps use the same 1x1 all-ones ``uint8`` kernel;
    the final median blur uses an aperture size of 3.

    Args:
        image: image array accepted by the OpenCV calls below.

    Returns:
        The image produced by the last step (``cv2.medianBlur``).
    """
    unit_kernel = np.ones((1, 1), np.uint8)
    dilated = cv2.dilate(image, unit_kernel, iterations=1)
    eroded = cv2.erode(dilated, unit_kernel, iterations=1)
    closed = cv2.morphologyEx(eroded, cv2.MORPH_CLOSE, unit_kernel)
    return cv2.medianBlur(closed, 3)

def is_vin_number(string):
    """Return True if ``string`` has the shape of a VIN.

    A match is exactly 17 characters, each an ASCII digit or an upper-case
    Latin letter other than ``I``, ``O`` and ``Q``.

    ``re.fullmatch`` is used instead of ``re.match`` with ``$`` because ``$``
    also matches just before a trailing newline, which would let a
    17-character code followed by ``"\\n"`` through.

    Args:
        string: candidate text to check.

    Returns:
        ``True`` when the whole string matches the pattern, else ``False``.
    """
    pattern = r'[A-HJ-NPR-Z0-9]{17}'
    return re.fullmatch(pattern, string) is not None

### Recognition function

In [70]:
def preprocessing(path):
    """Load an image and return an adaptively thresholded binary version.

    The image is read with ``cv2.imread``, converted from BGR to grayscale,
    and thresholded with a Gaussian adaptive threshold (maximum value 255,
    binary mode, block size 43, constant 5).

    Args:
        path: path of the image file to read.

    Returns:
        The array returned by ``cv2.adaptiveThreshold``.
    """
    bgr = cv2.imread(path)
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    return cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        43,
        5,
    )

In [71]:
# The OAuth token is a credential and must not be stored in the notebook, so
# it is read from the YANDEX_OAUTH_TOKEN environment variable.
# NOTE(review): an OAuth token was previously committed in this cell; treat it
# as leaked and revoke it.
token = os.environ.get('YANDEX_OAUTH_TOKEN')
if not token:
    raise RuntimeError(
        'Set the YANDEX_OAUTH_TOKEN environment variable to a Yandex OAuth token '
        'before running this cell.'
    )

iam_url = 'https://iam.api.cloud.yandex.net/iam/v1/tokens'

# IAM token used by default by the recognition functions below.
iam_token_global = get_iam_token(iam_url, token)
if iam_token_global is None:
    # get_iam_token returns None when the response carries no 'iamToken';
    # fail here instead of later with a TypeError while building the
    # Authorization header.
    raise RuntimeError('Could not obtain an IAM token from ' + iam_url)

In [72]:
def yandex_recognition_full(file_link, iam_token=None):
    """Send an image file for text detection and return the raw response.

    The file is read in binary mode, base64-encoded, and passed to
    ``request_analyze`` together with the fixed endpoint URL and folder id
    defined below.

    Args:
        file_link: path of the image file to send.
        iam_token: IAM token for the request. When omitted (``None``), the
            current value of the module-level ``iam_token_global`` is used.

    Returns:
        Whatever ``request_analyze`` returns (the HTTP response object).
    """
    if iam_token is None:
        # Looked up at call time rather than bound as a default value at
        # definition time, so re-running the token cell takes effect without
        # re-defining this function.
        iam_token = iam_token_global

    vision_url = 'https://vision.api.cloud.yandex.net/vision/v1/batchAnalyze'
    folder_id = 'b1gpfo4tsd8fm51jlbqp'

    with open(file_link, "rb") as f:
        image_data = base64.b64encode(f.read()).decode('utf-8')

    return request_analyze(vision_url, iam_token, folder_id, image_data)

In [73]:
# Per-character clean-up applied to every recognized word before the VIN
# check: '*', '+', '-' and '#' are dropped, 'O' becomes 'D' and 'j' becomes 'J'.
# NOTE(review): 'O' -> 'D' is kept exactly as in the original code; confirm it
# is intended (a misread zero might be the more common case).
_VIN_OCR_FIXES = str.maketrans({
    '*': None,
    '+': None,
    '-': None,
    '#': None,
    'O': 'D',
    'j': 'J',
})


def yandex_recognition(file_link, iam_token=None):
    """Recognize the VIN shown on an image file.

    The image is sent through ``yandex_recognition_full``. Every word of the
    first page of the first result is cleaned with ``_VIN_OCR_FIXES`` and
    tested with ``is_vin_number``; the last word that passes is returned.

    Args:
        file_link: path of the image file to send.
        iam_token: IAM token for the request. When omitted (``None``), the
            current value of the module-level ``iam_token_global`` is used.

    Returns:
        The recognized VIN, or the string ``'Ошибка распознавания'`` when no
        word passes the check or the response does not have the expected
        structure.
    """
    if iam_token is None:
        # Looked up at call time so that a refreshed global token is picked up.
        iam_token = iam_token_global

    # Reuse the sibling function instead of duplicating the request code.
    response = yandex_recognition_full(file_link, iam_token)

    vin = 'Ошибка распознавания'
    try:
        first_result = response.json()['results'][0]['results'][0]
        blocks = first_result['textDetection']['pages'][0]['blocks']
        for block in blocks:
            for line in block['lines']:
                for word in line['words']:
                    candidate = word['text'].translate(_VIN_OCR_FIXES)
                    if is_vin_number(candidate):
                        vin = candidate
    except (KeyError, IndexError, TypeError, ValueError, AttributeError):
        # Non-JSON body or a response without the expected nesting: report a
        # recognition error. Unlike a bare `except:`, this lets
        # KeyboardInterrupt / SystemExit propagate.
        vin = 'Ошибка распознавания'

    return vin

### Data initialization

In [74]:
# Location of the reference ("etalon") workbook.
path = r"C:\Apps\Projects\VIN_codes\all_vins_etalon.xlsx"

# Read the workbook and make its 'VIN' column the index.
etalon_vins = pd.read_excel(path).set_index('VIN')
etalon_vins

  warn("""Cannot parse header or footer so it will be ignored""")


Unnamed: 0_level_0,№п/п,Бренд,Модель,Стоянка,Наличие,Номер ключа,ГОС. НОМЕР,Unnamed: 8,Состояние,ДЦ,Unnamed: 11,Проверка,Unnamed: 13,Дубли
VIN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
ZAR94000007321059,1,Alfa Romeo,Giulietta,Джип,1,1.0,О803ОЕ797,,БУ,Алтуфьево,,Ок,,1
WAUZZZ8P2AA113847,2,Audi,A3,Джип,1,272.0,У879АК 797,,БУ,Алтуфьево,,Ок,,1
WAUZZZ8P4CA134881,3,Audi,A3,Джип,1,55.0,А152ЕМ 790,,БУ,Алтуфьево,,Ок,,1
WAUZZZ8V1J1060878,4,Audi,A3,Джип,1,81.0,М973ХВ 134,,БУ,Алтуфьево,,Ок,,1
WAUZZZ8V1J1065711,5,Audi,A3,Джип,1,123.0,М350ХС 134,,БУ,Алтуфьево,,Ок,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
EBKSCX200P0000774,13899,Sollers,ARGO Chassis,Рольф октябрьская,,,,,Новые,СПБ Витебский,,Ок,,3
EBKSCX200P0000536,13900,Sollers,ARGO Chassis,Рольф октябрьская,,,,,Новые,СПБ Витебский,,Ок,,3
EBECXM209P0006955,13901,Sollers,ATLANT Chassis Dealer,Автофургон,,,,,Новые,СПБ Витебский,,Ок,,1
LSVUH60X4PN011734,13902,Skoda,Karoq,РОЛЬФ ЯСЕНЕВО,,,,,Новые,СПБ Витебский,,Ок,,1


In [75]:
# Index of the reference table (its 'VIN' column); recognized VINs are checked
# for membership in it.
all_vins = etalon_vins.index

In [65]:
# Network-share configuration: photos are read from `input_path` and the sorted
# result is written to a folder under `output_path`.
input_path = r"R:\FAS\Valuation\Phoenix\02.01 Site Inspection\Лахта\Антон"
output_path = r"R:\FAS\Valuation\Phoenix\02.01 Site Inspection\Лахта"

# Result folder name: "<parent folder>_<input folder>_sorted_v3".
output_folder_path = '\\'.join([
    output_path,
    '_'.join(input_path.split('\\')[-2:]) + '_sorted_v3',
])

In [79]:
# Local-disk configuration: photos are read from `input_path` and the sorted
# result is written to a folder under `output_path`.
input_path = r"C:\Apps\Projects\VIN_codes\real_dataset"
output_path = r"C:\Apps\Projects\VIN_codes"

# Result folder name: "<input folder>_sorted".
output_folder_path = '\\'.join([
    output_path,
    input_path.rsplit('\\', 1)[-1] + '_sorted',
])

In [24]:
# Sub-folders of the result folder for the two failure cases: VINs that were
# read but are missing from the reference table, and photos where no VIN was read.
not_found_path = '\\'.join((output_folder_path, '!not_found'))
bad_recognition_path = '\\'.join((output_folder_path, '!bad_recognition'))

In [None]:

def _copy_photo_group(src_dir, names, dst_dir):
    """Copy each file named in ``names`` from ``src_dir`` into ``dst_dir`` (created if missing)."""
    os.makedirs(dst_dir, exist_ok=True)
    for name in names:
        shutil.copy(os.path.join(src_dir, name), dst_dir)


def _recognize_resized(photo_path, scale=0.6):
    """Run ``yandex_recognition`` on a temporary copy of ``photo_path`` scaled by ``scale``."""
    # Local import so this cell does not depend on changes to the imports cell.
    import tempfile

    # The resized copy goes into a private temporary directory rather than the
    # current working directory, so it can never overwrite (and then delete) a
    # real file of the same name, and it is removed even if recognition raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        resized_path = os.path.join(tmp_dir, os.path.basename(photo_path))
        with Image.open(photo_path) as im:
            resized_im = im.resize((round(im.size[0]*scale), round(im.size[1]*scale)))
            resized_im.save(resized_path)
        return yandex_recognition(resized_path)


not_found_path = os.path.join(output_folder_path, '!not_found')
bad_recognition_path = os.path.join(output_folder_path, '!bad_recognition')

# exist_ok=True keeps the cell re-runnable after a partial or earlier run.
for result_dir in (output_folder_path, not_found_path, bad_recognition_path):
    os.makedirs(result_dir, exist_ok=True)

# os.listdir returns names in arbitrary order, but the grouping below relies on
# the files of one car being adjacent, so the listing is sorted first.
photos = sorted(os.listdir(input_path))
if 'Thumbs.db' in photos:
    photos.remove('Thumbs.db')

number_of_photos = len(photos)
print('Всего ', number_of_photos, ' фотографий (', number_of_photos/3, 'машин)')

# Photos are handled in consecutive groups of three; only the first photo of
# each group is sent for recognition and the whole group is filed by its result.
for index in range(0, number_of_photos, 3):
    photo_name = photos[index]

    # Check the extension before opening the file: only .JPG / .jpg are processed.
    if not photo_name.endswith(('.JPG', '.jpg')):
        continue

    # Slicing tolerates a trailing group with fewer than three files.
    group = photos[index:index + 3]

    text = _recognize_resized(os.path.join(input_path, photo_name))

    if text in all_vins:
        comment = 'OK'
        _copy_photo_group(input_path, group, os.path.join(output_folder_path, text))
    elif text == 'Ошибка распознавания':
        comment = ""
        _copy_photo_group(input_path, group, bad_recognition_path)
    else:
        comment = 'Не найден VIN в базе'
        _copy_photo_group(input_path, group, os.path.join(not_found_path, text))

    print(round(((index + 3)/number_of_photos)*100, 2), '% обработано -', index/3 + 1, photo_name, text, comment)

### Работа над ошибками

In [None]:
def _move_photo_group(src_dir, names, dst_dir):
    """Move each file named in ``names`` from ``src_dir`` into ``dst_dir`` (created if missing)."""
    # makedirs(exist_ok=True): the VIN folder may already exist from the first pass.
    os.makedirs(dst_dir, exist_ok=True)
    for name in names:
        shutil.move(os.path.join(src_dir, name), dst_dir)


# Second pass over the photos that could not be recognized. Here the first
# photo of each group of three is sent as-is, without resizing.
# os.listdir returns names in arbitrary order, so the listing is sorted to keep
# the files of one group adjacent.
photos = sorted(os.listdir(bad_recognition_path))
if 'Thumbs.db' in photos:
    # Same filter as in the first pass, so a thumbnail cache cannot shift the groups.
    photos.remove('Thumbs.db')

for index in range(0, len(photos), 3):
    photo_name = photos[index]

    if not photo_name.endswith(('.JPG', '.jpg')):
        continue

    # Slicing tolerates a trailing group with fewer than three files.
    group = photos[index:index + 3]

    text = yandex_recognition(os.path.join(bad_recognition_path, photo_name))

    if text in all_vins:
        comment = 'OK'
        _move_photo_group(bad_recognition_path, group, os.path.join(output_folder_path, text))
    elif text == 'Ошибка распознавания':
        # Still unreadable: the group stays in the bad-recognition folder.
        comment = ''
    else:
        comment = 'Не найден VIN в базе'
        _move_photo_group(bad_recognition_path, group, os.path.join(not_found_path, text))

    print(index/3, photo_name, text, comment)
    # No temporary file is created in this pass, so there is nothing to delete;
    # the former os.remove(<bare file name>) could only hit an unrelated file
    # in the current working directory.