In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch

from PIL import Image
from tqdm import tqdm

In [2]:
from utils.load_models import *
from utils.preprocess import *
from utils.predict import get_pt_predictions, get_tf_predictions
from utils.compute_metrics import compute_accuracy



In [3]:
# Columns of the label export that are needed for evaluation: the record id
# (c_guid) followed by the ground-truth fields.
label_columns = [
    'c_guid',
    'c_name',
    'c_surname',
    'c_patronymic',
    'c_birth',
    'c_driver_license_date_to',
    'c_driver_license',
]

# image_df: the test-set table (its `ids` column is renamed to c_guid in the next cell).
image_df = pd.read_csv('driver_license_test.csv')
# labels_df: the label export, narrowed to the columns listed above.
labels_df = pd.read_csv('dl_2901.csv')[label_columns]

In [4]:
# Give the id column the same name as in labels_df so the two tables can be joined.
image_df = image_df.rename(columns={'ids': 'c_guid'})

# Inner join: only ids present in BOTH tables end up in `testset`.
testset = image_df.merge(labels_df, how='inner', on='c_guid')

In [5]:
# Load the three per-field test sets in one go; each path is read with the
# default pd.read_csv settings, in the same order as the target names.
dl_fio, dl_date, dl_serial = map(
    pd.read_csv,
    ('dl_fio_testset.csv', 'dl_date_testset.csv', 'dl_serial_testset.csv'),
)

In [6]:
# Sanity checks before predicting: the three per-field frames must describe the
# same records, in the same order.
_field_frames = (dl_fio, dl_date, dl_serial)

# All three frames have the same number of rows.
assert len({len(frame) for frame in _field_frames}) == 1, 'Датасеты не совпадают! Должна быть одинаковая длина для всех данных'

# Neighbouring frames list identical c_guid values in identical order.
_guid_lists = [frame['c_guid'].tolist() for frame in _field_frames]
assert all(left == right for left, right in zip(_guid_lists, _guid_lists[1:])), 'Датасеты содержат разные айди!'

In [7]:
# Run the first prediction pipeline (get_pt_predictions) on the three per-field
# frames dl_fio, dl_date and dl_serial. The asserts in the previous cell guarantee
# that the frames have equal length and identical c_guid order.
predictions = get_pt_predictions(dl_fio, dl_date, dl_serial)

100%|██████████| 300/300 [00:03<00:00, 81.16it/s]
100%|██████████| 300/300 [00:02<00:00, 126.07it/s]
100%|██████████| 300/300 [00:01<00:00, 255.14it/s]


In [8]:
# Ground truth for evaluating `predictions`.
#
# Maps the label-export column names to the field names that show up in the
# accuracy report of this pipeline. The mapping deliberately has its own name:
# a later cell imports a *function* called `rename_cols` from utils.postprocess,
# and binding a dict to that same global name makes its meaning depend on which
# cell happened to run last.
pt_column_map = {
    'c_surname': 'surname',
    'c_name': 'name',
    'c_patronymic': 'midlename',
    'c_driver_license_date_to': 'dateout',
    'c_birth': 'birthday',
    'c_driver_license': 'siriestype3',
}

# Select the id plus exactly the mapped columns (dict order = column order), so
# the selection and the mapping cannot drift apart, then rename in one step.
df_real = testset[['c_guid', *pt_column_map]].rename(columns=pt_column_map)

# prepare_dataset comes from one of the utils star-imports; presumably it
# normalises the listed date and name columns — TODO confirm against utils.
df_real = prepare_dataset(df_real, date_cols=['birthday', 'dateout'], name_cols=['surname', 'name', 'midlename'])

In [9]:
# Score the first pipeline's predictions against the prepared ground truth.
# The result is a dict keyed by column name (see the output of the next cell).
pt_metrics = compute_accuracy(df_real, predictions)

In [10]:
# Display the per-column scores of the first pipeline.
pt_metrics

{'c_guid': 1.0,
 'surname': 0.9466666666666667,
 'name': 0.9433333333333334,
 'midlename': 0.9333333333333333,
 'dateout': 0.8633333333333333,
 'birthday': 0.95,
 'siriestype3': 0.93}

In [11]:
# Run the second prediction pipeline (get_tf_predictions) on the list of record ids.
# NOTE(review): the ids are taken from image_df, whereas the ground truth these
# predictions are later compared with is built from `testset` (the inner merge of
# image_df and labels_df). If that merge dropped or duplicated any rows, the two
# would no longer line up — confirm that testset and image_df hold the same ids.
tf_predictions = get_tf_predictions(image_df['c_guid'].tolist())

100%|██████████| 300/300 [01:45<00:00,  2.86it/s]


In [12]:
# Ground truth for evaluating `tf_predictions`.
#
# NOTE: this rebinds `df_real`, which an earlier cell built with a different set
# of column names for the other pipeline. `pt_metrics` therefore has to be
# computed before this cell runs.
#
# Maps the label-export column names to the field names that show up in the
# accuracy report of this pipeline. The mapping deliberately has its own name:
# a later cell imports a *function* called `rename_cols` from utils.postprocess,
# and binding a dict to that same global name makes its meaning depend on which
# cell happened to run last.
tf_column_map = {
    'c_surname': 'surname',
    'c_name': 'name',
    'c_patronymic': 'middle_name',
    'c_driver_license': 'front_serial',
    'c_driver_license_date_to': 'dateout',
    'c_birth': 'birthday',
}

# Select the id plus exactly the mapped columns (dict order = column order), so
# the selection and the mapping cannot drift apart, then rename in one step.
df_real = testset[['c_guid', *tf_column_map]].rename(columns=tf_column_map)

# prepare_dataset comes from one of the utils star-imports; presumably it
# normalises the listed date and name columns — TODO confirm against utils.
df_real = prepare_dataset(df_real, date_cols=['birthday', 'dateout'], name_cols=['surname', 'name', 'middle_name'])

# Quick visual check of the prepared frame. The rendered rows contain personal
# data (names, birth dates, licence numbers): clear this output before sharing.
df_real.head()

Unnamed: 0,c_guid,surname,name,middle_name,front_serial,dateout,birthday
0,ru-ds.20241224.68ee8f29-ba61-4507-9d6e-2a7cfcc...,ГРИНЯКИНА,ОКСАНА,ВЛАДИМИРОВНА,99 25 890015,,26.10.1976
1,ru-ds.20241224.5639c7b6-c0bc-4588-a0dc-bf85bbf...,БАРАНОВА,РИММА,ПАВЛОВНА,99 27 985671,25.10.2032,30.07.1984
2,ru-ds.20241224.fc2c43e5-2f60-454b-acf3-50f4e2e...,АКСЕНОВ,ВАСИЛИЙ,ВЛАДИМИРОВИЧ,99 16 494167,03.07.2030,06.12.1974
3,ru-ds.20241224.ba651e6c-3185-432e-b2b7-4d9a513...,ГАЛКИНА,ЕКАТЕРИНА,АНДРЕЕВНА,99 33 080257,18.01.2033,18.06.2001
4,ru-ds.20241224.62c6a646-4a3d-4ce0-839c-bea2bc7...,ХЕЛЕМЕРЯ,АЛЕКСАНДР,АЛЕКСАНДРОВИЧ,99 15 135440,14.03.2030,21.07.1967


In [14]:
# Score the second pipeline's predictions against the ground truth prepared in
# the cell above (df_real with the middle_name / front_serial column names).
tf_metrics = compute_accuracy(df_real, tf_predictions)
# Display the resulting dict of per-column scores.
tf_metrics

{'c_guid': 1.0,
 'surname': 0.9433333333333334,
 'name': 0.93,
 'middle_name': 0.9266666666666666,
 'front_serial': 0.9266666666666666,
 'dateout': 0.8366666666666667,
 'birthday': 0.94}

In [15]:
# Presumably brings the key names of the two metric dicts in line before they are
# put side by side in the comparison table below.
# NOTE(review): the mapping's keys ('midlename', 'siriestype3') are the names that
# appear in pt_metrics and its values ('middle_name', 'front_serial') are the names
# that appear in tf_metrics, yet the dict passed in is tf_metrics. Confirm in which
# direction utils.postprocess.rename_cols applies the mapping.
# NOTE(review): the return value is discarded, so the helper presumably mutates
# tf_metrics in place — TODO confirm; if so, this cell is not idempotent on re-run.
from utils.postprocess import rename_cols
rename_col = {'midlename': 'middle_name', 'siriestype3': 'front_serial'}
rename_cols(tf_metrics, rename_col)

In [17]:
# Side-by-side comparison table: column 'new' holds pt_metrics, column 'old'
# holds tf_metrics, one row per evaluated field.
#
# The two metric dicts can label the same field differently ('midlename' vs
# 'middle_name', 'siriestype3' vs 'front_serial'). pd.DataFrame aligns a dict of
# dicts on the union of their keys, so a mismatch would silently split a field
# into two rows that are each half NaN. Both dicts are therefore normalised here
# to one spelling, instead of relying on an earlier cell having renamed the keys.
_FIELD_ALIASES = {'middle_name': 'midlename', 'front_serial': 'siriestype3'}


def _with_canonical_fields(metrics):
    """Return a copy of ``metrics`` with aliased field names replaced.

    Keys listed in ``_FIELD_ALIASES`` are swapped for their canonical spelling;
    all other keys, the values and the key order are kept. The input dict is not
    modified.
    """
    return {_FIELD_ALIASES.get(field, field): score for field, score in metrics.items()}


df = pd.DataFrame({
    'new': _with_canonical_fields(pt_metrics),
    'old': _with_canonical_fields(tf_metrics),
})
df

Unnamed: 0,new,old
c_guid,1.0,1.0
surname,0.903333,0.943333
name,0.936667,0.93
midlename,0.913333,0.926667
dateout,0.863333,0.836667
birthday,0.95,0.94
siriestype3,0.92,0.926667
