Pomocou nasledujúceho skriptu vieme klasifikovať textové zmluvy na základe kľúčových slov zo súboru **keywords.txt**. Výstupom je tabuľka, ktorá okrem metadát obsahuje aj počty výskytov jednotlivých kľúčových slov.

In [None]:
import os
import re
import numpy as np
import pandas as pd
import ast

# Match only names that really end in ".txt"; a bare 'txt' pattern would also
# accept any file that merely contains these three letters somewhere.
find_txt    = re.compile(r'\.txt$')
# The trailing empty component keeps the trailing path separator, because the
# directory is later concatenated directly with file names.
working_dir = os.path.join(os.getcwd(), 'contracts_text', '')

# Extracts runs of digits from a file name
extract_ID = re.compile(r'\d+')

# Regular files in the working directory, then only the text files among them
contracts = [f for f in os.listdir(working_dir) if os.path.isfile(os.path.join(working_dir, f))]
contracts_txt = [f for f in contracts if find_txt.search(f)]

# Import clean table to extract metadata from it
DB_clean = pd.read_csv('CRZ_DB_clean.csv', delimiter = '|')

# keywords.txt holds one category per row: the first comma-separated item is
# the category name, the remaining items are the keywords of that category.
# Keywords are case-folded so they can be matched against case-folded text.

# Context manager closes the file even when reading fails
with open('keywords.txt', 'r', encoding = 'utf-8') as fo:
    lines = fo.readlines()

categories = []

# Build one record per category:
# [category name, keywords, hits per keyword, total hits in the category]
for line in lines:
    # A blank row would otherwise become a category without a usable name
    if not line.strip():
        continue

    items = [item.strip() for item in line.split(',')]

    category_name = items[0]
    # Empty items (e.g. after a trailing comma) are dropped: str.count('')
    # returns len(text)+1 and would inflate the hits of every contract.
    keywords = [item.casefold() for item in items[1:] if item]
    hits     = [0] * len(keywords)
    hits_per_category = 0

    categories.append([category_name,keywords,hits,hits_per_category])

# Column labels of the exported table, in output order:
# contract/attachment metadata, total hits, hits per category, hits per keyword
header_metadata = [
    'Nazov', 'ID', 'Inner-ID',
    'Objednavatel_ICO', 'Objednavatel', 'Objednavatel_adresa',
    'Dodavatel_ICO', 'Dodavatel', 'Dodavatel_adresa',
    'Datum_zverejnenia', 'Datum_podpisu', 'Datum_platnosti', 'Datum_ucinnosti',
    'Posledna_zmena', 'Cena_konecna', 'Cena_podpisana', 'Rezort', 'Typ',
    'Priloha_ID', 'Priloha_nazov', 'Link', 'Velkost', 'Datum', 'Text',
]

header_sum_cat    = ['Výskyty']
header_categories = [category[0] for category in categories]

# Keywords of all categories, flattened in category order
header_keywords = []
for category in categories:
    header_keywords.extend(category[1])

header = header_metadata + header_sum_cat + header_categories + header_keywords

# Accumulators filled while the contracts are analysed
row_list = []
number_of_characters = []

N = len(contracts_txt)

# Go through all processed text files, case-fold them and, for every keyword,
# count the number of occurrences. One output row is produced per file.
for i, contract in enumerate(contracts_txt):
    # enumerate() is 0-based; print a 1-based position
    print('Analysing contract: ', contract, ' ', i + 1, 'out of', N)

    # Context manager closes the file even if decoding fails.
    # The whole contract becomes one case-folded line of text.
    with open(working_dir+contract, 'r', encoding = 'utf-8') as fo:
        text = fo.read().casefold().replace('\n',' ')

    for category in categories:
        # Slice assignment keeps the same per-keyword hit list object
        category[2][:] = [text.count(keyword.casefold()) for keyword in category[1]]
        category[3] = sum(category[2])

    # Extract metadata and join it with counted hits.
    # The first run of digits in the file name is looked up in the 'ID' column.
    ids = extract_ID.findall(contract)
    row = DB_clean.loc[DB_clean['ID'] == int(ids[0])] if ids else None

    # A file without a metadata row is skipped instead of aborting the batch
    if row is None or row.empty:
        print('No metadata found for contract', contract, '- skipping')
        continue

    meta_data = [row.iloc[0,col] for col in range(1,19)]
    # NOTE(review): only the first element of the literal stored in column 20
    # is used (presumably a list of attachments) - confirm this is correct for
    # contracts with more than one attachment.
    attachment_data = ast.literal_eval(row.iloc[0,20])[0]

    meta_data = meta_data + attachment_data

    # Per-keyword hits of all categories, flattened in category order
    data_hits = []
    for category in categories:
        data_hits += category[2]

    sum_data = sum(category[3] for category in categories)

    data = meta_data + [sum_data] + [category[3] for category in categories] + data_hits
    # Indexing (rather than zip) fails loudly if data is shorter than the header
    row_list.append({label: data[j] for j, label in enumerate(header)})

    # Count number of characters in contract
    number_of_characters.append(len(text))

# Save the unfiltered, unsorted table
DB_clean_tagged = pd.DataFrame(row_list, columns = header)
DB_clean_tagged.to_csv('DB_clean_text_tagged.csv', header = header, sep='|')

# Contracts are ordered by number of keyword hits, then by number of characters
# and then by final price (all descending).
# NOTE: no point-based (logarithmic) ranking is computed here; rows are only sorted.

# Insert new column -- number of characters -- at position 24
DB_clean_tagged.insert(24, 'Pocet_znakov', number_of_characters)
DB_clean_tagged = DB_clean_tagged.sort_values(by=['Výskyty','Pocet_znakov','Cena_konecna'], ascending = False)

# Filter out rows without any hit and rows that mention neither a position
# nor a job description. A mask selects each row at most once, so the
# reported number of filtered rows is not double-counted.
no_hits     = DB_clean_tagged['Výskyty'].astype(float) == 0
no_position = ((DB_clean_tagged['Pozícia'].astype(float) == 0)
               & (DB_clean_tagged['Popis práce'].astype(float) == 0))
delete_rows = DB_clean_tagged.index[no_hits | no_position].tolist()

print('Sorted : ',N,'| Filtered out : ',len(delete_rows))

DB_clean_tagged = DB_clean_tagged.drop(delete_rows)
DB_clean_tagged.to_csv('CRZ_DB_clean_text_tagged.csv', sep='|')


Nasledujúci skript presunie relevantné prílohy do samostatného podpriečinku.

In [None]:
import os
import re
import numpy as np
import pandas as pd

# shutil replaces the Windows-only 'move' shell command
import shutil

# Directory with all converted contracts and the directory for selected ones.
# The trailing empty component keeps the trailing path separator.
source_dir    = os.path.join(os.getcwd(), 'contracts_text', '')
direction_dir = os.path.join(os.getcwd(), 'contracts_mandayrates', '')

os.makedirs(direction_dir, exist_ok=True)

# NOTE(review): this reads the unfiltered 'DB_clean_text_tagged.csv', so every
# tagged contract is moved - confirm the filtered 'CRZ_' file is not intended.
DB_clean_tagged = pd.read_csv('DB_clean_text_tagged.csv', delimiter = '|')

for index, row in DB_clean_tagged.iterrows():
    # Files are named <ID>_<Priloha_ID>.<extension>
    contract = str(row['ID']) + '_' + str(row['Priloha_ID'])

    for extension in ('.pdf', '.txt'):
        source = source_dir + contract + extension
        # A missing file is reported and skipped (best effort, like the shell command)
        if os.path.exists(source):
            shutil.move(source, direction_dir + contract + extension)
        else:
            print('File not found, skipping:', source)

In [None]:
import os
import subprocess
import re
import numpy as np
import pandas as pd
import camelot
import time
import sys

# pdfminer for extracting information about number of pages
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import resolve1

working_dir = os.getcwd()+'/contracts_mandayrates/'

# Open tagged spreadsheet and read information, sorted
DB_clean_tagged = pd.read_csv('CRZ_DB_clean_text_tagged.csv', delimiter = '|')
# Remove the index column that read_csv names 'Unnamed: 0'. The keyword form is
# required because the positional axis argument was removed in pandas 2.0;
# errors='ignore' tolerates a file that was saved without an index.
DB_clean_tagged = DB_clean_tagged.drop(columns='Unnamed: 0', errors='ignore')

number_of_contracts = len(DB_clean_tagged.index)

# If the column with the number of PDF pages does not exist yet,
# calculate it ...
if 'Pocet_stran' not in DB_clean_tagged.columns:
    DB_clean_tagged.insert(25, 'Pocet_stran', np.zeros(number_of_contracts))

    # Identify how many pages are in every contract
    # useful to estimate time needed to extract all tables from PDF
    print('Counting pages in PDFs ...')
    i = 0
    for index, row in DB_clean_tagged.iterrows():
        i += 1

        contract = str(row['ID']) + '_' + str(row['Priloha_ID']) + '.pdf'
        print('Processing contract ', contract,' ', i, 'out of', number_of_contracts)

        # Context manager closes the PDF even when parsing raises
        with open(working_dir + contract, 'rb') as contract_file:
            parser = PDFParser(contract_file)
            document = PDFDocument(parser)

            # Page count is read from the 'Count' entry of the 'Pages' catalog object
            pages = resolve1(document.catalog['Pages'])['Count']

        DB_clean_tagged.at[index,'Pocet_stran'] = int(pages)

    # Save partial result
    DB_clean_tagged.to_csv('CRZ_DB_clean_text_tagged.csv', sep='|')

sys.stdout.flush()
# Analyse PDF pages and extract tables
# NOTE(review): the CSV is saved with the 'Pocet_tabuliek' column after the
# first processed contract, so a restarted run skips this whole section even
# though finished contracts are recognised by their output directory -
# confirm whether resuming an interrupted run is expected to work.
if 'Pocet_tabuliek' not in DB_clean_tagged.columns:

    # Calculate total number of pages to analyse
    total_pages = sum(int(pages) for pages in DB_clean_tagged['Pocet_stran'])

    print('Total number of pages to analyse:', total_pages)

    DB_clean_tagged.insert(25, 'Pocet_tabuliek', np.zeros(number_of_contracts))

    empty_column = [None] * number_of_contracts
    if 'Tabulky_strany' not in DB_clean_tagged.columns:
        DB_clean_tagged.insert(26, 'Tabulky_strany', empty_column)

    # Analyse PDF pages and extract tables
    i = 0
    for index, row in DB_clean_tagged.iterrows():
        i += 1

        contract = str(row['ID']) + '_' + str(row['Priloha_ID']) + '.pdf'
        contract_dir = working_dir + contract.replace('.pdf','_tables')
        sys.stdout.flush()

        # An existing output directory means the contract was already handled
        if os.path.exists(contract_dir):
            continue
        os.makedirs(contract_dir)

        number_of_pages = int(row['Pocet_stran'])

        print('Processing contract ', contract,' ', i, 'out of', number_of_contracts)
        print('Going to process', number_of_pages, 'pages ...')
        sys.stdout.flush()

        number_of_tables = 0
        # Page number of every extracted table, in extraction order
        tables_pages = []

        start_time = time.time()
        # Pages are numbered from 1; the upper bound is inclusive so the
        # last page (and the only page of a one-page PDF) is analysed too.
        for page in range(1, number_of_pages + 1):

            print('\tProcessing page', page, 'out of', number_of_pages)
            tables = camelot.read_pdf(working_dir + contract, pages = str(page))
            sys.stdout.flush()

            # Tables are numbered continuously across the pages of one contract
            for j in range(0,len(tables)):
                tables[j].to_csv(contract_dir + '/table_' + str(number_of_tables + j + 1) + '.csv')
                tables_pages.append(page)
                print(tables[j].parsing_report)

            number_of_tables += len(tables)

        end_time = time.time()
        print('Processed ', number_of_pages, ' pages in ',(end_time-start_time))

        DB_clean_tagged.at[index,'Pocet_tabuliek'] = int(number_of_tables)
        DB_clean_tagged.at[index,'Tabulky_strany'] = str(tables_pages)

        # Save the progress after every processed contract
        DB_clean_tagged.to_csv('CRZ_DB_clean_text_tagged.csv', sep='|')

In [None]:
import os
import re
import pandas as pd
import ast
import shutil

def natural_sort(l):
    """Return the strings of *l* sorted in natural (human) order.

    Runs of ASCII digits are compared as integers and all other text is
    compared case-insensitively, so 'table_2' sorts before 'table_10'.
    """
    def _natural_key(name):
        # re.split with a capturing group keeps the digit runs in the result
        return [int(chunk) if chunk.isdigit() else chunk.lower()
                for chunk in re.split('([0-9]+)', name)]

    return sorted(l, key=_natural_key)

find_txt    = re.compile('txt')
# The trailing empty component keeps the trailing path separator
working_dir = os.path.join(os.getcwd(), 'contracts_mandayrates', '')
final_dir   = os.path.join(os.getcwd(), 'contracts_mandayrates_tables', '')
extract_number = re.compile(r'\d+')

# Start from an empty output directory: remove a previous one, then always
# create it, so that the directory exists in both cases.
if os.path.isdir(final_dir):
    shutil.rmtree(final_dir)
os.makedirs(final_dir)

# List all subdirectories with tables in working dir
subdirectories = [ndir for ndir in os.listdir(working_dir) if os.path.isdir(os.path.join(working_dir, ndir))]

# Import file with keywords; context manager closes it even when reading fails
with open('keywords.txt', 'r', encoding = 'utf-8') as fo:
    lines = fo.readlines()

categories = []

# Import keywords from keywords.txt and prepare data structures.
# One record per category:
# [category name, keywords, hits per keyword, total hits in the category]
# Keep this loop indented with spaces only; mixing tabs and spaces inside one
# block is a TabError in Python 3.
for line in lines:
    # A blank row would otherwise become a category without a usable name
    if not line.strip():
        continue

    items = [item.strip() for item in line.split(',')]

    category_name = items[0]
    # Empty items (e.g. after a trailing comma) are dropped: str.count('')
    # returns len(text)+1 and would inflate the hits of every table.
    keywords = [item for item in items[1:] if item]
    hits     = [0] * len(keywords)
    hits_per_category = 0

    categories.append([category_name,keywords,hits,hits_per_category])

header_categories = [category[0] for category in categories]
header_keywords   = []

# Import metadata from text_tagged file
# NOTE(review): header_import expects the page/table columns ('Pocet_stran',
# 'Pocet_tabuliek', 'Tabulky_strany'); confirm that 'DB_clean_text_tagged.csv'
# (and not 'CRZ_DB_clean_text_tagged.csv') is the file that contains them.
DB_import = pd.read_csv('DB_clean_text_tagged.csv', delimiter = '|')
# Remove the index column that read_csv names 'Unnamed: 0'. The keyword form is
# required because the positional axis argument was removed in pandas 2.0.
DB_import = DB_import.drop(columns='Unnamed: 0', errors='ignore')

# Columns kept from the imported table
header_import = ['Nazov','ID','Inner-ID','Objednavatel_ICO','Objednavatel','Objednavatel_adresa','Dodavatel_ICO','Dodavatel','Dodavatel_adresa',
                 'Datum_zverejnenia','Datum_podpisu','Datum_platnosti','Datum_ucinnosti','Posledna_zmena','Cena_konecna','Cena_podpisana','Rezort','Typ',
                 'Priloha_ID','Priloha_nazov','Link','Velkost','Datum','Text','Pocet_znakov','Pocet_stran','Pocet_tabuliek','Tabulky_strany']

# Metadata labels of the exported per-table rows
header_metadata = ['Nazov','ID','Inner-ID','Objednavatel_ICO','Objednavatel','Objednavatel_adresa','Dodavatel_ICO','Dodavatel','Dodavatel_adresa',
                   'Datum_zverejnenia','Datum_podpisu','Datum_platnosti','Datum_ucinnosti','Posledna_zmena','Cena_konecna','Cena_podpisana','Rezort','Typ',
                   'Priloha_ID','Priloha_nazov','Link','Velkost','Datum','Text','Pocet_znakov','Pocet_stran','Tabulka_strana','Tabulka_cislo']

len_header_import = len(header_import)
# Drop every column that is not listed in header_import
DB_import = DB_import.drop(columns=DB_import.columns.difference(header_import))

# Produce new header for new file
header_sum_cat    = ['Výskyty']
header_categories = [category[0] for category in categories]
header_keywords   = []

# Keywords of all categories, flattened in category order
for category in categories:
    header_keywords = header_keywords + category[1]

header = header_metadata + header_sum_cat + header_categories + header_keywords

# For each CSV table in each subdirectory tag tables
# Produce another CSV in which each row contains (meta)information about one table

N_dir = len(subdirectories)
row_list = []

for index, directory in enumerate(subdirectories):
    # Cut the '_tables' suffix explicitly: str.strip('_tables') removes a set
    # of characters from both ends, not the suffix.
    if directory.endswith('_tables'):
        contract_name = directory[:-len('_tables')]
    else:
        contract_name = directory
    print('Processing contract', contract_name, '-', index+1, 'out of', N_dir)

    table_dir = os.path.join(working_dir, directory)
    tables = [f for f in os.listdir(table_dir) if os.path.isfile(os.path.join(table_dir, f))]

    # Nothing to tag in a directory without tables
    if not tables:
        continue

    # Sort tables according to number in table_number.csv
    tables = natural_sort(tables)

    # The attachment ID is the second number of the directory name. It is read
    # from the directory name only, because digits in the parent path would
    # shift the position of the number.
    attachment_id = int(extract_number.findall(directory)[1])
    row = DB_import.loc[DB_import['Priloha_ID'] == attachment_id]

    for table in tables:
        # Whole table as one case-folded line; the file is closed even on errors
        with open(os.path.join(table_dir,table), 'r', encoding = 'utf-8') as fo:
            text = fo.read().casefold().replace('\n',' ')

        for category in categories:
            # Slice assignment keeps the same per-keyword hit list object
            category[2][:] = [text.count(keyword.casefold()) for keyword in category[1]]
            category[3] = sum(category[2])

        # Number of the table taken from its file name
        table_number = int(extract_number.findall(table)[0])

        # Metadata: all imported columns except the last one, then the table number
        meta_data = [row.iat[0,col] for col in range(0,len(header_import)-1)]
        meta_data.append(table_number)

        # Replace the second-to-last item (the stored list of pages) by the
        # page of this table
        if float(row['Pocet_tabuliek'].iloc[0]) > 0:
            meta_data[-2] = ast.literal_eval(meta_data[-2])[table_number-1]
        else:
            meta_data[-2] = 0

        # Per-keyword hits of all categories, flattened in category order
        data_hits = []
        for category in categories:
            data_hits += category[2]

        sum_data = sum(category[3] for category in categories)

        data = meta_data + [sum_data] + [category[3] for category in categories] + data_hits
        # Indexing (rather than zip) fails loudly if data is shorter than the header
        row_list.append({label: data[j] for j, label in enumerate(header)})

# Save unfiltered CSV table
# NOTE: the filtered table below is written to the same file name and replaces it.
DB_export = pd.DataFrame(row_list, columns = header)
DB_export.to_csv('CRZ_DB_clean_tables.csv', header = header, sep='|')

# Filter out all irrelevant tables and produce a CSV which has only tables with
# at least one position or job description and at least one quantifier
has_role       = ((DB_export['Pozícia'].astype(float) > 0)
                  | (DB_export['Popis práce'].astype(float) > 0))
has_quantifier = DB_export['Kvantifikátor'].astype(float) > 0
delete_rows = DB_export.index[~(has_role & has_quantifier)].tolist()

print('Number of tables : ', DB_export.shape[0],'| Filtered out : ', len(delete_rows))

DB_export = DB_export.drop(delete_rows)
DB_export.to_csv('CRZ_DB_clean_tables.csv', sep='|')

# Copy all relevant tables into directory _tables
# The target directory is created here so the copy does not depend on earlier setup
os.makedirs(final_dir, exist_ok=True)

for index, row in DB_export.iterrows():
    contract     = str(row['ID']) + '_' + str(row['Priloha_ID'])
    table_number = str(row['Tabulka_cislo'])

    source      = os.path.join(working_dir, contract + '_tables', 'table_' + table_number + '.csv')
    destination = os.path.join(final_dir, contract + '_' + table_number + '.csv')

    # shutil.copy replaces the Windows-only 'copy' shell command; a missing
    # source is reported and skipped so one file does not stop the export
    if os.path.isfile(source):
        shutil.copy(source, destination)
    else:
        print('Table not found, skipping:', source)


In [None]:
# Matej Badin | UHP | 2019                                             |
# -------------------------------------------------------------------- |
# Packages needed :  numpy, re, os, pandas                             |
# -------------------------------------------------------------------- |
# Tag and filter extracted tables based on keywords                    |
# -------------------------------------------------------------------- |

import os
import re
import pandas as pd
import ast
import operator
import shutil

def parse_text(text):
    """Split *text* into case-folded words made of Slovak letters only.

    Every character that is not a letter of the Slovak alphabet (digits,
    whitespace, punctuation, ...) acts as a separator. Empty words are never
    returned and a word at the very end of the text is included.

    Returns a list of words in order of appearance.
    """
    slovak_alphabet = frozenset('aáäbcčdďeéfghiíjklĺľmnňoóôpqrŕsštťuúvwxyýzž')

    words = []
    letters = []

    for char in text.casefold():
        if char in slovak_alphabet:
            letters.append(char)
        elif letters:
            # A separator ends the word collected so far
            words.append(''.join(letters))
            letters = []

    # Flush the last word when the text ends with a letter
    if letters:
        words.append(''.join(letters))

    return words

# Files
working_dir = os.getcwd()
tables_dir       = os.path.join(working_dir, 'IT_contracts_mandayrates_tables')
clean_tables_dir = os.path.join(working_dir, 'IT_contracts_mandayrates_clean_tables')

# Start from an empty output directory: remove a previous one, then always
# create it, so that the directory exists in both cases.
if os.path.isdir(clean_tables_dir):
    shutil.rmtree(clean_tables_dir)
os.mkdir(clean_tables_dir)

tables_csv = [f for f in os.listdir(tables_dir) if os.path.isfile(os.path.join(tables_dir, f))]

# Import standard Slovak vocabulary corpus and dictionary
import hunspell

normal_SK  = os.path.join(working_dir, 'Dicts', 'sk_SK')
english_US = os.path.join(working_dir, 'Dicts', 'en_US')
special_SK = os.path.join(working_dir, 'Dicts', 'sk_SK_special')

# Dictionary with standard Slovak language and words from contracts in this sector by build_special_dictionary.py
hunspell_normal  = hunspell.Hunspell(normal_SK, normal_SK)
hunspell_english = hunspell.Hunspell(english_US, english_US)
hunspell_special = hunspell.Hunspell(normal_SK, special_SK)

# Spell check used by this script: the word is case-folded and stripped first
def spell(word):
    """Return a truthy value when *word* is accepted by any loaded dictionary.

    The dictionaries are tried in this order: standard Slovak, English,
    special Slovak. Later ones are consulted only if the earlier ones
    reject the word.
    """
    cleaned = word.casefold().strip()
    return (
        hunspell_normal.spell(cleaned)
        or hunspell_english.spell(cleaned)
        or hunspell_special.spell(cleaned)
    )

# Import keywords and collect those unknown to the spellchecker.
# Context manager closes the file even when reading fails.
with open('keywords.txt', 'r', encoding = 'utf-8') as fo:
    lines = fo.readlines()

all_keywords  = []

categories = []
add_words = []

# Import keywords from keywords.txt.
# One record per category: [category name, case-folded keywords]
for line in lines:
    # A blank row would otherwise become a category without a usable name
    if not line.strip():
        continue

    items = [item.strip() for item in line.split(',')]

    category_name = items[0]
    # Empty items (e.g. after a trailing comma) are dropped: an empty keyword
    # is a substring of every cell and would count as a hit everywhere.
    keywords = [item.casefold() for item in items[1:] if item]
    all_keywords.extend(keywords)

    categories.append([category_name,keywords])

# Collect every single word of the keywords that no dictionary accepts
for keyword in all_keywords:
    for word in keyword.split():
        if not spell(word) and word not in add_words:
            add_words.append(word)

# Copy special dictionary and append lines with keywords, reload hunspell_special
# NOTE(review): the three steps below are not implemented - add_words is
# collected but never written, so the dictionary loaded below has to be
# prepared by other means before this script runs.
special_dic_with_keywords = os.path.join(working_dir, 'Dicts', 'sk_SK_special_with_keywords')
# Copy file
# Change number in first line
# Append lines to file

# Reload special Hunspell dictionary
hunspell_special = hunspell.Hunspell(normal_SK, special_dic_with_keywords)

# Empty dictionary to be filled with suggested keywords
suggested_keywords = dict()

#####################################################################################################
# Analysis !
#####################################################################################################

# In-memory buffer replaces the 'temp.csv' scratch file, which used to be
# written into the input directory and would be listed as a table on a re-run.
import io

# Compiled once; used in step 6 to detect cells containing digits
find_number = re.compile(r'\d+')

# Keyword lists do not change between tables, so they are selected once.
# They are still selected by category name to keep the code general.
table_header_keywords = [keyword for category in categories
                         if category[0] in ['Hlavička tabuľky'] for keyword in category[1]]
position_keywords     = [keyword for category in categories
                         if category[0] in ['Pozícia','Popis práce'] for keyword in category[1]]
price_header_keywords = [keyword for category in categories
                         if category[0] in ['Hlavička cena'] for keyword in category[1]]

# The output directory must exist before clean tables are saved into it
os.makedirs(clean_tables_dir, exist_ok=True)

N_tables = len(tables_csv)
for i, table_csv in enumerate(tables_csv):
    print('Processing table:',table_csv,' ', i+1, 'out of',N_tables)

    # Step 1
    # Read CSV and destroy any new line characters between " characters
    with open(os.path.join(tables_dir,table_csv), 'r', encoding = 'utf-8') as fo:
        raw = fo.read()

    inside_quotes = False
    kept = []
    for char in raw:
        # Every quote character toggles the "inside a quoted field" state
        if char == '"':
            inside_quotes = not inside_quotes

        if char == '\n' and inside_quotes:
            continue
        kept.append(char)

    text = ''.join(kept)

    # Step 2
    # Import CSV table into pandas and delete empty columns
    table = pd.read_csv(io.StringIO(text), delimiter = ',')

    # A column is empty when every cell is blank or 'nan' after conversion to text
    delete = [column for column in table.columns
              if all(str(value).rstrip() == '' or str(value) == 'nan' for value in table[column])]
    table = table.drop(columns = delete)

    # Step 3
    # Try to identify columns with just dummy characters and not any meaningful word
    # and ... also destroy them
    correct = dict((column,0) for column in table.columns)
    wrong   = dict((column,0) for column in table.columns)

    for index, row in table.iterrows():
        for column in table.columns:
            for word in str(row[column]).casefold().split():
                if spell(word):
                    correct[column] += 1
                else:
                    wrong[column] += 1

    # Arbitrarily chosen threshold: more than 75 % of the words are unknown.
    # Columns without any word are kept (and do not cause a division by zero).
    delete = [column for column in table.columns
              if (wrong[column] + correct[column]) > 0
              and wrong[column]/(wrong[column] + correct[column]) > 0.75]
    table = table.drop(columns = delete)

    # Step 4
    # Identify if the first row is the header
    # Pandas already inferred the column names from the first row of the CSV.
    # Keywords are case-folded, so the column names are case-folded as well.
    number_of_hits = 0
    for keyword in table_header_keywords:
        for column in table.columns:
            number_of_hits += str(column).casefold().count(keyword)

    # Arbitrarily chosen boundary
    header = number_of_hits > 2

    # Step 5
    # Identify if there is a specific column with 'Pozicia'
    number_of_hits = dict((column,0) for column in table.columns)

    for column in table.columns:
        for cell in table[column]:
            cell = str(cell).casefold()
            number_of_hits[column] += sum(1 for keyword in position_keywords if keyword in cell)

    # Columns with 'Pozicia'-like keywords (at least one hit), sorted by hits per row
    positions_columns = [(column,number_of_hits[column]/table.shape[0]) for column in table.columns if number_of_hits[column] > 0]
    positions_columns = sorted(positions_columns, key=lambda tup: tup[1], reverse=True)

    # Step 6
    # .. also try to identify columns which have a significant number of rows containing numbers or prices
    prices_columns = []
    if header:
        for column in table.columns:
            column_name = str(column).casefold()
            if any(keyword in column_name for keyword in price_header_keywords):
                prices_columns.append(column)

    # Share of cells of each column that contain at least one digit
    ratio_of_number_rows = dict((column,0) for column in table.columns)
    for column in table.columns:
        for cell in table[column]:
            if find_number.search(str(cell)):
                ratio_of_number_rows[column] += 1

        ratio_of_number_rows[column] = ratio_of_number_rows[column]/table.shape[0]

    # Columns found by their header are preferred; otherwise all columns are candidates
    candidates = prices_columns if len(prices_columns)>0 else table.columns
    prices_columns = [(column,ratio_of_number_rows[column]) for column in candidates if ratio_of_number_rows[column] > 0.75]

    # A position column is never a price column. A new list is built because
    # removing items from a list while iterating over it skips elements.
    positions_columns_names = [column[0] for column in positions_columns]
    prices_columns = [column for column in prices_columns if column[0] not in positions_columns_names]

    prices_columns = sorted(prices_columns, key=lambda tup: tup[1], reverse=True)

    # Suggest new keywords based on the words found in the position columns.
    # Each word gets points equal to the relative number of rows of its column
    # which already contain some position keyword.
    # NOTE(review): words are excluded when they are in the *price header*
    # keyword list (the list that was active at this point); excluding the
    # position keywords looks more plausible - confirm before changing.
    if ((len(positions_columns)>0) and (len(prices_columns)>0)):
        for column_name, score in positions_columns:
            for cell in table[column_name]:
                for word in parse_text(str(cell).casefold()):
                    if word not in price_header_keywords:
                        suggested_keywords[word] = suggested_keywords.get(word, 0) + score

    # Save to clean directory only if there is at least a single price column
    if (len(prices_columns)>0):
        table.to_csv(os.path.join(clean_tables_dir,table_csv), sep = ',')

    # Step 7
    # Identify whether some column name mentions prices with / without VAT (DPH)
    with_DPH    = False
    without_DPH = False

    for column in table.columns:
        if 's DPH'   in column: with_DPH    = True
        if 'bez DPH' in column: without_DPH = True

    # Print metadata
    print('Header:',header)
    print('Positions:',positions_columns)
    print('Prices:',prices_columns)
    print('s DPH:',with_DPH)
    print('bez DPH:',without_DPH)

# Write suggested keywords, highest score first.
# suggested_keywords becomes a list of (word, score) pairs here.
suggested_keywords = sorted(suggested_keywords.items(), key=operator.itemgetter(1), reverse=True)

# Explicit UTF-8: the words contain Slovak diacritics, which the platform
# default encoding may not be able to represent.
with open('suggested_keywords.txt','w', encoding = 'utf-8') as fo:
    fo.writelines(word+'\t\t\t'+str(score)+'\n' for word, score in suggested_keywords)
