In [19]:
%run base.ipynb

In [20]:
from dataland_qa_lab.utils import config

# Load the project configuration and keep a handle on the Dataland client it
# exposes; later cells use `dataland_client` to download documents and to
# send QA reports.
conf = config.get_config()
dataland_client = conf.dataland_client

# End-2-End-Test
Als QA-Lab-Team möchten wir einen einfachen E2E-Test aufsetzen, der die Gesamtfunktionalität unseres Prototypen testet

# 1. Erstellen der Testdaten von Dataland
Erstellen von 10 Nuclear & Gas Datasets, die wir in unserem Testdurchlauf verwenden möchten.

In [21]:
from pathlib import Path

from dataland_qa_lab.dataland.provide_test_data import provide_test_data

# Parent of the current working directory; presumably the notebook is started
# from a sub-folder of the repository root — TODO confirm.
project_root = Path().resolve().parent  # noqa: FURB177
# Input folders handed to provide_test_data below.
pdf_path = project_root / "data" / "pdfs"
json_path = project_root / "data" / "jsons"
# NOTE(review): judging by the notebook headings this creates the test
# datasets on Dataland and returns their data IDs — confirm against
# provide_test_data.
test_data = provide_test_data(pdf_path=pdf_path, json_path=json_path)

Data-IDs der 10 Testdatensätze

In [None]:
# Show the value returned by provide_test_data for a quick visual check.
print(test_data)

Company-Ids der 10 Testdatensätze

In [None]:
# The module is imported under an alias so that it does not rebind the
# `provide_test_data` function that an earlier cell imports under the same
# name; without the alias that function is no longer callable after this cell.
from dataland_qa_lab.dataland import provide_test_data as test_data_provider

# Short names of the ten test companies.
companies = ["concordia", "covestro", "deka", "enbw", "enel", "eon", "iberdrola", "munichre", "rwe", "total"]

# Company IDs in the same order as `companies`; later cells rely on this order.
company_ids = []

for company in companies:
    company_id = test_data_provider.get_company_id(company)
    company_ids.append(company_id)
    print(company_id)

Basierend auf der CompanyID laden wir nun das zugehörige Nuclear & Gas Dataset und fragen den ersten Datenpunkt aus Meldebogen 1 ab.

In [None]:
import dataland_qa_lab.dataland.get_data as qa

# Reporting period to look up for every company.
year = "2023"

# Parallel result lists: entry k belongs to company_ids[k].
all_values_426 = []
all_datasource_reference_bytes = []
all_datasets = []

for company_id in company_ids:
    # Load all datasets of the company across every reporting period.
    all_data = qa.get_all_company_datasets(company_id=company_id)

    # Data ID of the dataset that belongs to the requested period.
    data_id = qa.get_data_id_by_year(company_id=company_id, year=year)
    print("Data_ID = " + data_id)

    # Load the dataset of the requested period.
    all_datasets.append(qa.get_dataset_by_year(company_id=company_id, year=year))

    # First value of that dataset; fetched once and reused for both the
    # result list and the log line instead of querying it twice.
    value_426 = qa.get_value1_by_year(company_id=company_id, year=year)
    all_values_426.append(value_426)
    print("Value 1 = " + value_426)

    # Data source reference as bytes, if a data source is attached.
    all_datasource_reference_bytes.append(
        qa.get_datasource_reference_bytes(company_id=company_id, year=year)
    )

In [None]:
# Dump every loaded dataset; the extra newlines leave two blank lines
# between consecutive datasets.
for dataset in all_datasets:
    print(dataset, end="\n\n\n")

# 2. Laden der Datenquelle von Dataland und Überführen in Textform

Finden des relevanten Bereichs im PDF-Dokument

In [26]:
import io

import pypdf  # type: ignore

# Parallel lists: entry k of each list describes the data source of
# all_datasets[k]. `all_page_number` and `all_page` receive the same value.
all_page_number = []
all_file_id = []
all_file_name = []
all_tag_name = []
all_page = []

for dataset in all_datasets:
    dataset_section426 = dataset.data.general.general.nuclear_energy_related_activities_section426
    data_source = dataset_section426.data_source

    all_page_number.append(data_source.page)
    all_file_id.append(data_source.file_reference)
    all_file_name.append(data_source.file_name)
    all_tag_name.append(data_source.tag_name)
    all_page.append(data_source.page)

    # Download the referenced document and open it as a PDF.
    # `pdf_reader` is rebound on every pass, so after the loop it refers to
    # the document of the last dataset only.
    pdf = dataland_client.documents_api.get_document(data_source.file_reference)
    pdf_reader = pypdf.PdfReader(io.BytesIO(pdf))

In [None]:
# List the page recorded for each dataset's data source, one per line.
for page_number in all_page_number:
    print(page_number)

In [None]:
import dataland_qa_lab.dataland.data_extraction as qa_lab

# Value extracted from the report of each dataset, in dataset order.
all_values_426_pdf = []

# Every dataset references its own report, so the matching document is
# downloaded per entry. Reusing a single reader left behind by an earlier
# cell would read all pages from the last dataset's PDF.
# `io`, `pypdf` and `dataland_client` are already in scope from earlier cells.
for file_id, page_number in zip(all_file_id, all_page_number):
    pdf = dataland_client.documents_api.get_document(file_id)
    pdf_reader = pypdf.PdfReader(io.BytesIO(pdf))

    relevant_page_number = int(page_number)
    relevant_page = qa_lab.get_relevant_page_of_pdf(relevant_page_number, pdf_reader)

    text_of_page = qa_lab.extract_text_of_pdf(relevant_page)

    section_426 = qa_lab.extract_section_426(text_of_page)
    all_values_426_pdf.append(section_426)

    print(section_426)

In [None]:

# Rebuild the list with the fixed placeholder "No", one entry per Dataland
# value. NOTE(review): this replaces whatever `all_values_426_pdf` held
# before this cell ran.
all_values_426_pdf = ["No"] * len(all_values_426)

for placeholder_value in all_values_426_pdf:
    print(placeholder_value)

# 4. Vergleich der beiden Werte

In [None]:
# Print the Dataland value and the report value side by side, numbered from 1.
for index, dataland_value in enumerate(all_values_426):
    prefix = str(index + 1) + "."
    print(prefix + " Wert in Dataland:\t" + dataland_value)
    print(prefix + " Wert im Bericht:\t" + all_values_426_pdf[index])

Verdict mithilfe von if-statements festlegen

In [None]:
from dataland_qa.models.qa_report_data_point_verdict import QaReportDataPointVerdict

# One verdict per dataset: accepted when the Dataland value equals the value
# taken from the report, rejected otherwise.
all_verdicts = []

for index, dataland_value in enumerate(all_values_426):
    values_match = dataland_value == all_values_426_pdf[index]
    verdict = QaReportDataPointVerdict.QAACCEPTED if values_match else QaReportDataPointVerdict.QAREJECTED
    all_verdicts.append(verdict)

for verdict in all_verdicts:
    print(verdict)

# 5. Erstellen und Abschicken eines QA Report

In [None]:
from dataland_qa_lab.dataland import send_report_to_dataland as report

# Build and send one QA report per dataset. All lists used here are parallel:
# entry `index` of each belongs to company_ids[index].
for index in range(len(all_values_426)):
    company_report_data = report.CompanyReportData(
        file_reference=all_file_id[index], file_name=all_file_name[index], publication_date=None
    )
    # NOTE(review): page and tag_name are fixed placeholder values, although
    # the per-dataset values are collected in all_page / all_tag_name —
    # confirm whether those should be used instead.
    document_reference_data = report.DocumentReferenceData(
        file_reference=all_file_id[index], page="8", tag_name="Test", file_name=all_file_name[index]
    )

    report_data_instance = report.ReportData(
        commentqareportdatapointmapstringcompanyreport="Kommentar zur QA-Zuordnung",
        commentqareportdatapointextendeddatapointyesno="Ja",
        commentextendeddatapointyesno="Nein",
        verdictqareportdatapointmapstringcompanyreport=all_verdicts[index],
        verdictqareportdatapointextendeddatapointyesno=all_verdicts[index],
        corrected_data={"1.": company_report_data},
        # Use the value that belongs to this dataset, not the leftover loop
        # variable from the extraction cell.
        value=all_values_426_pdf[index],
        quality="Reported",
        data_source=document_reference_data,
    )

    # Resolve the data ID of this company's dataset. Reusing the leftover
    # `data_id` from the loading cell would send every report to the dataset
    # of the last company. `qa` and `year` come from that earlier cell.
    data_id = qa.get_data_id_by_year(company_id=company_ids[index], year=year)

    report.send_report_to_dataland_method(
        data_id=data_id, report_data=report_data_instance, dataland_client=dataland_client
    )
    print(report_data_instance)