In [None]:
from pyspark import SparkContext
from pyspark.sql import SparkSession
import pydeequ
from pydeequ import Check, CheckLevel, AnalysisRunner
from pydeequ.analyzers import *
from pydeequ.suggestions import *
from pydeequ.repository import FileSystemMetricsRepository, ResultKey
from pydeequ.verification import VerificationSuite, VerificationResult
import time
import pytest
import pytest_check as pc
from pytest_check import check_func

# Interim datasets stored under s3://tahoeqa-interim-data/nvd/.
# Every entry is a three-item list: [S3 URI, name, name]. The two trailing
# names look like column names for whatever consumes this table -- TODO
# confirm against the consumer, which is not visible in this section.
# NOTE(review): the first cpe_name entry carries 'a' where its siblings carry
# "index"; it is kept exactly as found -- confirm that it is intentional.
parquet_files = [
    ["s3://tahoeqa-interim-data/nvd/" + dataset, first_name, second_name]
    for dataset, first_name, second_name in (
        ("nvd", "publishedDate", "lastModifiedDate"),
        ("vendor", "vendors", "products"),
        ("nvd_configurations.nodes.val.cpe_match.val.cpe_name", "id", "a"),
        ("nvd_configurations.nodes.val.cpe_match", "id", "index"),
        ("nvd_cve.description.description_data", "id", "index"),
        ("nvd_cve.problemtype.problemtype_data.val.description", "id", "index"),
        ("nvd_cve.problemtype.problemtype_data", "id", "index"),
        ("nvd_cve.references.reference_data.val.tags", "id", "index"),
        ("nvd_cve.references.reference_data", "id", "index"),
        ("nvd_configurations.nodes.val.children", "id", "index"),
        ("nvd_configurations.nodes.val.children.val.cpe_match", "id", "index"),
        (
            "nvd_configurations.nodes.val.children.val.cpe_match.val.cpe_name",
            "id",
            "index",
        ),
        ("nvd_configurations.nodes.val.children.val.children", "id", "index"),
    )
]

# Raw JSON feed files stored under s3://tahoeqa-raw-data/nvd/.
# One entry per yearly file from 2002 through 2022, followed by the "recent"
# file. Every entry is [S3 URI, "CVE_data_numberOfCVEs", "CVE_data_format"];
# the two trailing names are presumably top-level fields of each JSON file --
# TODO confirm against the code that consumes this table.
raw_files = [
    [
        "s3://tahoeqa-raw-data/nvd/nvdcve-1.1-{}.json".format(feed_tag),
        "CVE_data_numberOfCVEs",
        "CVE_data_format",
    ]
    # range() excludes its stop value, so 2023 makes 2022 the last year.
    for feed_tag in [str(year) for year in range(2002, 2023)] + ["recent"]
]



