In [1]:
import binascii
import json
import re
from typing import Tuple, Optional
from urllib.parse import quote

import pandas as pd
from cryptography import x509
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.serialization import pkcs7
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine

def get_engine(db_name: str) -> "Engine":
    """Create a SQLAlchemy engine for a PostgreSQL database on localhost.

    The user name and password are read from the ``database`` section of the
    JSON configuration file at a fixed path.

    Args:
        db_name: Name of the database to connect to.

    Returns:
        The engine built by ``create_engine`` for the assembled URL.

    Raises:
        OSError: If the configuration file cannot be opened.
        KeyError: If ``database``, ``user`` or ``password`` is missing from
            the configuration.
    """
    host = 'localhost'
    with open(
        '/home/asomura/waseda/nextstep/RAPIDS/config/database.json',
        encoding='utf-8',
    ) as f:
        config = json.load(f)['database']
    # Percent-encode the credentials so characters such as '@', ':', '/' or
    # spaces cannot break the URL structure. Plain alphanumeric values are
    # left unchanged by quote().
    # NOTE(review): assumes the config stores the raw (not already
    # percent-encoded) password - confirm.
    user = quote(str(config["user"]), safe='')
    password = quote(str(config["password"]), safe='')
    return create_engine(
        f'postgresql://{user}:{password}@{host}/{db_name}'
    )

def clean_cert_data(cert_data: str) -> Optional[bytes]:
    """Normalize a stored certificate string into bytes.

    Text containing a ``-----BEGIN`` marker is treated as PEM: it is trimmed
    of surrounding whitespace and returned UTF-8 encoded. Anything else is
    treated as a hex dump: backslash-x markers, spaces and every other
    non-hex character are removed, an odd-length digit string is padded with
    a trailing ``0``, and the digits are decoded to bytes.

    Returns:
        The converted bytes, or ``None`` (after printing an error message)
        if any step of the conversion raises.
    """
    try:
        text = cert_data.strip()
        if "-----BEGIN" not in text:
            # Hex path: drop the escape markers and spaces first, then
            # anything else that is not a hex digit.
            without_markers = text.replace('\\x', '')
            without_spaces = without_markers.replace(' ', '')
            digits = re.sub(r'[^0-9a-fA-F]', '', without_spaces)
            # unhexlify needs an even number of digits; pad on the right.
            if len(digits) % 2:
                digits = digits + '0'
            return binascii.unhexlify(digits)
        # PEM path: hand the text back as bytes, untouched apart from strip.
        return text.encode('utf-8')
    except Exception as e:
        print(f"[ERROR] 証明書データ変換失敗: {str(e)}")
        return None

def load_certificate_from_data(data: bytes) -> Tuple[Optional[x509.Certificate], str]:
    """Load a certificate from PEM, DER or PKCS#7 (PEM/DER) encoded bytes.

    The format is chosen by looking for PEM markers in ``data``; when none
    is present, plain DER is tried first and PKCS#7 DER is the fallback. For
    PKCS#7 input the first certificate in the bundle is returned.

    Returns:
        A ``(certificate, format_label)`` pair where the label is one of
        ``"PEM"``, ``"PKCS7_PEM"``, ``"DER"`` or ``"PKCS7_DER"``. If loading
        fails, an error is printed and ``(None, "Unknown")`` is returned.
    """
    try:
        if b'-----BEGIN CERTIFICATE-----' in data:
            certificate = x509.load_pem_x509_certificate(data, default_backend())
            label = "PEM"
        elif b'-----BEGIN PKCS7-----' in data:
            bundle = pkcs7.load_pem_pkcs7_certificates(data)
            certificate = bundle[0]
            label = "PKCS7_PEM"
        else:
            # No PEM marker: assume binary input and try plain DER before
            # falling back to a PKCS#7 DER bundle.
            try:
                certificate = x509.load_der_x509_certificate(data, default_backend())
                label = "DER"
            except Exception:
                bundle = pkcs7.load_der_pkcs7_certificates(data)
                certificate = bundle[0]
                label = "PKCS7_DER"
    except Exception as e:
        print(f"[ERROR] 証明書のロード失敗: {str(e)}")
        return None, "Unknown"
    return certificate, label

def analyze_certificates_from_db(db_name: str):
    """Count how many stored certificate bodies can be parsed.

    Reads ``id`` and ``https_certificate_body`` from ``website_data`` rows
    with ``status = 7`` and a non-NULL certificate body, converts each body
    with ``clean_cert_data`` and tries to load it with
    ``load_certificate_from_data``. A debug line is printed for every record
    that fails either step, followed by one summary line with the number of
    fetched records and the number loaded successfully.

    Args:
        db_name: Name of the database to query (passed to ``get_engine``).
    """
    engine = get_engine(db_name)
    query = """
    SELECT id, https_certificate_body FROM website_data
    WHERE status = 7 AND https_certificate_body IS NOT NULL
    """
    try:
        df_cert = pd.read_sql_query(query, engine)
    finally:
        # The engine is only used for this single query; dispose of it so
        # its pooled connections are closed instead of lingering per call.
        engine.dispose()
    total_records = len(df_cert)
    processed_cert_count = 0

    # Iterate only the two columns that are used, avoiding the per-row
    # Series construction of iterrows().
    for record_id, body in zip(df_cert['id'], df_cert['https_certificate_body']):
        data = clean_cert_data(body)
        if not data:
            # Covers both a failed conversion (None) and an empty result.
            print(f"[DEBUG] 証明書データ変換失敗: レコードID {record_id}")
            continue
        cert, cert_format = load_certificate_from_data(data)
        if cert:
            processed_cert_count += 1
        else:
            print(f"[DEBUG] 証明書パース失敗: レコードID {record_id}, フォーマット: {cert_format}")

    print(f"[INFO] {db_name} - 取得レコード数: {total_records}, 処理成功数: {processed_cert_count}")

# Run the certificate analysis against each database in turn.
for db in ('website_data', 'normal_sites'):
    analyze_certificates_from_db(db)


[INFO] website_data - 取得レコード数: 12074, 処理成功数: 12074
[INFO] normal_sites - 取得レコード数: 9591, 処理成功数: 9591
