In [None]:
import os
import datetime
from azure.storage.blob import BlobServiceClient, generate_blob_sas, BlobSasPermissions
from openai import AzureOpenAI
from dotenv import load_dotenv
import json
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from pydantic import BaseModel, Field
from typing import Literal
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
from functools import partial
import time

In [None]:
# Read settings from a local .env file; override=True lets values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# --- Azure Blob Storage settings (None when the variable is not set) ---
BLOB_CONNECTION_STRING = os.environ.get("BLOB_CONNECTION_STRING")
BLOB_CONTAINER_NAME = os.environ.get("BLOB_CONTAINER_NAME")

# --- Azure OpenAI settings (None when the variable is not set) ---
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
# API version passed to the AzureOpenAI client constructor.
AOAI_API_VERSION = "2025-03-01-preview"

In [None]:
def get_blob_service_client():
    """
    Build a BlobServiceClient from the module-level BLOB_CONNECTION_STRING.

    Returns:
        BlobServiceClient: Client created via ``from_connection_string``.

    Raises:
        ValueError: If BLOB_CONNECTION_STRING is unset or empty, so the caller
            sees a configuration problem instead of an opaque SDK error.
    """
    if not BLOB_CONNECTION_STRING:
        raise ValueError(
            "BLOB_CONNECTION_STRING is not set; define it in the environment or .env file."
        )
    return BlobServiceClient.from_connection_string(BLOB_CONNECTION_STRING)

def upload_to_blob(file_path, blob_name):
    """
    Upload a local file to Azure Blob Storage and create a read-only SAS token for it.

    Args:
        file_path: Path to the local file
        blob_name: Name to use in blob storage

    Returns:
        tuple: (blob_url, sas_token). The URL does not include the token; callers
        join them as ``f"{blob_url}?{sas_token}"``.
    """
    blob_service_client = get_blob_service_client()
    blob_client = blob_service_client.get_blob_client(container=BLOB_CONTAINER_NAME, blob=blob_name)

    # An existing blob with the same name is replaced.
    with open(file_path, "rb") as data:
        blob_client.upload_blob(data, overwrite=True)

    now = datetime.datetime.now(datetime.timezone.utc)
    # Backdate the start so that clock skew between this machine and the storage
    # service cannot make the token "not yet valid" right after it is issued.
    start_time = now - datetime.timedelta(minutes=15)
    expiry_time = now + datetime.timedelta(days=1)

    # Read-only SAS signed with the account key taken from the client's credential
    # (assumes the connection string carries an account key — TODO confirm for other auth modes).
    sas_token = generate_blob_sas(
        account_name=blob_client.account_name,
        container_name=blob_client.container_name,
        blob_name=blob_client.blob_name,
        account_key=blob_service_client.credential.account_key,
        permission=BlobSasPermissions(read=True),
        expiry=expiry_time,
        start=start_time
    )

    blob_url = blob_client.url
    return blob_url, sas_token

def get_openai_client():
    """Build an AzureOpenAI client from the module-level endpoint, API key and API version."""
    settings = {
        "azure_endpoint": AZURE_OPENAI_ENDPOINT,
        "api_key": AZURE_OPENAI_API_KEY,
        "api_version": AOAI_API_VERSION,
    }
    return AzureOpenAI(**settings)

# Structured-output schema for one part-appearance inspection verdict.
# NOTE(review): the class docstring and the Field description are intentionally left
# in Japanese. Pydantic exposes both as "description" entries in the generated JSON
# schema, and this class is passed as response_format, so they are presumably sent
# to the model — confirm against the installed pydantic/openai versions.
class ClassificationResult(BaseModel):
    """部品外観検査の分類結果を表すPydanticモデル。"""
    # One of the four inspection labels used throughout this notebook.
    classification: Literal["OK", "汚れ", "欠け", "削り節"]
    # Free-text justification for the chosen label.
    reasoning: str
    # Confidence on the 1-5 scale defined in the developer prompt built by
    # create_few_shot_messages (the few-shot examples use 3 and 4). The bound
    # was previously 10, which contradicted the prompt's scale.
    confidence: int = Field(
        ...,
        ge=1,
        le=5,
        description="分類の確信度（1〜5の整数）"
    )

In [None]:
def upload_images_from_folder(folder_path):
    """
    Upload every ``*.png`` file under a folder (recursively) to Blob Storage.

    Files are processed in sorted path order so that the resulting dictionary,
    and anything derived from its ordering (e.g. which samples are picked first
    for few-shot prompting), is reproducible across runs and filesystems.

    Args:
        folder_path: Path to the input folder

    Returns:
        dict: Maps each local image path (str) to its SAS-signed URL
        (``"<blob_url>?<sas_token>"``). Images whose upload failed are reported
        on stdout and left out of the dictionary.
    """
    root = Path(folder_path)
    uploaded_images = {}

    for image_file in sorted(root.rglob("*.png")):
        # Blob name mirrors the folder structure, always with forward slashes.
        blob_name = image_file.relative_to(root).as_posix()

        try:
            blob_url, sas_token = upload_to_blob(str(image_file), blob_name)
        except Exception as e:
            # Best effort: one failed upload must not abort the remaining files.
            print(f"Error uploading {image_file}: {e}")
            continue

        uploaded_images[str(image_file)] = f"{blob_url}?{sas_token}"
        print(f"Uploaded: {blob_name}")

    return uploaded_images

def load_sample_images(uploaded_images, sample_folder="sample"):
    """
    Collect the few-shot sample image URLs, grouped by label.

    An image counts as a sample when its path (with backslashes normalised to
    forward slashes) contains ``/<sample_folder>/`` or ends with
    ``/<sample_folder>``. Its label is the first of OK / 汚れ / 欠け / 削り節
    that appears as a ``/<label>/`` path segment; images matching no label are
    skipped.

    Args:
        uploaded_images: Dictionary mapping local image paths to URLs
        sample_folder: Folder name to look for (default: "sample")

    Returns:
        dict: Dictionary with labels as keys and list of image URLs as values
    """
    grouped = {"OK": [], "汚れ": [], "欠け": [], "削り節": []}
    folder_segment = f"/{sample_folder}/"
    folder_suffix = f"/{sample_folder}"

    for original_path, signed_url in uploaded_images.items():
        posix_path = original_path.replace("\\", "/")

        in_sample_folder = folder_segment in posix_path or posix_path.endswith(folder_suffix)
        if not in_sample_folder:
            continue

        # Checking the normalised path also covers backslash-separated paths,
        # because every backslash has already been turned into a forward slash.
        matched_label = next(
            (name for name in grouped if f"/{name}/" in posix_path),
            None,
        )
        if matched_label is None:
            continue

        grouped[matched_label].append(signed_url)
        print(f"Added {matched_label} sample: {Path(original_path).name}")

    return grouped

def create_few_shot_messages(sample_images, max_samples_per_label=4):
    """
    Build the message list (developer prompt + few-shot examples) for the chat API.

    The list starts with one "developer" message holding the inspection
    instructions. For each label in ``sample_images`` it then adds, per example
    image, a "user" message (text + image URL) followed by an "assistant"
    message whose content is a JSON string with ``classification``,
    ``reasoning`` and ``confidence``.

    Args:
        sample_images: Mapping of label -> list of image URLs.
        max_samples_per_label: Maximum number of example images used per label,
            taken from the front of each URL list (default: 4).

    Returns:
        list: Chat messages ready to be extended with the image to classify.
    """
    messages = [
        {
            "role": "developer",
            "content": """あなたは製造業の品質検査の専門家です。部品の画像を見て、以下の4つのカテゴリーに分類してください。必ず「分類結果」「理由」「確信度（1–5の5段階）」の３つを出力してください。

				─── 分類定義 ───
				• OK  
				– 定義：製品の外観に異常がなく、形状・表面状態ともに規定の仕様範囲内。  
				– 判定根拠例：  
					• 表面に凹凸や飛び出しがない  
					• 欠け・割れ・ヒビが見当たらない  
					• 汚れや異物付着が確認できない  

				• 削り節  
				– 定義：製造工程で発生した切削くずが製品上部やエッジに残留・飛び出しているもの。  
				– 判定根拠例：  
					• 細長い切削片（くず状／糸くず状）が付着  
					• 本体素材と質感・色がわずかに異なり「くず」と認識できる  
					• 複数箇所に飛び出し・突起がある  

				• 欠け  
				– 定義：表面またはエッジの一部が物理的に欠落し、凹状の傷・穴があるもの。  
				– 判定根拠例：  
					• エッジが不自然に丸くなっている、あるいはギザギザ  
					• 表面材が剥離し底地（金属・樹脂芯）が見える  
					• 欠落部分の輪郭がシャープで「欠け」と特定できる  

				• 汚れ  
				– 定義：表面に油脂・ほこり・粉じん・液体シミなどの異物が付着し、拭いても残るもの。  
				– 判定根拠例：  
					• 基材と異なる色調の斑点・筋状シミ・塊  
					• 汚れ部分だけ光沢が落ちマットに見える  
					• 触れるとべたつきやざらつきを感じる  

				─── 分析手順 ───
				1. 画像全体を俯瞰し、形状・輪郭の歪みや突出をチェック  
				2. 表面テクスチャと色調を観察し、「OK」と「汚れ」の区別点を確認  
				3. 欠けや切削くずの特徴（シャープさ、糸くず状など）を拡大して詳細に観察  
				4. 各定義と照合し、最も当てはまるラベルを選択  

				─── 確信度定義（1–5）───
				1. 根拠ほぼなし：特徴が不明瞭で他ラベルの可能性も高い  
				2. 根拠弱い：一部該当する箇所はあるが曖昧・画像が不鮮明  
				3. 標準的：定義に合う特徴が確認でき、他の候補は比較的低い  
				4. 高い：複数の明確な根拠が揃っており、ほぼ間違いない  
				5. 非常に高い：決定的な特徴が圧倒的に一致、誤分類の余地なし  

				─── 出力フォーマット ───
				分類結果: <OK／汚れ／欠け／削り節>  
				理由: <具体的な観察ポイントと定義との照合結果を詳細に記述>  
				確信度: <1〜5の整数で、上記定義に沿って評価>
				"""
        }
    ]
    
    # Canned reasoning texts per label; they are attached to the example answers
    # regardless of what the individual sample image actually shows.
    reasoning_templates = {
        "OK": [
            "画像を詳細に観察した結果、製品の表面は滑らかで均一な仕上がりを示しており、欠け・割れ・ヒビは一切確認できません。エッジ部分も設計通りの形状を保持し、異物付着や汚れも見当たりません。全体的に規定の仕様範囲内の良好な状態です。",
            "製品全体を俯瞰したところ、形状・輪郭に歪みや突出はなく、表面テクスチャも一様で異常は認められません。切削くずの残留や表面剥離の兆候もなく、品質基準を満たしている状態と判断します。"
        ],
        "汚れ": [
            "表面に基材とは異なる色調の斑点状の付着物が複数箇所確認できます。これらの汚れ部分は光沢が落ちてマット状に見え、油脂系または粉じんの付着と考えられます。拭き取りでは除去困難な定着した汚れの特徴を示しています。",
            "画像中央付近に筋状のシミと、周辺部に点状の汚れが観察されます。汚れ部分の色調が基材と明らかに異なり、表面の質感も変化していることから、外部からの異物付着による汚れと断定します。"
        ],
        "欠け": [
            "エッジ部分に明確な欠落が確認でき、本来の直線的な輪郭が不自然に丸くなっています。欠落部分の境界はシャープで、物理的な衝撃による材料の剥離と判断されます。底地の素材も一部露出しており、典型的な「欠け」の特徴を示しています。",
            "製品表面に凹状の傷が複数箇所見られ、特に角部において材料の欠落が顕著です。欠け部分の輪郭が明瞭で、表面コーティングが剥がれて下地が見える状況から、物理的損傷による欠けと特定できます。"
        ],
        "削り節": [
            "製品上部とエッジ部分に細長い糸くず状の切削片が複数付着しているのが確認できます。これらの切削くずは本体素材とは質感が異なり、製造工程で発生した切削屑が除去されずに残留したものと判断されます。",
            "表面に複数の突起状の飛び出しが観察され、これらは製造時の切削加工で発生したくず片の付着と考えられます。くず片の形状が細長く、本体部分とは色調や質感が微妙に異なることから削り節の特徴と一致します。"
        ]
    }
    
    # Add few-shot examples
    for label, urls in sample_images.items():
        # Labels without a template fall back to a single generic sentence.
        reasoning_list = reasoning_templates.get(label, [f"この画像は{label}の特徴を示しています。"])

        # Example confidence on the prompt's 1-5 scale: 4 for "OK"/"欠け", 3 otherwise.
        confidence = 4 if label in ["OK", "欠け"] else 3

        for i, url in enumerate(urls[:max_samples_per_label]):
            # Cycle through the templates when there are more examples than templates.
            reasoning = reasoning_list[i % len(reasoning_list)]
            
            messages.extend([
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "この部品の画像を分類してください。"
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": url, "detail": "high"}
                        }
                    ]
                },
                {
                    "role": "assistant",
                    "content": json.dumps({
                        "classification": label,
                        "reasoning": reasoning,
                        "confidence": confidence
                    }, ensure_ascii=False)
                }
            ])
    
    return messages

def classify_image(client, image_url, few_shot_messages, model="o4-mini", max_completion_tokens=1000):
    """
    Classify a single image using Azure OpenAI with Pydantic structured output.

    Args:
        client: Client exposing ``beta.chat.completions.parse`` (AzureOpenAI).
        image_url: URL of the image to classify.
        few_shot_messages: Messages from create_few_shot_messages; not modified.
        model: Model deployment name (default: "o4-mini"; replace with your
            actual deployment name).
        max_completion_tokens: Completion token limit passed to the API.

    Returns:
        dict: ``classification``, ``reasoning`` and ``confidence``. On any
        failure the error is printed and the dict has classification "ERROR",
        the error text as reasoning and confidence 0; no exception is raised.
    """
    # Build a new list so the shared few-shot list is never mutated.
    messages = [
        *few_shot_messages,
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "この部品の画像を分類してください。"
                },
                {
                    "type": "image_url",
                    "image_url": {"url": image_url, "detail": "high"}
                }
            ]
        },
    ]

    try:
        completion = client.beta.chat.completions.parse(
            model=model,
            messages=messages,
            response_format=ClassificationResult,
            max_completion_tokens=max_completion_tokens
        )

        message = completion.choices[0].message
        result = message.parsed
        if result is None:
            # No structured payload (e.g. the model refused); report that
            # explicitly instead of failing on attribute access below.
            refusal = getattr(message, "refusal", None)
            raise ValueError(f"Model returned no parsed result (refusal: {refusal})")

        return {
            "classification": result.classification,
            "reasoning": result.reasoning,
            "confidence": result.confidence
        }

    except Exception as e:
        print(f"Error classifying image: {e}")
        # Same keys as the success case so callers can treat both uniformly.
        return {"classification": "ERROR", "reasoning": f"Error: {e}", "confidence": 0}

def classify_image_wrapper(args):
    """
    Classify one image and attach its metadata; designed for executor.submit.

    Args:
        args: Tuple of (image_url, true_label, image_path, client, few_shot_messages, index, total)

    Returns:
        dict: Keys image_path, image_url, true_label, predicted, reasoning,
        confidence, status and index. ``status`` is 'error' when classification
        raised or when classify_image reported "ERROR", otherwise 'success'.
    """
    image_url, true_label, image_path, client, few_shot_messages, index, total = args

    print(f"[{index+1}/{total}] Classifying: {Path(image_path).name}")

    try:
        result = classify_image(client, image_url, few_shot_messages)
        predicted = result['classification']
        reasoning = result['reasoning']
        confidence = result.get('confidence', 0)
        # classify_image reports failures through its return value rather than
        # by raising, so the status has to be derived from the label.
        status = 'error' if predicted == 'ERROR' else 'success'
    except Exception as e:
        print(f"Error classifying {Path(image_path).name}: {e}")
        predicted = 'ERROR'
        reasoning = f'Classification failed: {str(e)}'
        confidence = 0
        status = 'error'

    return {
        'image_path': image_path,
        'image_url': image_url,
        'true_label': true_label,
        'predicted': predicted,
        'reasoning': reasoning,
        'confidence': confidence,
        'status': status,
        'index': index
    }

def classify_images_parallel(test_data, client, few_shot_messages, max_workers=5):
    """
    Classify images in parallel using ThreadPoolExecutor.

    Args:
        test_data: List of (image_url, true_label, image_path) tuples
        client: Azure OpenAI client
        few_shot_messages: Few-shot messages for classification
        max_workers: Maximum number of parallel workers

    Returns:
        list: One result dict per input image, sorted by its position in
        ``test_data`` (the ``index`` key). Returns an empty list for empty input.
    """
    total = len(test_data)
    if total == 0:
        # Nothing to do; also avoids dividing by zero in the timing summary.
        print("No images to classify.")
        return []

    results = []

    # Prepare arguments for parallel processing
    args_list = [
        (image_url, true_label, image_path, client, few_shot_messages, i, total)
        for i, (image_url, true_label, image_path) in enumerate(test_data)
    ]

    print(f"Starting parallel classification with {max_workers} workers...")
    start_time = time.time()

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all tasks
        future_to_args = {executor.submit(classify_image_wrapper, args): args for args in args_list}

        # Collect results as they complete
        for future in as_completed(future_to_args):
            try:
                result = future.result()
            except Exception as e:
                print(f"Future failed: {e}")
                # Keep a record for the image so it is not silently dropped.
                image_url, true_label, image_path, _, _, index, _ = future_to_args[future]
                result = {
                    'image_path': image_path,
                    'image_url': image_url,
                    'true_label': true_label,
                    'predicted': 'ERROR',
                    'reasoning': f'Classification failed: {str(e)}',
                    'confidence': 0,
                    'status': 'error',
                    'index': index
                }
            results.append(result)

            # Print progress every 5 completions and at the end
            completed = len(results)
            if completed % 5 == 0 or completed == total:
                elapsed = time.time() - start_time
                avg_time = elapsed / completed
                eta = avg_time * (total - completed)
                print(f"Progress: {completed}/{total} ({completed/total*100:.1f}%) - "
                      f"ETA: {eta:.1f}s")

    # Sort results by index to maintain order
    results.sort(key=lambda x: x['index'])

    total_time = time.time() - start_time
    print(f"\nClassification completed in {total_time:.1f}s")
    print(f"Average time per image: {total_time/total:.2f}s")

    return results

def classify_images_by_folder_parallel(test_data, client, few_shot_messages, max_workers=3):
    """
    Classify images in parallel, processing each folder (label) separately.

    Every label group is handed to classify_images_parallel on its own thread,
    so up to ``len(groups) * max_workers`` classifications can run at once.

    Args:
        test_data: List of (image_url, true_label, image_path) tuples
        client: Azure OpenAI client
        few_shot_messages: Few-shot messages for classification
        max_workers: Maximum number of parallel workers per folder

    Returns:
        list: Classification results in the same order as ``test_data``; each
        result's ``index`` is its position in ``test_data``. Returns an empty
        list for empty input.
    """
    # Group test data by true label, remembering each item's original position.
    grouped_data = {}
    original_positions = {}
    for position, item in enumerate(test_data):
        true_label = item[1]
        grouped_data.setdefault(true_label, []).append(item)
        original_positions.setdefault(true_label, []).append(position)

    if not grouped_data:
        # ThreadPoolExecutor rejects max_workers=0, so stop before creating it.
        print("No images to classify.")
        return []

    all_results = []

    print("Processing folders in parallel...")

    # Process each folder in parallel
    folder_futures = {}
    with ThreadPoolExecutor(max_workers=len(grouped_data)) as folder_executor:
        for label, folder_data in grouped_data.items():
            print(f"Submitting {len(folder_data)} images from '{label}' folder")
            future = folder_executor.submit(
                classify_images_parallel,
                folder_data,
                client,
                few_shot_messages,
                max_workers
            )
            folder_futures[future] = label

        # Collect results from each folder
        for future in as_completed(folder_futures):
            label = folder_futures[future]
            try:
                folder_results = future.result()
            except Exception as e:
                print(f"Error processing '{label}' folder: {e}")
                continue

            # The per-folder run numbers its items from 0; translate that back
            # to the position in test_data so the final sort restores input order.
            positions = original_positions[label]
            all_results.extend(
                {**result, 'index': positions[result['index']]}
                for result in folder_results
            )
            print(f"Completed processing '{label}' folder: {len(folder_results)} images")

    # Sort all results by original index
    all_results.sort(key=lambda x: x['index'])

    return all_results

def load_test_images(uploaded_images, test_folder="test"):
    """
    Collect the test images together with the label encoded in their path.

    An image is a test image when its path (with backslashes normalised to
    forward slashes) contains ``/<test_folder>/``. Its true label is the first
    of OK / 汚れ / 欠け / 削り節 that appears as a ``/<label>/`` path segment;
    images matching no label are skipped.

    Args:
        uploaded_images: Dictionary mapping local image paths to URLs
        test_folder: Folder name to look for (default: "test")

    Returns:
        list: List of tuples (image_url, true_label, image_path)
    """
    known_labels = ("OK", "汚れ", "欠け", "削り節")
    folder_segment = f"/{test_folder}/"
    collected = []

    for original_path, signed_url in uploaded_images.items():
        posix_path = original_path.replace("\\", "/")
        if folder_segment not in posix_path:
            continue

        # Checking the normalised path also covers backslash-separated paths,
        # because every backslash has already been turned into a forward slash.
        true_label = next(
            (name for name in known_labels if f"/{name}/" in posix_path),
            None,
        )
        if true_label is not None:
            collected.append((signed_url, true_label, original_path))

    return collected

def evaluate_results(results_df):
    """
    Print accuracy and a classification report, and plot the confusion matrix.

    Args:
        results_df: DataFrame with 'true_label' and 'predicted' columns.

    Returns:
        tuple: (accuracy, confusion_matrix), where the matrix rows/columns follow
        the label order OK, 汚れ, 欠け, 削り節.
    """
    y_true = results_df['true_label']
    y_pred = results_df['predicted']

    # Fraction of rows whose prediction equals the true label.
    accuracy = y_pred.eq(y_true).mean()
    print(f"Accuracy: {accuracy:.3f}")

    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))

    # Fixed label order so the matrix layout does not depend on the data.
    labels = ["OK", "汚れ", "欠け", "削り節"]
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    plt.figure(figsize=(10, 8))
    heatmap_axes = sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=labels,
        yticklabels=labels,
    )
    heatmap_axes.set_title('Confusion Matrix')
    heatmap_axes.set_xlabel('Predicted')
    heatmap_axes.set_ylabel('True')
    plt.show()

    return accuracy, cm

In [None]:
# Upload only the images under input/背景カットなし to Blob Storage.
image_root = "input/背景カットなし"
print("Uploading images from 背景カットなし folder to Blob Storage...")
uploaded_images = upload_images_from_folder(image_root)
uploaded_count = len(uploaded_images)
print(f"Uploaded {uploaded_count} images")

In [None]:
# Pick the few-shot sample images out of the uploaded set, grouped by label.
sample_images = load_sample_images(uploaded_images)
print("Sample images loaded:")
for label in sample_images:
    print(f"{label}: {len(sample_images[label])} images")

# Build the developer prompt plus the user/assistant example pairs.
few_shot_messages = create_few_shot_messages(sample_images)
print(f"Created {len(few_shot_messages)} few-shot messages")

In [None]:
# Preview the few-shot conversation without writing secrets into the notebook
# output: image URLs carry a SAS token in their query string, so only the part
# before "?" is shown, and long text content is truncated.
[
    {
        "role": message["role"],
        "content": (
            message["content"][:80]
            if isinstance(message["content"], str)
            else [
                part["text"] if part["type"] == "text"
                else part["image_url"]["url"].split("?", 1)[0]
                for part in message["content"]
            ]
        ),
    }
    for message in few_shot_messages
]

In [None]:
# Load test images
test_data = load_test_images(uploaded_images)
print(f"Loaded {len(test_data)} test images")

# Fail fast with a clear message; with no test images the classification and
# DataFrame steps below have nothing to work on.
if not test_data:
    raise RuntimeError(
        "No test images found: expected paths containing '/test/<label>/' among the uploaded images."
    )

# Initialize OpenAI client
client = get_openai_client()

# Choose parallel processing method
use_folder_parallel = True  # Set to False for simple parallel processing
# Adjust based on your Azure OpenAI rate limits. In folder mode this is the
# worker count per label folder, and the folders themselves run concurrently.
max_workers = 4

if use_folder_parallel:
    print("Using folder-based parallel processing...")
    results = classify_images_by_folder_parallel(test_data, client, few_shot_messages, max_workers)
else:
    print("Using simple parallel processing...")
    results = classify_images_parallel(test_data, client, few_shot_messages, max_workers)

# Convert to DataFrame
results_df = pd.DataFrame(results)

# Remove the index column and status column if not needed for evaluation
results_df = results_df.drop(['index', 'status'], axis=1, errors='ignore')

print("\nClassification completed!")
print(results_df[['image_path', 'true_label', 'predicted']].head())

# Show error summary (rows whose prediction is the "ERROR" sentinel)
error_df = results_df[results_df['predicted'] == 'ERROR']
error_count = len(error_df)
if error_count > 0:
    print(f"\nWarning: {error_count} images failed to classify")
    print("Error details:")
    for _, row in error_df.iterrows():
        print(f"  {Path(row['image_path']).name}: {row['reasoning']}")

In [None]:
# Evaluate results (excluding error cases)
valid_results_df = results_df[results_df['predicted'] != 'ERROR'].copy()

print(f"Evaluation Results (excluding {len(results_df) - len(valid_results_df)} error cases):")
if valid_results_df.empty:
    # Nothing was classified successfully, so there is nothing to score or plot.
    accuracy, confusion_mat = None, None
    print("No successfully classified images; skipping evaluation.")
else:
    accuracy, confusion_mat = evaluate_results(valid_results_df)

# Save detailed results (error rows included); utf-8-sig writes a BOM.
output_dir = 'analysis-results'
os.makedirs(output_dir, exist_ok=True)
output_path = f'{output_dir}/classification_results.csv'
results_df.to_csv(output_path, index=False, encoding='utf-8-sig')
print(f"\nDetailed results saved to '{output_path}'")

# Display some example predictions
print("\nSample predictions:")
for idx, row in valid_results_df.head().iterrows():
    print(f"\nImage: {Path(row['image_path']).name}")
    print(f"True: {row['true_label']}, Predicted: {row['predicted']}")
    print(f"Confidence: {row['confidence']}")
    print(f"Reasoning: {row['reasoning'][:100]}...")

# Display confidence distribution
print("\nConfidence distribution:")
confidence_stats = valid_results_df['confidence'].describe()
print(confidence_stats)