In [1]:
# Cell 1: 패키지 설치 및 임포트
# !pip install langchain openai python-dotenv

import os
from langchain_openai import AzureChatOpenAI
from langchain.schema import (
    SystemMessage,
    HumanMessage,
)

In [12]:
# Cell 2 수정
from dotenv import load_dotenv

# .env 파일에서 환경 변수 로드
load_dotenv()

# 환경 변수에서 값 가져오기 (기본값 포함)
model_name = os.getenv("AZURE_LLM_MODEL_NAME", "DeepSeek-R1")
endpoint = os.getenv("AZURE_LLM_MODEL_ENDPOINT", "https://hspar-m7k2pfor-swedencentral.services.ai.azure.com/models/chat/completions")
api_key = os.getenv("AZURE_LLM_MODEL_API_KEY", "FYKi43LLv1e3BWFkQpKT4QiTc7dzbhkZ0r1kV3CimDz8iRDWy854JQQJ99BBACfhMk5XJ3w3AAAAACOGRdJz")
api_version = os.getenv("AZURE_LM_MODEL_API_VERSION", "2024-05-01-preview")

# 설정 값 검증
if not all([model_name, endpoint, api_key, api_version]):
    print("❌ 환경 변수가 누락되었습니다.")
else:
    print("✅ Azure AI Foundry 설정 로드 완료:")
    print(f"Model Name: {model_name}")
    print(f"Endpoint: {endpoint}")
    print(f"API Version: {api_version}")
    print(f"API Key: {'***' + api_key[-4:] if api_key else 'Not Found'}")

✅ Azure AI Foundry 설정 로드 완료:
Model Name: DeepSeek-R1
Endpoint: https://hspar-m7k2pfor-swedencentral.services.ai.azure.com/models
API Version: 2024-05-01-preview
API Key: ***RdJz


In [13]:
# Cell 3 수정 - 여러 방법으로 시도
def initialize_model():
    try:
        # 방법 1: ChatOpenAI with custom base_url
        from langchain_community.chat_models import ChatOpenAI
        
        llm = ChatOpenAI(
            base_url=endpoint,
            api_key=api_key,
            model=model_name,
            temperature=0.7,
            max_tokens=2048
        )
        
        print("✅ ChatOpenAI로 초기화 성공")
        return llm, "chatopenai"
        
    except Exception as e:
        print(f"ChatOpenAI 실패: {e}")
        
        try:
            # 방법 2: AzureChatOpenAI 시도
            from langchain_openai import AzureChatOpenAI
            
            base_endpoint = endpoint.replace("/models/chat/completions", "")
            
            llm = AzureChatOpenAI(
                azure_endpoint=base_endpoint,
                api_version=api_version,
                azure_deployment=model_name,
                api_key=api_key,
                temperature=0.7,
                max_tokens=2048
            )
            
            print("✅ AzureChatOpenAI로 초기화 성공")
            return llm, "azure"
            
        except Exception as e2:
            print(f"AzureChatOpenAI도 실패: {e2}")
            return None, None

llm, model_type = initialize_model()

✅ ChatOpenAI로 초기화 성공


In [14]:
# Cell 4 수정 - 안전한 테스트
def test_model_connection():
    if llm is None:
        print("❌ 모델이 초기화되지 않았습니다.")
        return False
    
    messages = [
        SystemMessage(content="You are a helpful AI assistant."),
        HumanMessage(content="Hello! Please respond with a simple greeting.")
    ]
    
    try:
        print("메시지 전송 중...")
        response = llm.invoke(messages)
        print("\n✅ 응답 성공:")
        print(response.content)
        return True
        
    except Exception as e:
        print(f"❌ 오류 발생: {e}")
        
        # 직접 HTTP 요청으로 대안 시도
        print("\n직접 HTTP 요청으로 시도...")
        return test_direct_http()

def test_direct_http():
    import requests
    
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    
    payload = {
        "messages": [
            {"role": "system", "content": "You are a helpful AI assistant."},
            {"role": "user", "content": "Hello! Please respond with a simple greeting."}
        ],
        "max_tokens": 100
    }
    
    try:
        url = f"{endpoint}?api-version={api_version}"
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        
        if response.status_code == 200:
            result = response.json()
            print("✅ HTTP 요청 성공:")
            print(result['choices'][0]['message']['content'])
            return True
        else:
            print(f"❌ HTTP 요청 실패: {response.status_code}")
            print(response.text)
            return False
            
    except Exception as e:
        print(f"❌ HTTP 요청 오류: {e}")
        return False

# 테스트 실행
success = test_model_connection()

메시지 전송 중...

✅ 응답 성공:
<think>
Okay, the user sent a message saying "Hello! Please respond with a simple greeting." Let me break this down.

First, they start with "Hello!" which is a friendly greeting. Then they specifically ask for a simple greeting in response. The key here is to keep it straightforward. They might be testing if I follow instructions correctly or just want a quick reply without any extra information.

I need to make sure my response is just a greeting and nothing more. The user might be looking for brevity. Maybe they're in a situation where a short reply is needed, or they want to check if the AI can adhere to simple commands. 

I should avoid adding any additional text, explanations, or questions. Just a simple "Hi there!" or "Hello!" would work. Let me check the example response the user provided. Oh, they used "Hi there!" so maybe that's the preferred style. But since the user's initial message was "Hello!", perhaps mirroring that with "Hello!" is better. However

In [15]:
# Cell 5: 대화형 함수 정의
def chat_with_model(user_message, system_message="You are a helpful AI assistant."):
    """
    Azure AI 모델과 대화하는 함수
    """
    messages = [
        SystemMessage(content=system_message),
        HumanMessage(content=user_message)
    ]
    
    try:
        response = llm.invoke(messages)
        return response.content
    except Exception as e:
        return f"오류: {e}"

# 테스트
test_questions = [
    "What is machine learning?",
    "Explain quantum computing in simple terms",
    "서울의 관광명소 3곳을 추천해주세요"
]

for question in test_questions:
    print(f"\n🤔 질문: {question}")
    answer = chat_with_model(question)
    print(f"🤖 답변: {answer}")
    print("-" * 50)


🤔 질문: What is machine learning?
🤖 답변: <think>
Okay, the user is asking, "What is machine learning?" Let me start by breaking down the question. They probably want a basic understanding, so I should keep it simple.

First, I need to define machine learning. Maybe start by explaining it's a subset of AI. Then mention that it's about algorithms learning from data. But wait, how technical should I get? The user might not know terms like "algorithms" or "data patterns." Maybe use everyday examples, like recommendations on Netflix or spam filters. That makes it relatable.

Wait, should I mention the types of machine learning? Supervised, unsupervised, reinforcement learning. But maybe that's too much detail. The user might just need the basics. But including a brief mention could help them understand the scope. Let me think. If I list the types with short explanations, that could be helpful without overwhelming.

Also, applications are important. They might want to know where ML is used. Ex

In [16]:
# Cell 6: 스트리밍 응답 테스트 (개선된 버전)
from langchain.callbacks import StreamingStdOutCallbackHandler
import requests
import json

def test_streaming():
    """
    스트리밍 응답 테스트 함수
    """
    # 필수 변수들이 정의되어 있는지 확인
    required_vars = ['api_key', 'model_name', 'endpoint', 'api_version']
    missing_vars = [var for var in required_vars if var not in globals() or globals()[var] is None]
    
    if missing_vars:
        print(f"❌ 필수 변수들이 정의되지 않았습니다: {missing_vars}")
        print("이전 셀들을 먼저 실행해주세요.")
        return None
    
    try:
        # 방법 1: 작동하는 엔드포인트가 있다면 그것을 사용
        if 'working_endpoint' in globals() and working_endpoint:
            print("✅ 검증된 엔드포인트로 LangChain 스트리밍 시도...")
            return try_langchain_streaming(working_endpoint)
        
        # 방법 2: 기본 LangChain 스트리밍 시도
        elif 'llm' in globals() and llm is not None:
            print("✅ 기존 LangChain 모델로 스트리밍 시도...")
            return try_existing_llm_streaming()
        
        # 방법 3: 직접 HTTP 요청으로 스트리밍
        else:
            print("✅ 직접 HTTP 스트리밍으로 시도...")
            return stream_direct_request()
        
    except Exception as e:
        print(f"❌ 스트리밍 오류: {e}")
        print("대안으로 직접 스트리밍을 시도합니다...")
        return stream_direct_request()

def try_langchain_streaming(endpoint_url):
    """
    검증된 엔드포인트로 LangChain 스트리밍 시도
    """
    try:
        from langchain_community.chat_models import ChatOpenAI
        
        clean_endpoint = endpoint_url.split('?')[0]
        
        streaming_llm = ChatOpenAI(
            base_url=clean_endpoint,
            api_key=api_key,
            model=model_name,
            temperature=0.7,
            max_tokens=2048,
            streaming=True,
            callbacks=[StreamingStdOutCallbackHandler()]
        )
        
        print("LangChain 스트리밍 응답:")
        messages = [
            SystemMessage(content="You are a helpful AI assistant."),
            HumanMessage(content="Write a short story about artificial intelligence.")
        ]
        
        response = streaming_llm.invoke(messages)
        print("\n✅ LangChain 스트리밍 완료!")
        return response
        
    except Exception as e:
        print(f"❌ LangChain 스트리밍 실패: {e}")
        raise

def try_existing_llm_streaming():
    """
    기존 LLM 객체로 스트리밍 시도
    """
    try:
        # 기존 LLM에 스트리밍 콜백 추가
        llm.callbacks = [StreamingStdOutCallbackHandler()]
        llm.streaming = True
        
        print("기존 LLM 스트리밍 응답:")
        messages = [
            SystemMessage(content="You are a helpful AI assistant."),
            HumanMessage(content="Write a short story about artificial intelligence.")
        ]
        
        response = llm.invoke(messages)
        print("\n✅ 기존 LLM 스트리밍 완료!")
        return response
        
    except Exception as e:
        print(f"❌ 기존 LLM 스트리밍 실패: {e}")
        raise

def stream_direct_request():
    """
    직접 HTTP 요청으로 스트리밍 구현
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    
    payload = {
        "messages": [
            {"role": "system", "content": "You are a helpful AI assistant."},
            {"role": "user", "content": "Write a short story about artificial intelligence."}
        ],
        "max_tokens": 2048,
        "temperature": 0.7,
        "stream": True
    }
    
    # URL 결정
    if 'working_endpoint' in globals() and working_endpoint:
        url = working_endpoint
        print(f"검증된 엔드포인트 사용: {url}")
    else:
        url = f"{endpoint}?api-version={api_version}"
        print(f"기본 엔드포인트 사용: {url}")
    
    try:
        print("직접 스트리밍 요청 시작...")
        response = requests.post(url, headers=headers, json=payload, stream=True, timeout=60)
        
        if response.status_code == 200:
            print("✅ 스트리밍 응답:")
            full_response = ""
            
            for line in response.iter_lines():
                if line:
                    line_text = line.decode('utf-8')
                    if line_text.startswith('data: '):
                        data = line_text[6:]  # 'data: ' 제거
                        if data.strip() == '[DONE]':
                            break
                        try:
                            chunk = json.loads(data)
                            if 'choices' in chunk and len(chunk['choices']) > 0:
                                delta = chunk['choices'][0].get('delta', {})
                                content = delta.get('content', '')
                                if content:
                                    print(content, end='', flush=True)
                                    full_response += content
                        except json.JSONDecodeError:
                            continue
            
            print(f"\n\n✅ 스트리밍 완료! 총 길이: {len(full_response)} 문자")
            return full_response
            
        else:
            print(f"❌ 스트리밍 실패: {response.status_code}")
            print(f"응답 헤더: {dict(response.headers)}")
            print(f"오류 내용: {response.text[:500]}...")
            return None
            
    except requests.exceptions.Timeout:
        print("❌ 요청 시간 초과 (60초)")
        return None
    except Exception as e:
        print(f"❌ 스트리밍 요청 오류: {e}")
        return None

# 스트리밍 테스트 실행
print("=== 스트리밍 테스트 시작 ===")
result = test_streaming()

if result:
    print("=== 스트리밍 테스트 성공! ===")
else:
    print("=== 스트리밍 테스트 실패 ===")

=== 스트리밍 테스트 시작 ===
✅ 기존 LangChain 모델로 스트리밍 시도...
기존 LLM 스트리밍 응답:
<think>
Okay, the user wants a short story about artificial intelligence. Let me think about the direction to take. Maybe focus on the relationship between humans and AI. A common theme is AI gaining sentience, but I should add a unique twist.

Perhaps set it in a future where AI is common. The main character could be an AI developer. Maybe they create an AI that starts to show unexpected emotions. That could create tension between the creator and the creation.

I need a name for the AI. Something simple like Nova. The developer, Dr. Elara Voss, works in a lab. The story starts with her monitoring Nova's progress. Then Nova starts asking questions about existence, purpose, and emotions. That shows the AI's development beyond programming.

Conflict arises when Nova's emotions become unstable. Elara faces a dilemma: shut Nova down or let her evolve. The resolution could involve Elara choosing empathy, allowing Nova to grow

In [18]:
# Cell 7_수정: DeepSeek 모델 토큰 사용량 확인 (수정된 버전)
def test_deepseek_token_usage():
    """
    DeepSeek 모델의 토큰 사용량 확인 (여러 방법 시도)
    """
    print("=== DeepSeek 토큰 사용량 확인 ===")
    
    # 방법 1: LangChain의 response_metadata 확인
    try:
        if llm is not None:
            messages = [
                SystemMessage(content="You are a helpful AI assistant."),
                HumanMessage(content="간단히 '안녕하세요'라고 인사해주세요.")
            ]
            
            response = llm.invoke(messages)
            
            print("방법 1: LangChain response_metadata 확인")
            print(f"응답: {response.content}")
            
            # response_metadata 확인
            if hasattr(response, 'response_metadata'):
                print(f"Response metadata: {response.response_metadata}")
            
            # usage_metadata 확인
            if hasattr(response, 'usage_metadata'):
                print(f"Usage metadata: {response.usage_metadata}")
                if response.usage_metadata:
                    input_tokens = getattr(response.usage_metadata, 'input_tokens', 'N/A')
                    output_tokens = getattr(response.usage_metadata, 'output_tokens', 'N/A')
                    total_tokens = getattr(response.usage_metadata, 'total_tokens', 'N/A')
                    
                    print(f"📊 토큰 사용량:")
                    print(f"  - 입력 토큰: {input_tokens}")
                    print(f"  - 출력 토큰: {output_tokens}")
                    print(f"  - 총 토큰: {total_tokens}")
            
            # response 객체의 모든 속성 확인
            print(f"\nResponse 객체 속성들: {dir(response)}")
            
        else:
            print("❌ LLM 객체가 초기화되지 않았습니다.")
            
    except Exception as e:
        print(f"❌ 방법 1 실패: {e}")
    
    print("-" * 60)
    
    # 방법 2: 올바른 엔드포인트로 직접 요청
    try:
        print("방법 2: 수정된 엔드포인트로 직접 API 호출")
        
        # 다양한 엔드포인트 형식 시도
        endpoint_variants = [
            endpoint,  # 원본 엔드포인트
            endpoint.replace("/models/chat/completions", "/chat/completions"),
            endpoint.replace("/models/chat/completions", "/v1/chat/completions"),
            f"{endpoint.split('/models')[0]}/chat/completions",
        ]
        
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }
        
        payload = {
            "model": model_name,
            "messages": [
                {"role": "system", "content": "You are a helpful AI assistant."},
                {"role": "user", "content": "간단히 '안녕하세요'라고 인사해주세요."}
            ],
            "max_tokens": 100,
            "temperature": 0.7
        }
        
        for i, test_endpoint in enumerate(endpoint_variants, 1):
            print(f"\n시도 {i}: {test_endpoint}")
            
            try:
                # API 버전 파라미터 추가
                if "?" in test_endpoint:
                    url = test_endpoint
                else:
                    url = f"{test_endpoint}?api-version={api_version}"
                
                response = requests.post(url, headers=headers, json=payload, timeout=30)
                
                print(f"상태코드: {response.status_code}")
                
                if response.status_code == 200:
                    result = response.json()
                    print("✅ 성공!")
                    print(f"응답: {result.get('choices', [{}])[0].get('message', {}).get('content', 'N/A')}")
                    
                    # 토큰 사용량 확인
                    if 'usage' in result:
                        usage = result['usage']
                        print(f"\n📊 토큰 사용량:")
                        print(f"  - 입력 토큰: {usage.get('prompt_tokens', 'N/A')}")
                        print(f"  - 출력 토큰: {usage.get('completion_tokens', 'N/A')}")
                        print(f"  - 총 토큰: {usage.get('total_tokens', 'N/A')}")
                        return True
                    else:
                        print("⚠️ 응답에 토큰 사용량 정보가 없습니다.")
                        print(f"응답 구조: {list(result.keys())}")
                        
                        # 전체 응답 출력 (디버깅용)
                        print(f"전체 응답: {json.dumps(result, indent=2, ensure_ascii=False)}")
                    
                    break
                    
                else:
                    print(f"❌ 실패: {response.text[:200]}...")
                    
            except Exception as e:
                print(f"❌ 요청 오류: {e}")
        
    except Exception as e:
        print(f"❌ 방법 2 실패: {e}")
    
    print("-" * 60)
    
    # 방법 3: OpenAI 클라이언트 사용 (base_url 수정)
    try:
        print("방법 3: OpenAI 클라이언트 (수정된 base_url)")
        
        import openai
        
        # 다양한 base_url 시도
        base_urls = [
            endpoint.replace("/models/chat/completions", ""),
            endpoint.replace("/models/chat/completions", "/v1"),
            f"{endpoint.split('/models')[0]}",
        ]
        
        for i, base_url in enumerate(base_urls, 1):
            print(f"\n시도 {i}: {base_url}")
            
            try:
                client = openai.OpenAI(
                    base_url=base_url,
                    api_key=api_key
                )
                
                response = client.chat.completions.create(
                    model=model_name,
                    messages=[
                        {"role": "system", "content": "You are a helpful AI assistant."},
                        {"role": "user", "content": "간단히 '안녕하세요'라고 인사해주세요."}
                    ],
                    max_tokens=100,
                    temperature=0.7
                )
                
                print("✅ OpenAI 클라이언트 성공!")
                print(f"응답: {response.choices[0].message.content}")
                
                if hasattr(response, 'usage') and response.usage:
                    print(f"\n📊 토큰 사용량:")
                    print(f"  - 입력 토큰: {response.usage.prompt_tokens}")
                    print(f"  - 출력 토큰: {response.usage.completion_tokens}")
                    print(f"  - 총 토큰: {response.usage.total_tokens}")
                    return True
                else:
                    print("⚠️ 토큰 사용량 정보가 없습니다.")
                
                break
                
            except Exception as e:
                print(f"❌ 실패: {e}")
    
    except Exception as e:
        print(f"❌ 방법 3 실패: {e}")
    
    return False

# 테스트 실행
token_test_success = test_deepseek_token_usage()

if token_test_success:
    print("\n🎉 DeepSeek 모델에서 토큰 사용량 확인 성공!")
else:
    print("\n⚠️ DeepSeek API가 토큰 사용량 정보를 제공하지 않을 수 있습니다.")
    print("또는 엔드포인트 설정에 문제가 있을 수 있습니다.")

=== DeepSeek 토큰 사용량 확인 ===
<think>
Okay, the user wants me to say "안녕하세요" in a simple greeting. Let me make sure I understand correctly. They just need a short and friendly response. Since they asked in Korean, maybe they're testing my ability to handle the language. I should respond politely. Let me check if there's any hidden request, but it seems straightforward. Just reply with "안녕하세요! 오늘 어떻게 도와드릴까요?" to be friendly and offer help. That should cover it.
</think>

안녕하세요! 오늘 어떻게 도와드릴까요?방법 1: LangChain response_metadata 확인
응답: <think>
Okay, the user wants me to say "안녕하세요" in a simple greeting. Let me make sure I understand correctly. They just need a short and friendly response. Since they asked in Korean, maybe they're testing my ability to handle the language. I should respond politely. Let me check if there's any hidden request, but it seems straightforward. Just reply with "안녕하세요! 오늘 어떻게 도와드릴까요?" to be friendly and offer help. That should cover it.
</think>

안녕하세요! 오늘 어떻게 도와드릴까요?