# In [None]:
# test_local_ai.ipynb

# Блок 1: Импорты и настройка vLLM
import os
from vllm import LLM, SamplingParams
from openai import OpenAI
import json

# Configure an in-process vLLM engine for Qwen3.
# NOTE(review): none of the functions visible in this file reference `model`;
# they all go through `client` below. Confirm the in-process engine is needed
# in addition to the server listening on localhost:8000.
model = LLM(
    model="Qwen/Qwen-3-7B",  # or a path to your local model; NOTE(review): verify this id exists on the model hub
    trust_remote_code=True,
    tensor_parallel_size=1  # adjust to match your GPU setup
)

# Create an OpenAI-compatible client.
# NOTE(review): the request functions in this file ask for model "Qwen-3-7B",
# which differs from the id passed to LLM above; confirm the name the server
# actually serves.
client = OpenAI(
    base_url="http://localhost:8000/v1",  # vLLM OpenAI-compatible endpoint
    api_key="not-needed"  # vLLM does not require an API key
)

# Блок 2: Системные промпты и вспомогательные функции
# System message sent with the question-generation request.
SYSTEM_PROMPT_QUESTIONS = (
    "You are an expert in coming up with follow up questions "
    "to uncover deeper insights."
)

# System message sent with the communication-analysis request: a one-line
# role statement followed by three numbered tasks, one per line.
SYSTEM_PROMPT_ANALYSIS = "\n".join(
    (
        "You are an expert in analyzing communication skills from interview transcripts. Your task is to:",
        "1. Analyze the communication skills demonstrated in the transcript",
        "2. Identify specific quotes that support your analysis",
        "3. Provide a detailed breakdown of strengths and areas for improvement",
    )
)

def format_questions_prompt(name, objective, number, context):
    """Build the user prompt that asks for interview questions.

    The four arguments are interpolated verbatim into a fixed instruction
    text: ``name`` as the interview title, ``objective`` as the interview
    objective, ``number`` as the number of questions to generate and
    ``context`` as the background material.

    Returns:
        The full prompt as one string. Its last line tells the model to
        output only a JSON object with the keys 'questions' and
        'description'.
    """
    # One tuple entry per output line, so the whitespace-only second line and
    # the trailing space after "question." stay visible instead of hiding
    # inside a multi-line literal.
    prompt_lines = (
        "Imagine you are an interviewer specialized in designing interview questions to help hiring managers find candidates with strong technical expertise and project experience, making it easier to identify the ideal fit for the role.",
        "              ",
        f"Interview Title: {name}",
        f"Interview Objective: {objective}",
        "",
        f"Number of questions to be generated: {number}",
        "",
        "Follow these detailed guidelines when crafting the questions:",
        "- Focus on evaluating the candidate's technical knowledge and their experience working on relevant projects. Questions should aim to gauge depth of expertise, problem-solving ability, and hands-on project experience. These aspects carry the most weight.",
        "- Include questions designed to assess problem-solving skills through practical examples. For instance, how the candidate has tackled challenges in previous projects, and their approach to complex technical issues.",
        "- Soft skills such as communication, teamwork, and adaptability should be addressed, but given less emphasis compared to technical and problem-solving abilities.",
        "- Maintain a professional yet approachable tone, ensuring candidates feel comfortable while demonstrating their knowledge.",
        "- Ask concise and precise open-ended questions that encourage detailed responses. Each question should be 30 words or less for clarity.",
        "",
        "Use the following context to generate the questions:",
        f"{context}",
        "",
        "Moreover generate a 50 word or less second-person description about the interview to be shown to the user. It should be in the field 'description'.",
        "Do not use the exact objective in the description. Remember that some details are not be shown to the user. It should be a small description for the",
        "user to understand what the content of the interview would be. Make sure it is clear to the respondent who's taking the interview.",
        "",
        "The field 'questions' should take the format of an array of objects with the following key: question. ",
        "",
        "Strictly output only a JSON object with the keys 'questions' and 'description'.",
    )
    return "\n".join(prompt_lines)

def format_analysis_prompt(transcript):
    """Build the user prompt that asks for a communication-skills analysis.

    ``transcript`` is inserted verbatim after a "Transcript: " label. The
    prompt ends with the annotated JSON layout the reply should follow.

    Returns:
        The full prompt as one string.
    """
    # Plain (non-f) literal: the braces of the JSON layout need no escaping.
    response_schema = """{
  "communicationScore": number, // Score from 0-10 based on the standard communication scoring system
  "overallFeedback": string,   // 2-3 sentence summary of communication skills
  "supportingQuotes": [        // Array of relevant quotes with analysis
    {
      "quote": string,         // The exact quote from the transcript
      "analysis": string,      // Brief analysis of what this quote demonstrates about communication skills
      "type": string          // Either "strength" or "improvement_area"
    }
  ],
  "strengths": [string],       // List of communication strengths demonstrated
  "improvementAreas": [string] // List of areas where communication could be improved
}"""
    opening = "Analyze the communication skills demonstrated in the following interview transcript:"
    # The three sections are separated from each other by one blank line.
    sections = (
        opening,
        f"Transcript: {transcript}",
        "Please provide your analysis in the following JSON format:\n" + response_schema,
    )
    return "\n\n".join(sections)

# Блок 3: Функция генерации вопросов
# Model name sent with every chat request unless the caller overrides it.
# NOTE(review): the module-level LLM is created with "Qwen/Qwen-3-7B"; confirm
# which name the server behind `client` actually serves.
_DEFAULT_CHAT_MODEL = "Qwen-3-7B"


def _parse_json_reply(reply_text):
    """Decode the JSON value carried by a chat completion reply.

    The reply is first parsed as-is. If that fails, anything up to and
    including the last ``</think>`` tag is discarded and the span from the
    first ``{`` to the last ``}`` of the remainder is parsed instead, so a
    reply that wraps the object in a reasoning preamble, Markdown fences or
    surrounding prose still decodes.

    Raises:
        json.JSONDecodeError: if neither attempt yields valid JSON. A missing
            reply (``None``) is treated as an empty string and ends up here
            as well.
    """
    text = (reply_text or "").strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # rpartition returns the whole text as the last element when the tag
        # is absent, so this is a no-op for replies without a reasoning part.
        candidate = text.rpartition("</think>")[2]
        start = candidate.find("{")
        end = candidate.rfind("}")
        if start == -1 or end < start:
            # Nothing that looks like an object: surface the original error.
            raise
        return json.loads(candidate[start:end + 1])


def _chat_json(system_prompt, user_prompt, *, model, temperature, max_tokens):
    """Send one system+user exchange through `client` and decode the JSON reply."""
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # NOTE(review): a reply cut short by max_tokens is incomplete JSON and
    # will still raise json.JSONDecodeError here.
    return _parse_json_reply(response.choices[0].message.content)


def generate_interview_questions(name, objective, number, context, *,
                                 model=_DEFAULT_CHAT_MODEL, temperature=0.7,
                                 max_tokens=1000):
    """Ask the chat model for interview questions and return the decoded JSON.

    Args:
        name: Interview title placed in the prompt.
        objective: Interview objective placed in the prompt.
        number: Number of questions requested in the prompt.
        context: Background text the questions should be based on.
        model: Model name sent with the request.
        temperature: Sampling temperature sent with the request.
        max_tokens: Completion token limit sent with the request.

    Returns:
        The decoded JSON reply. The prompt asks for an object with the keys
        'questions' and 'description'; the reply is not validated against
        that shape.

    Raises:
        json.JSONDecodeError: if the reply contains no decodable JSON.
    """
    return _chat_json(
        SYSTEM_PROMPT_QUESTIONS,
        format_questions_prompt(name, objective, number, context),
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
    )


# Block 4: Communication analysis function
def analyze_communication(transcript, *, model=_DEFAULT_CHAT_MODEL,
                          temperature=0.7, max_tokens=1000):
    """Ask the chat model to analyze a transcript and return the decoded JSON.

    Args:
        transcript: Interview transcript text placed in the prompt.
        model: Model name sent with the request.
        temperature: Sampling temperature sent with the request.
        max_tokens: Completion token limit sent with the request.

    Returns:
        The decoded JSON reply. The prompt asks for the fields
        'communicationScore', 'overallFeedback', 'supportingQuotes',
        'strengths' and 'improvementAreas'; the reply is not validated
        against that shape.

    Raises:
        json.JSONDecodeError: if the reply contains no decodable JSON.
    """
    return _chat_json(
        SYSTEM_PROMPT_ANALYSIS,
        format_analysis_prompt(transcript),
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
    )

# Блок 5: Тестирование
# Пример использования
if __name__ == "__main__":
    # Sample interview definition. The keys match the parameter names of
    # generate_interview_questions, so the dict is unpacked straight into it.
    test_data = dict(
        name="Senior Python Developer Interview",
        objective="Evaluate technical expertise in Python and system design",
        number=5,
        context="Looking for a senior Python developer with experience in distributed systems",
    )

    # Step 1: generate the questions and show the decoded reply.
    print("Generating interview questions...")
    questions_result = generate_interview_questions(**test_data)
    print("\nGenerated Questions:")
    print(json.dumps(questions_result, indent=2))

    # Sample transcript used as input for the analysis step.
    test_transcript = """
    Interviewer: Can you tell me about your experience with distributed systems?
    Candidate: I've worked extensively with microservices architecture. In my last project, I designed a system that handled 1 million requests per second using Python and Redis for caching.
    Interviewer: How did you handle scaling issues?
    Candidate: We implemented horizontal scaling using Kubernetes and used Redis Cluster for distributed caching. I also wrote custom monitoring tools in Python to track system performance.
    """

    # Step 2: analyze the transcript and show the decoded reply.
    print("\nAnalyzing communication...")
    analysis_result = analyze_communication(test_transcript)
    print("\nCommunication Analysis:")
    print(json.dumps(analysis_result, indent=2))