In [1]:
import nest_asyncio
from uuid import uuid1
from openai import AsyncOpenAI

from interview_assistant.core.schemas import QuestionGenerationRequest
from interview_assistant.core.services import AsyncTipService, AsyncQuestionService, QuestionService, TipService
from interview_assistant.core.ai.content_generation.tip_content_generation import AsyncOpenAITipContentGenerator
from interview_assistant.core.ai.content_generation.question_content_generation import AsyncOpenAIQuestionContentGenerator

# Synchronous services, constructed with their default arguments.
tip_service = TipService()
question_service = QuestionService()

# Patch asyncio so an already-running event loop (as in Jupyter) can be re-entered.
nest_asyncio.apply()

# A single AsyncOpenAI client is shared by both async content generators;
# building a second, identical client for the question generator was redundant.
async_client = AsyncOpenAI()

# Async tip service: content generator wrapped around the shared client.
async_tip_generator = AsyncOpenAITipContentGenerator(chat_client=async_client)
async_tip_service = AsyncTipService(content_generator=async_tip_generator)

# Async question service: same wiring, question-specific generator.
async_question_generator = AsyncOpenAIQuestionContentGenerator(chat_client=async_client)
async_question_service = AsyncQuestionService(content_generator=async_question_generator)

In [2]:
# Fresh UUID standing in for the requesting user.
user_id = uuid1()

# Request a single knowledge question on Big Query for a data analyst role.
request = QuestionGenerationRequest(
    user_id=user_id,
    question_type="knowledge question",
    job_title="data analyst",
    skill_to_test="Big Query",
    n=1,
)

# Testing synchronous services

In [3]:
# Generate questions through the synchronous service for the request defined earlier.
new_questions = question_service.generate_questions(request=request)

# A later cell reads new_questions[0]; fail here with a clear message instead of
# an IndexError there if the service returned nothing.
assert new_questions, "question_service.generate_questions returned no questions"
new_questions

[GeneratedQuestion(created_at=datetime.datetime(2025, 2, 13, 14, 13, 36, 328962, tzinfo=datetime.timezone.utc), app_version='0.1.0', question="Write an SQL query in BigQuery to find the top 5 products by sales revenue from a dataset named 'sales_data' with columns 'product_id', 'product_name', 'quantity_sold', and 'price'. Assume the sales revenue is calculated as the product of 'quantity_sold' and 'price'.", expected_answer='The query should look something like this:\n\n```sql\nSELECT product_name, SUM(quantity_sold * price) AS total_revenue\nFROM sales_data\nGROUP BY product_id, product_name\nORDER BY total_revenue DESC\nLIMIT 5;\n```', evaluation_criteria='Correctness, Efficiency, Clarity, Completeness', expected_duration='10-15 minutes', id=UUID('b70fd410-ea14-11ef-86f2-faffc2513cbb'), request=QuestionGenerationRequest(user_id=UUID('b3393f02-ea14-11ef-86f2-faffc2513cbb'), question_type='knowledge question', job_title='data analyst', skill_to_test='Big Query', n=1))]

In [4]:
# Take the id of the first generated question; it is also reused by the
# streaming tip cell further down.
question_id = new_questions[0].id
# The synchronous tip service is called with only the question id.
tip = tip_service.generate_tip(question_id)
# Bare last expression so the notebook displays the generated tip.
tip

GeneratedTip(created_at=datetime.datetime(2025, 2, 13, 14, 13, 43, 890368, tzinfo=datetime.timezone.utc), app_version='0.1.0', tip='Focus on correctly calculating the total revenue per product and using appropriate SQL clauses to aggregate and order the results.', id=UUID('bb919a0a-ea14-11ef-86f2-faffc2513cbb'), request=TipGenerationRequest(question_id=UUID('b70fd410-ea14-11ef-86f2-faffc2513cbb')))

# Testing asynchronous services

In [5]:
# Create an async function to handle the streaming
async def stream_question():
    async for chunk in async_question_service.generate_question_stream(request=request):
        if isinstance(chunk, str):
            print(chunk, end='', flush=True)

# Run the async function
await stream_question()

Write a BigQuery SQL statement to calculate the average time between a user's first visit to a website and their first purchase. Assume a dataset named 'user_data' with columns 'user_id', 'visit_timestamp', and 'purchase_timestamp'. If a user has not made a purchase yet, assume their first purchase time as the current time. 

EXPECTED_ANSWER: 
The candidate should write a BigQuery SQL statement that first identifies the earliest 'visit_timestamp' and 'purchase_timestamp' for each 'user_id', then calculates the time difference between these two timestamps for each user, and finally calculates the average of these time differences across all users. They should also account for users who have not made a purchase yet by assuming their first purchase time as the current time.

EXPECTED_DURATION: 
15 minutes

EVALUATION_CRITERIA:
- SQL proficiency
- Problem-solving skills
- Attention to detail
- Understanding of BigQuery-specific functions and syntax
- Time management skills

In [6]:
# Create an async function to handle the streaming
async def stream_tip():
    async for chunk in async_tip_service.generate_tip_stream(question_id):
        if isinstance(chunk, str):
            print(chunk, end='', flush=True)

# Run the async function
await stream_tip()

Remember to include both 'product_id' and 'product_name' in your GROUP BY clause to ensure unique products are correctly grouped, even if they share the same name.