## Variables and basic types

### String formatting
Python offers several ways to format strings. You'll use these constantly when building prompts, logging, and displaying data.

In [None]:
# F-strings
provider = "claude"
model = "sonnet-4.5"
message = f"Using {provider} with model {model}"
print(message)  # Using claude with model sonnet-4.5

In [None]:
# Multi-line f-strings, common for building prompts
# input() always returns a str, so no extra str() conversion is needed
name = input("Give a cool name to your AI assistant: ")
user_input = input("Ask your AI assistant something: ")

# Triple-quoted f-strings keep their line breaks, so the prompt is printed as written
prompt = f"""
You are an AI assistant named {name}.
Your task is to answer the following query:
{user_input}
"""

print(prompt)

### Dictionaries and lists
These structures map directly to JSON, which you'll use for API requests and responses.

In [None]:
user_message = {
    "role": "user",
    "content": "What is the weather today?"
}

# Accessing values
print(user_message["role"])
print(user_message.get("content"))

In [None]:
# Nested dictionaries (common in API payloads)
api_request = {
    "model": "gpt-4",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "Hello!"}
    ],
    "temperature": 0.7
}

models = ["gpt-4", "claude-3", "llama-2"]
print(models[0])

In [None]:
# Adding items
models.append("gemini-pro")

# Iterating
for model in models:
    print(f"Model: {model}")

In [None]:
# List of dictionaries (common pattern)
conversation = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help?"},
    {"role": "user", "content": "Tell me about AI"}
]

## Functions and control flow

### Function definitions
Functions in Python are straightforward. You'll write them to organize API calls, process responses, and build reusable logic.

In [None]:
# Basic function
def create_system_prompt(role: str) -> str:
    """Build a one-sentence system prompt for the given assistant role.

    ``role: str`` is a type hint for the input; ``-> str`` declares the
    type of the value the function returns.
    """
    system_prompt = f"You are a {role}. Please assist the user."
    return system_prompt

create_system_prompt("helpful assistant")

In [None]:
# Function with multiple parameters and default values
def call_llm(prompt: str, model: str = "gpt-4", temperature: float = 0.7) -> dict:
    """
    Bundle the call settings into a request-style dictionary.

    Docstrings like this one describe what a function does.
    They're optional but helpful for complex functions.

    Args:
        prompt: Text of the prompt
        model: Model name, "gpt-4" unless overridden
        temperature: Temperature value, 0.7 unless overridden

    Returns:
        Dictionary with the keys "prompt", "model" and "temperature"
    """
    request = dict(prompt=prompt, model=model, temperature=temperature)
    return request

# Using the function
response = call_llm("What is AI?")  # uses defaults: model = "gpt-4", temperature = 0.7
response = call_llm("What is AI?", model="claude-3", temperature=0.5)  # overrides defaults

print(response)

In [None]:
mock_response = {
  "id": "chatcmpl-8x7k2Pm9qK3j5L8n4Yw6Xz",
  "object": "chat.completion",
  "created": 1729512345,
  "model": "gpt-4",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Hello! I'm an AI assistant. How can I help you today?"
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 12,
    "completion_tokens": 15,
    "total_tokens": 27
  }
}

# Functions can return multiple values (as a tuple)
def parse_response(api_response: dict) -> tuple[str, int]:
    """Pull the reply text and the total token count out of a response.

    Args:
        api_response: Dictionary with a "choices" list and a "usage" dictionary

    Returns:
        Tuple of (content of the first choice's message, total token count)
    """
    message = api_response["choices"][0]["message"]
    reply_text = message["content"]
    total = api_response["usage"]["total_tokens"]
    return reply_text, total

# Unpacking the return values
text, token_count = parse_response(mock_response)

print(f"Text: {text}")
print(f"Tokens: {token_count}")

### Conditional logic
Use conditionals to handle different scenarios in your application logic.

In [None]:
# Basic if/else
def get_model_tier(model_name: str) -> str:
    """Return "premium" for model names starting with "gpt", otherwise "standard"."""
    if model_name.startswith("gpt"):
        tier = "premium"
    else:
        tier = "standard"
    return tier

get_model_tier("gpt-5")

In [None]:
# if/elif/else for multiple conditions
def calculate_cost(tokens: int, model: str) -> float:
    """Compute the cost of a number of tokens for the given model.

    Args:
        tokens: Number of tokens used
        model: Model name; unrecognised names are charged the default rate

    Returns:
        Cost, where each model's rate applies per 1,000 tokens
    """
    if model == "gpt-4":
        rate_per_1k = 0.03
    elif model == "gpt-3.5-turbo":
        rate_per_1k = 0.002
    elif model == "claude-3":
        rate_per_1k = 0.015
    else:
        # Any model not listed above falls back to the default rate
        rate_per_1k = 0.001

    cost = tokens * rate_per_1k / 1000
    return cost

calculate_cost(1000, "gpt-4")

In [None]:
# Checking if values exist (common with API responses)
def extract_content(response: dict) -> str:
    """Return the first choice's message content, or a fallback text.

    The fallback "No response available" is returned when the response has
    no "choices" key or when its "choices" list is empty.
    """
    # Guard clause: handle the "nothing to extract" case first
    if "choices" not in response or len(response["choices"]) == 0:
        return "No response available"

    first_choice = response["choices"][0]
    return first_choice["message"]["content"]

extract_content(mock_response)

In [None]:
# Boolean conditions
def should_retry(status_code: int, attempt: int) -> bool:
    """Return True only for a 429 (rate limited) status with fewer than 3 attempts."""
    rate_limited = status_code == 429
    under_retry_limit = attempt < 3
    return rate_limited and under_retry_limit

should_retry(429, 2)

In [None]:
response = {
    "status_code": 200,
    "data": {"message": "Request successful"}
}
# Ternary operator (inline if/else)
status = "success" if response['status_code'] == 200 else "error"

print(status)

In [None]:
# Checking for None
def process_data(data: dict | None) -> str:
    if data is None:
        return "No data provided"

    return data.get("result", "No result found")

process_data({})

### List comprehensions
List comprehensions provide a concise way to transform and filter data. You'll use them frequently when processing API responses or preparing data.

In [None]:
# Example: extracting specific fields
messages = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi there!"},
    {"role": "user", "content": "How are you?"}
]

# Extract just the content from each message
print([msg["content"] for msg in messages])

In [None]:
# List comprehension with filtering
user_messages = [msg for msg in messages if msg["role"] == "user"]

print(user_messages)

In [None]:
# Transform and filter together
user_contents = [msg["content"] for msg in messages if msg["role"] == "user"]

print(user_contents)

In [None]:
# Working with strings
models = ["gpt-4-turbo", "gpt-3.5-turbo", "claude-3-opus"]

gpt_models = [model for model in models if model.startswith("gpt")]
uppercase_models = [model.upper() for model in models]

print(f"GPT models: {gpt_models} \nUppercase models: {uppercase_models}")

In [None]:
api_responses = [
    {"id": 1, "tokens": 150, "cost": 0.003},
    {"id": 2, "tokens": 200, "cost": 0.004},
    {"id": 3, "tokens": 100, "cost": 0.002}
]

# Extract total tokens
total_tokens = sum([resp["tokens"] for resp in api_responses])

print(total_tokens)

In [None]:
# Create simplified objects
summaries = [
    {"id": resp["id"], "tokens": resp["tokens"]}
    for resp in api_responses
]

print(summaries)

In [None]:
# Dictionary comprehension. Convert list to dictionary
token_map = {resp["id"]: resp["tokens"] for resp in api_responses}

print(token_map)

## Working with external libraries

### Installing packages
Python's ecosystem relies on external packages. You can use any package published on the Python Package Index, PyPI (https://pypi.org/).
You'll install them using `pip` (https://github.com/pypa/pip) or `uv` (https://github.com/astral-sh/uv). This setup already contains the `requests` library from the `requirements.txt` (you can inspect the Dockerfile for details on the installation).

In [None]:
!uv pip install requests

In [None]:
import requests

In [None]:
# Make a GET request to a public API
# timeout (in seconds) stops the cell from hanging forever if the server never answers
response = requests.get("https://api.github.com/users/github", timeout=10)

status = "success" if response.status_code == 200 else "error"

print(f"Status: {status}")

In [None]:
# Parse the JSON response
if status == "success":
    data = response.json()
    print(f"\nUsername: {data['login']}")
    print(f"Name: {data['name']}")
    print(f"Public repos: {data['public_repos']}")

## Intro to classes

Classes let you create custom objects with their own data and behavior. You won't write many classes from scratch, but you'll use them constantly when working with objects provided by the external packages you import.

### Basic class structure

`__init__` sets up the object when you create it. Methods are functions that operate on the object's data.

In [None]:
# Define a class
class ChatMessage:
    """A single chat message: who sent it (role) and what was said (content)."""

    def __init__(self, role: str, content: str):
        """__init__ runs automatically whenever a new instance is created"""
        self.role, self.content = role, content

    def to_dict(self) -> dict:
        """Methods are functions that belong to the class; this one returns the message as a plain dictionary"""
        return dict(role=self.role, content=self.content)

    def is_user_message(self) -> bool:
        """Tell whether the message has the "user" role"""
        sent_by_user = self.role == "user"
        return sent_by_user

In [None]:
# Create instances (objects) of the class
msg1 = ChatMessage(role="user", content="Hello")
msg2 = ChatMessage(role="assistant", content="Hi there!")

# Access attributes
print(msg1.role)
print(msg1.content)

In [None]:
# Call methods
print(msg1.to_dict())
print(msg1.is_user_message())
print(msg2.is_user_message())

### Example: Pydantic data models


In [None]:
from pydantic import BaseModel, Field

# Pydantic classes validate data automatically
class ChatRequest(BaseModel):
    """Settings for a chat request, validated by Pydantic when an instance is created."""

    # Required: no default value is given
    message: str
    # Optional: falls back to "gpt-4" when not provided
    model: str = "gpt-4"
    # Optional: defaults to 0.7; ge=0 and le=2 constrain the allowed values to 0 through 2
    temperature: float = Field(default=0.7, ge=0, le=2)

# Create instance - Pydantic validates the data
request = ChatRequest(message="What is AI?", temperature=0.5)

In [None]:
# Access attributes
print(request.message)
print(request.model)

In [None]:
# Convert to dict (useful for API calls)
print(request.model_dump())

In this course, you'll mostly use pre-built classes from libraries (Pydantic models, LangChain classes, etc.) rather than writing your own. Understanding how to create instances and call methods is what matters.

### Intro to decorators

A **decorator** is a function that modifies another function's behavior. The `@` symbol applies a decorator to the function below it. Here is an example of Pydantic decorators:

In [None]:
from pydantic import BaseModel, Field, field_validator
import requests

class Post(BaseModel):
    """A post with typed fields and two custom field validators."""

    # Declared fields; Pydantic checks each value against its type annotation
    userId: int
    id: int
    title: str
    body: str

    # Decorators apply bottom-up: @classmethod first, then @field_validator registers the result
    @field_validator('title')
    @classmethod
    def title_must_not_be_empty(cls, v):
        """Reject titles that are empty or contain only whitespace; return the title unchanged otherwise."""
        if not v or not v.strip():
            raise ValueError('Title cannot be empty')
        return v

    @field_validator('userId')
    @classmethod
    def user_id_must_be_positive(cls, v):
        """Reject user IDs that are zero or negative; return the ID unchanged otherwise."""
        if v <= 0:
            raise ValueError('User ID must be positive')
        return v

In [None]:
# timeout (in seconds) stops the cell from hanging forever if the server never answers
response = requests.get("https://jsonplaceholder.typicode.com/posts/1", timeout=10)
# Fail with a clear HTTP error here rather than a confusing validation error below
response.raise_for_status()
post = Post(**response.json())

print(f"Validated post: {post.title}")
print(f"User ID: {post.userId}")

In this code, there are 2 decorators: `@field_validator` and `@classmethod`. Here is what each decorator does:

1. `@classmethod` - Makes this a class method. Instead of operating on a specific instance of the class (which would use `self`), it operates on the class itself (using `cls`). This is required by Pydantic's validators.
2. `@field_validator('title')` - Tells Pydantic: "Run this function when validating the `title` field"

Validation happens here automatically: 
```python
post = Post(**response.json()) 
```

When you create a `Post` object, Pydantic:
1. Checks the `title` field exists and is a string
2. Runs `title_must_not_be_empty(cls, v)` where `v` is the title value
3. If validation fails (raises `ValueError`), object creation fails
4. If validation passes (returns `v`), the object is created

The pattern:
- `v` = the value being validated
- Return `v` if valid (or a transformed version)
- Raise `ValueError` if invalid

A decorator wraps a function with extra behavior. Here, `@field_validator` wraps your validation function so Pydantic knows to call it automatically.

You won't need to write them yourself, but you'll see them everywhere later in the course. 

### Exercise: GitHub repository analyzer   
Core concepts needed: string formatting, dictionaries, lists, list comprehensions   

Here is the utility function you can use during implementation:    

In [None]:
import requests
from typing import Optional


# Utility function for implementation testing
def fetch_github_user_repos(
    username: str,
    token: Optional[str] = None,
    timeout: float = 10.0,
) -> list[dict]:
    """
    Fetch a user's repositories from the GitHub API in simplified form.

    Sends a single GET request, so only what the API returns for that one
    request is included (no pagination is performed).

    Args:
        username: GitHub username whose repositories are listed
        token: Optional access token, sent as a Bearer Authorization header
        timeout: Seconds to wait for the server before giving up

    Returns:
        List of dictionaries with keys: name, description, stars, forks,
        language, url, updated_at. Values missing from the API payload are
        None, except stars and forks, which fall back to 0.

    Raises:
        requests.HTTPError: If the API answers with an error status code
        requests.Timeout: If the server does not answer within `timeout` seconds
    """
    url = f"https://api.github.com/users/{username}/repos"

    headers = {"Accept": "application/vnd.github.v3+json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"

    # Without a timeout, requests waits indefinitely on an unresponsive server
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    # Keep only the fields the exercise needs, under shorter key names
    return [
        {
            "name": repo.get("name"),
            "description": repo.get("description"),
            "stars": repo.get("stargazers_count", 0),
            "forks": repo.get("forks_count", 0),
            "language": repo.get("language"),
            "url": repo.get("html_url"),
            "updated_at": repo.get("updated_at"),
        }
        for repo in response.json()
    ]

# Example usage: test_repos = fetch_github_user_repos("octocat", token=None)

Implement the following functions to analyze GitHub repository data:

In [None]:
def format_repo_summary(repo: dict) -> str:
    """
    Format a repository summary string.

    Args:
        repo: Dictionary with keys: name, description, stars, language, url
            (description and language may be None in the fetched data)

    Returns:
        Three-line string: a header line, the description, and the URL, e.g.
        "repo-name (Python) - 1,234 ⭐
         Description here
         https://github.com/user/repo"

        The header shows the repo's language in parentheses and the star
        count with a thousands separator, followed by the ⭐ emoji.
    """
    # Exercise placeholder: replace `pass` with your implementation
    pass

def filter_repos_by_language(repos: list[dict], language: str) -> list[dict]:
    """
    Filter repositories by programming language.

    Args:
        repos: List of repository dictionaries
        language: Programming language to filter by (case-insensitive,
            so "python", "Python" and "PYTHON" select the same repos)

    Returns:
        Filtered list containing only the repositories whose "language"
        matches
    """
    # Exercise placeholder: replace `pass` with your implementation
    pass

def get_top_repos(repos: list[dict], n: int = 5, sort_by: str = "stars") -> list[dict]:
    """
    Get top N repositories sorted by specified field (in descending order).

    Args:
        repos: List of repository dictionaries
        n: Number of repos to return
        sort_by: Field to sort by ("stars", "forks", "updated_at")

    Returns:
        Top N repositories, highest value first. When n is larger than the
        number of repositories, all of them are returned.
    """
    # Exercise placeholder: replace `pass` with your implementation
    pass

Test cases:

In [None]:
def run_all_tests():
    """
    Run the exercise checks against live data from octocat's GitHub profile.

    Fetches the repositories once, then exercises format_repo_summary,
    filter_repos_by_language and get_top_repos, printing progress as it goes.
    Errors raised while fetching the repositories propagate to the caller.

    Raises:
        AssertionError: If one of the checked functions returns a wrong result
    """
    print("=" * 70)
    print("GITHUB REPOSITORY ANALYZER")
    print("=" * 70)
    print()

    print("Fetching octocat's repositories from GitHub API...")
    repos = fetch_github_user_repos("octocat", token=None)
    print(f"Fetched {len(repos)} repositories\n")

    # Test 1: format_repo_summary
    print("Test 1: format_repo_summary")
    print("-" * 70)
    if repos:
        # Get the most starred repo
        top_repo = max(repos, key=lambda r: r.get("stars", 0))
        summary = format_repo_summary(top_repo)

        print(f"Testing with: {top_repo['name']}")
        print(f"Result:\n{summary}\n")

        # Assertions
        assert top_repo["name"] in summary, "Name should be in summary"
        assert "⭐" in summary, "Should contain star emoji"
        assert top_repo["url"] in summary, "URL should be in summary"

        # Check formatting of stars
        if top_repo["stars"] >= 1000:
            assert "," in summary, "Stars >= 1000 should have comma separator"

        print("format_repo_summary test passed!\n")

    # Test 2: filter_repos_by_language
    print("Test 2: filter_repos_by_language")
    print("-" * 70)

    # Get all languages present (repos without a language are ignored)
    languages = {r["language"] for r in repos if r.get("language")}
    print(f"Languages found: {languages}\n")

    for language in languages:
        filtered = filter_repos_by_language(repos, language)

        # Verify all filtered repos have the correct language
        for repo in filtered:
            assert repo.get("language") == language, f"Expected {language}, got {repo.get('language')}"

        assert len(filtered) > 0, f"Should find at least one {language} repo"
        print(f"{language}: Found {len(filtered)} repositories")

    # Test case insensitivity
    if languages:
        test_lang = next(iter(languages))
        lower_result = filter_repos_by_language(repos, test_lang.lower())
        upper_result = filter_repos_by_language(repos, test_lang.upper())
        mixed_result = filter_repos_by_language(repos, test_lang)

        # Compare the results themselves: equal lengths alone could hide
        # a filter that returns the wrong repositories for one spelling
        assert lower_result == upper_result == mixed_result, \
            "Filtering should be case-insensitive"
        print("Case-insensitive filtering works\n")

    print("filter_repos_by_language test passed!\n")

    # Test 3: get_top_repos by stars
    print("Test 3: get_top_repos (by stars)")
    print("-" * 70)

    n = min(5, len(repos))
    top_by_stars = get_top_repos(repos, n=n, sort_by="stars")

    assert len(top_by_stars) == n, f"Expected {n} repos, got {len(top_by_stars)}"

    # Verify sorted in descending order (each repo against the one after it)
    for current, following in zip(top_by_stars, top_by_stars[1:]):
        assert current["stars"] >= following["stars"], \
            "Repos should be sorted by stars in descending order"

    print(f"Top {n} repositories by stars:")
    for i, repo in enumerate(top_by_stars, 1):
        print(f"  {i}. {repo['name']} - {repo['stars']:,} ⭐")

    print("\nget_top_repos (stars) test passed!\n")

    # Test 4: get_top_repos by forks
    print("Test 4: get_top_repos (by forks)")
    print("-" * 70)

    top_by_forks = get_top_repos(repos, n=n, sort_by="forks")

    assert len(top_by_forks) == n, f"Expected {n} repos, got {len(top_by_forks)}"

    # Verify sorted in descending order (each repo against the one after it)
    for current, following in zip(top_by_forks, top_by_forks[1:]):
        assert current["forks"] >= following["forks"], \
            "Repos should be sorted by forks in descending order"

    print(f"Top {n} repositories by forks:")
    for i, repo in enumerate(top_by_forks, 1):
        print(f"  {i}. {repo['name']} - {repo['forks']:,} forks")

    print("\nget_top_repos (forks) test passed!\n")

    # Test 5: Limiting results
    print("Test 5: Result limiting")
    print("-" * 70)

    if len(repos) >= 3:
        top_2 = get_top_repos(repos, n=2, sort_by="stars")
        assert len(top_2) == 2, f"Expected 2 repos, got {len(top_2)}"
        print("Correctly limits to n repos")

    # Request more than available
    large_n = len(repos) + 100
    all_repos = get_top_repos(repos, n=large_n, sort_by="stars")
    assert len(all_repos) == len(repos), \
        "Should return all available repos when n > total repos"
    print("Handles n > total repos correctly")

    print("\nResult limiting test passed!\n")

    # Summary
    print("=" * 70)
    print("ALL TESTS PASSED")
    print("=" * 70)
    print(f"\nTested with {len(repos)} repositories from octocat's profile")
    print(f"Languages tested: {', '.join(sorted(languages))}")
    # top_by_stars is empty when no repositories were fetched; indexing it would crash
    if top_by_stars:
        print(f"Most starred repo: {top_by_stars[0]['name']} ({top_by_stars[0]['stars']:,} ⭐)")

In [None]:
try:
    run_all_tests()
except Exception as e:
    print(f"\nTEST FAILED: {e}")

### Solution

In [None]:
def format_repo_summary(repo: dict) -> str:
    """
    Format a repository summary string.

    Missing or empty values are replaced by placeholders: "Unknown" for the
    name and language, "No description provided" for the description, an
    empty string for the URL and 0 for the stars.

    Args:
        repo: Dictionary with keys: name, description, stars, language, url

    Returns:
        Formatted string like:
        "repo-name (Python) - 1,234 ⭐
         Description here
         https://github.com/user/repo"
    """
    # `or` instead of a .get() default: a key can be present with the value
    # None (e.g. a repo without a description), and .get() only uses its
    # default when the key is missing entirely.
    name = repo.get("name") or "Unknown"
    description = repo.get("description") or "No description provided"
    language = repo.get("language") or "Unknown"
    url = repo.get("url") or ""
    stars = repo.get("stars") or 0

    # {stars:,} adds the thousands separator (1234 -> 1,234)
    return f"{name} ({language}) - {stars:,} ⭐\n{description}\n{url}"


def filter_repos_by_language(repos: list[dict], language: str) -> list[dict]:
    """
    Select the repositories written in the given programming language.

    Args:
        repos: List of repository dictionaries
        language: Programming language to keep (compared case-insensitively)

    Returns:
        The matching repositories, in their original order
    """
    wanted = language.lower()
    matches = []
    for repo in repos:
        repo_language = repo.get("language")
        # Repos without a language (missing, None or empty) can never match
        if not repo_language:
            continue
        if repo_language.lower() == wanted:
            matches.append(repo)
    return matches


def get_top_repos(repos: list[dict], n: int = 5, sort_by: str = "stars") -> list[dict]:
    """
    Get top N repositories sorted by specified field (in descending order).

    Repositories whose sort field is missing or None are ranked last.

    Args:
        repos: List of repository dictionaries
        n: Number of repos to return; zero or a negative number gives an empty list
        sort_by: Field to sort by ("stars", "forks", "updated_at")

    Returns:
        Top N repositories (all of them when n exceeds their number)
    """
    if n <= 0:
        # A negative n would otherwise slice from the end (e.g. [:-1] drops the last repo)
        return []

    def sort_key(repo: dict) -> tuple:
        value = repo.get(sort_by)
        # The leading flag ranks repos that have a value above those that
        # don't, and keeps None from being compared with numbers or strings.
        return (value is not None, 0 if value is None else value)

    # Sort in descending order, then keep the first n entries
    return sorted(repos, key=sort_key, reverse=True)[:n]