# Chapter 7 Guide

## 7.1

In [1]:
import re  #A

# Sample log lines: "<LEVEL> <date> <time> <message>"
logs = [
    "ERROR 2025-06-09 12:34:56 Server failed to respond",
    "INFO 2025-06-09 12:35:56 User logged in",
    "WARNING 2025-06-09 12:36:56 Disk space low",
]

# Two capture groups: group 1 is the date (YYYY-MM-DD), group 2 the time (HH:MM:SS)
pattern = r"(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2})"

# Report the timestamp found in each entry; entries without one are skipped
for entry in logs:
    found = re.search(pattern, entry)
    if found is None:
        continue
    date, time = found.group(1), found.group(2)
    print(f"Date: {date}, Time: {time}")

Date: 2025-06-09, Time: 12:34:56
Date: 2025-06-09, Time: 12:35:56
Date: 2025-06-09, Time: 12:36:56


## 7.2

In [6]:
import openai  #A
import os  #B
from dotenv import load_dotenv  #C
from pydantic import BaseModel  #D
from typing import Optional  #E

# Populate the process environment from a local .env file, then hand the
# OPENAI_API_KEY value (None when the variable is unset) to the openai module.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Define the data model for extracted output  #I
# Used as the response_format of the chat completion call further down, so the
# parsed reply is an instance of this class.
class LogExtraction(BaseModel):  #J
    # Date found in the log entry (the prompt asks for YYYY-MM-DD); may be None.
    date: Optional[str]  #K
    # Time found in the log entry (the prompt asks for HH:MM:SS); may be None.
    time: Optional[str]  #L

# Sample log lines sent to the model, one request per entry
logs = [
    "ERROR 2025-06-09 12:34:56 Server failed to respond",
    "INFO 2025-06-09 12:35:56 User logged in",
    "WARNING 2025-06-09 12:36:56 Disk space low",
]

# Instructions given to the model. The text does not depend on the individual
# log line, so it is built once and repeated for every entry.
extraction_instructions = (
    "You are a data extraction assistant. Extract the date and time from the log entry:\n"
    "- date: Extract the date in YYYY-MM-DD format\n"
    "- time: Extract the time in HH:MM:SS format\n"
    "Return the result as a JSON object matching the LogExtraction structure."
)

# One prompt per log entry, index-aligned with `logs`
row_prompts = [extraction_instructions] * len(logs)

# Process each log entry: one structured-output request per line, printing the
# extracted fields as a plain dict.
for log, prompt in zip(logs, row_prompts):
    try:
        # The system message carries the extraction instructions; the raw log
        # line is sent as the user message.
        completion = openai.beta.chat.completions.parse(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": log},
            ],
            response_format=LogExtraction,
        )

        message = completion.choices[0].message
        if message.parsed is None:
            # No parsed object came back (for example the model refused), so
            # report the refusal text instead of failing on a None attribute.
            print(f"Error processing log entry: no parsed output (refusal: {message.refusal})")
            continue

        # Pydantic v2 deprecates .dict() in favour of .model_dump(); fall back
        # to .dict() so the cell still runs on Pydantic v1.
        to_dict = getattr(message.parsed, "model_dump", None) or message.parsed.dict
        extracted = to_dict()
        print(extracted)

    except Exception as e:
        # Broad on purpose: a failed request is reported and the loop moves on
        # to the next entry.
        print(f"Error processing log entry: {e}")

{'date': '2025-06-09', 'time': '12:34:56'}
{'date': '2025-06-09', 'time': '12:35:56'}
{'date': '2025-06-09', 'time': '12:36:56'}


## 7.3

In [15]:
import pandas as pd  #A

# Function to convert nested JSON to a DataFrame
def json_to_dataframe(json_data):
    """Flatten a nested library document into one DataFrame row per book.

    Parameters
    ----------
    json_data : dict
        Mapping with a top-level 'library' key that holds 'name', 'location'
        and a 'books' list. Each book provides 'title', 'author' (a mapping
        with 'first_name' and 'last_name'), 'genres' (an iterable of strings)
        and 'published_year'.

    Returns
    -------
    pandas.DataFrame
        Columns 'Library Name', 'Location', 'Title', 'Author', 'Genres' and
        'Published Year', in that order. The author is rendered as
        "first last" and the genres are joined with ", ". An empty 'books'
        list yields a zero-row frame that still carries these columns.

    Raises
    ------
    KeyError
        If 'library', 'books', or a field needed for a book row is missing.
    """
    columns = [
        'Library Name',
        'Location',
        'Title',
        'Author',
        'Genres',
        'Published Year',
    ]
    library = json_data['library']
    records = [
        {
            'Library Name': library['name'],
            'Location': library['location'],
            'Title': book['title'],
            'Author': f"{book['author']['first_name']} {book['author']['last_name']}",
            'Genres': ', '.join(book['genres']),
            'Published Year': book['published_year'],
        }
        for book in library['books']
    ]
    # Naming the columns explicitly keeps the schema stable when there are no
    # books; without it an empty record list produces a frame with no columns.
    return pd.DataFrame(records, columns=columns)

# Flatten the nested document into a table and render it.
# NOTE(review): `json_data` is not defined anywhere in the visible cells; it
# presumably comes from an earlier cell. Confirm it exists on a fresh kernel.
df = json_to_dataframe(json_data) #P
# display() is the notebook's rich-output helper; it shows the frame as a table.
display(df) #Q

Unnamed: 0,Library Name,Location,Title,Author,Genres,Published Year
0,City Library,Downtown,Python Programming,John Doe,"Programming, Technology",2020
1,City Library,Downtown,Data Science 101,Jane Smith,"Data Science, AI",2019
