In [182]:
from pydantic import BaseModel, Field, validator
from typing import List, Optional, Dict
from openai import OpenAI
import instructor
from instructor.function_calls import openai_schema
import os
import re

key = os.environ.get("OPENAI_SECRET")
client = instructor.patch(OpenAI(api_key=key), mode=instructor.Mode.MD_JSON)

Repetitions are linked to time:
- slowing the text
- extend time
- extend suspence
- idea of ritual

There exist different types of repetitions:
- anaphora, epistrophe


In [190]:
class Repetition(BaseModel):
    """
    Figure of speech where the same word or group of words are repeated in successive clauses or sentences. There are different types of repetitions: Anaphoras, Epistrophes, ...
    repeated_string: the group of words being repeated, if any
    repetition_sentences: all sentences containing the repeated_string, if a repeated string exists
    If repeated_string is an empty string, repetition_sentences must be an empty list
    """
    repetition_dict: Optional[Dict[str, List[str]]] = Field(
        default=None,
        description="A dictionary with repeated strings as keys and lists of sentences containing these strings as values."
    )

    @validator('repetition_dict')
    def check_repetition_dict(cls, v):
        """
        Validate that each repeated_str in the dictionary occurs more than once across its repetition_sentences.
        """
        for repeated_str, sentences in v.items():
            if not sentences:
                raise ValueError(f"The list of sentences for '{repeated_str}' is empty. Each key must have at least one sentence.")
            
            concatenated_sentences = ' '.join(sentences)
            pattern = re.escape(repeated_str)
            matches = re.findall(pattern, concatenated_sentences)
            
            if len(matches) < 2:
                raise ValueError(f"The string '{repeated_str}' does not occur more than once in its corresponding sentences.")

        return v


/var/folders/t4/4ysf02095vdb1nrl2vtyynmw0000gn/T/ipykernel_93576/1034562624.py:13: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.5/migration/
  @validator('repetition_dict')


In [191]:
class AnaphoricExpression(Repetition):
    """
    A type of repetition. Occurs when the same word or group of words 
    is repeated at the beginning of successive clauses or sentences 
    that might have different endings.
    Example text containing this type of repetition.: 'This is what I think. I have a dream that my four little children will not be judged by the color of their skin. I have a dream today.'
    """
    __doc__ = Repetition.__doc__+"\n"+__doc__

class EpistrophicExpression(Repetition):
    """
    A type of repetition. Occurs the last word or phrase across successive phrases, clauses or sentences.
    repeated_string: the group of words being repeated
    container: all sentences containing repeated_string
    Example: "When I was a child, I spoke as a child, I understood as a child, I thought as a child."
    """
    __doc__ = Repetition.__doc__+"\n"+__doc__



In [198]:
resp = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {
            "role": "user",
            "content": f"""
            Extract
            `So I went to the library. The library. ciao. Yes the library.` 
            """,
        },
    ],
    response_model=AnaphoricExpression,
    temperature=0
)
resp

AnaphoricExpression(repetition_dict={'library': ['So I went to the library.', 'The library.', 'Yes the library.']})

In [None]:
openai_schema(Repetition).openai_schema

{'name': 'Repetition',
 'description': "Figure of speech where the same word or group of words are repeated in successive clauses or sentences.\nrepeated_string: the group of words being repeated, if any\nrepetition_sentences: all sentences containing the repeated_string, if a repeated string exists\nIf repeated_string is an empty string, repetition_sentences must be an empty list\nExample: 'I am hungry! I am hungry -- she said'\nrepeated_string: 'I am hungry'\nrepetition_sentences: ['I am hungry!', 'I am hungry -- she said']",
 'parameters': {'properties': {'repeated_string': {'anyOf': [{'type': 'string'},
     {'type': 'null'}],
    'default': None,
    'description': 'The group of words being repeated, if any',
    'title': 'Repeated String'},
   'repetition_sentences': {'anyOf': [{'items': {'type': 'string'},
      'type': 'array'},
     {'type': 'null'}],
    'default': None,
    'description': 'All sentences containing the repeated_string, if a repeated string exists',
    'title

In [None]:
openai_schema(AnaphoricExpression).openai_schema

{'name': 'AnaphoricExpression',
 'description': "Figure of speech where the same word or group of words are repeated in successive clauses or sentences.\nrepeated_string: the group of words being repeated, if any\nrepetition_sentences: all sentences containing the repeated_string, if a repeated string exists\nIf repeated_string is an empty string, repetition_sentences must be an empty list\nThere are different types of repetitions: Anaphoras, Epistrophes, ...\n\n\nA type of repetition. Occurs when the same word or group of words \nis repeated at the beginning of successive clauses or sentences \nthat might have different endings.\nExample text containing this type of repetition.: 'This is what I think. I have a dream that my four little children will not be judged by the color of their skin. I have a dream today.'",
 'parameters': {'properties': {'repeated_string': {'anyOf': [{'type': 'string'},
     {'type': 'null'}],
    'default': None,
    'description': 'The group of words being r