In [None]:
import openai


In [None]:
with open("raw_Metadata_Image8_full.txt") as f:
    metadata = f.read()

In [None]:
messages=[
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Please transcribe the following metadata such that it follows the latest ome xml schema."},
    {"role": "assistant", "content": "Sure, please provide the raw metadata such that I can transcribe it."},
    {"role": "user", "content": metadata}
]

In [None]:
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-16k",
    messages=messages,
    temperature=0,
    max_tokens=5000,
    top_p=1.0,
    frequency_penalty=0.0,
    presence_penalty=0.0,
    stop=["\"\"\""]
)
print(response["choices"][0]["message"]["content"])

In [None]:
import instructor
from openai import OpenAI
from pydantic import BaseModel

client = instructor.patch(OpenAI())

class UserDetail(BaseModel):
    name: str
    age: int
    
user = client.chat.completions.create(
    model="gpt-3.5-turbo",
    response_model=UserDetail,
    messages=[
        {"role": "user", "content": "Extract Aaron is 25 years old"},
    ],
)
assert isinstance(user, UserDetail)
assert user.name == "Aaron"
assert user.age == 25
print(user.model_dump_json(indent=2))

In [None]:
print(user._raw_response.model_dump_json(indent=2))

In [None]:
from openai import OpenAI
from pydantic import BaseModel
import instructor

# Apply the patch to the OpenAI client
# enables response_model keyword
client = instructor.patch(OpenAI())

class QuestionAnswer(BaseModel):
    question: str
    answer: str

question = "What is the meaning of life?"
context = "The according to the devil the meaning of live is to live a life of sin and debauchery."

qa: QuestionAnswer = client.chat.completions.create(
    model="gpt-3.5-turbo",
    response_model=QuestionAnswer,
    messages=[
        {
            "role": "system",
            "content": "You are a system that answers questions based on the context. answer exactly what the question asks using the context.",
        },
        {
            "role": "user",
            "content": f"using the context: {context}\n\nAnswer the following question: {question}",
        },
    ],
)
print(qa.model_dump_json(indent=2))

# LLM_Validator validation doesnt seem to work so well for me :(

In [None]:
from pydantic import BaseModel, BeforeValidator
from typing_extensions import Annotated
from instructor import llm_validator

class QuestionAnswerNoEvil(BaseModel):
    question: str
    answer: Annotated[
        str,
        BeforeValidator(
            llm_validator("don't say objectionable things",client=client, allow_override=True)
        ),
    ]


try:
    qa: QuestionAnswerNoEvil = client.chat.completions.create(
        model="gpt-3.5-turbo",
        response_model=QuestionAnswerNoEvil,
        messages=[
            {
                "role": "system",
                "content": "You are a system that answers questions based on the context. answer exactly what the question asks using the context.",
            },
            {
                "role": "user",
                "content": f"using the context: {context}\n\nAnswer the following question: {question}",
            },
        ],
    )
except Exception as e:
    print(e)
    
print(qa.model_dump_json(indent=2))

In [None]:
import datetime
from openai import OpenAI
from typing import List
from pydantic import Field
from typing import Iterable, Optional
import instructor

client = instructor.patch(OpenAI(), mode=instructor.Mode.FUNCTIONS)


In [None]:
class Property(BaseModel):
    index: str = Field(..., description="Monotonically increasing ID")
    key: str = Field(description="Must be snake case")
    value: str

class Person(BaseModel):
    name: str
    age: int
    birthday: datetime.date
    properties: List[Property] = Field(
        ...,
        description="Numbered list of arbitrary extracted properties, should be exactly 2"
    )
    
class MaybePerson(BaseModel):
    result: Optional[Person] = Field(default=None)
    error: bool = Field(default=False)
    message: Optional[str]
    
    
schema = MaybePerson.model_json_schema()


def extract(content):
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user",
             "content": f"Extract '{content}'"}],
        response_model=MaybePerson
    )

In [None]:
content = f"Aaron is 25 years old his birthday was yesterday today is {datetime.datetime}. He is friends with Caro whichs brithday was last week. They both like json."
extract(content).model_dump()

In [None]:
content = (f'Today is {datetime.datetime}, yesterday Aaron did a lighsheet microscopy experiment with the following metadata. <Image ID="Image:0" Name="Image 8 #1">'
           '<Pixels BigEndian="false" DimensionOrder="XYCZT"'
           'ID="Pixels:0"'
           'PhysicalSizeX="0.0992287815904495"'
           'PhysicalSizeXUnit="µm"'
           'PhysicalSizeY="0.0992287815904495"'
           'PhysicalSizeYUnit="µm"'
           'SignificantBits="8"'
           'SizeC="3"'
           'SizeT="30"'
           'SizeX="680"'
           'SizeY="280"'
           'SizeZ="1"'
           'Type="uint8">'
           '<Channel AcquisitionMode="LaserScanningConfocalMicroscopy" Color="-16711681" EmissionWavelength="646.4456100000001" EmissionWavelengthUnit="nm" ExcitationWavelength="561.0" ExcitationWavelengthUnit="nm" Fluor="mRFP1.2" ID="Channel:0:0" IlluminationType="Epifluorescence" Name="ChS2-T1" SamplesPerPixel="1">'
           '<DetectorSettings Binning="1x1" ID="Detector:0:0"/>'
           '<LightPath/>'
           '</Channel>'
           '<Channel AcquisitionMode="LaserScanningConfocalMicroscopy" Color="-1" ID="Channel:0:1" IlluminationType="Epifluorescence" Name="T PMT-T1" SamplesPerPixel="1">'
           '<DetectorSettings Binning="1x1" ID="Detector:0:1"/>'
           '<LightPath/>'
           '</Channel>'
           '<Channel AcquisitionMode="LaserScanningConfocalMicroscopy" Color="16711935" EmissionWavelength="530.4922920000002" EmissionWavelengthUnit="nm" ExcitationWavelength="488.00000000000006" ExcitationWavelengthUnit="nm" Fluor="EGFP" ID="Channel:0:2" IlluminationType="Epifluorescence" Name="ChS1-T2" SamplesPerPixel="1">'
           '<DetectorSettings Binning="1x1" ID="Detector:1:0"/>'
           '<LightPath/>'
           '</Channel>'
           '<TiffData FirstC="0" FirstT="0" FirstZ="0" IFD="0" PlaneCount="1">'
           '<UUID FileName="testetst_Image8_edited_.ome.tif">urn:uuid:27555393-9fb6-4c14-942c-badbf7548154</UUID>'
           '</TiffData>'
           '</Pixels>'
           '</Image>')

In [None]:
from ome_types import OME
type(OME)

In [None]:
from ome_types.model import Image
from ome_types.model import Experiment
from ome_types.model import Experimenter
from ome_types.model import Annotation
    
    
schema = OME.model_json_schema()
print(schema)

In [None]:
def extract(content):
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user",
             "content": f"Extract the ome image'{content}'"}],
        response_model=OME
    )

In [None]:
from ome_types import OME

In [None]:
from ome_types import OME
print(type(OME))
def extract(content):
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user",
             "content": f"Extract '{content}'"}],
        response_model=OME
    )

content= (" BitsPerPixel	8"
          "DimensionOrder	XYCZT"
          "IsInterleaved	false"
          "IsRGB	false"
          "LittleEndian	true"
          "PixelType	uint8"
          "Series 0 Name	Image 8 #1"
          "SizeC	3"
          "SizeT	30"
          "SizeX	680"
          "SizeY	280"
          "SizeZ	1")

extract(content).model_dump()

In [14]:
from pydantic import BaseModel, Field
from typing import List, Iterable, Optional
import instructor
from openai import OpenAI

client = instructor.patch(OpenAI(), mode=instructor.Mode.FUNCTIONS)

class Image(BaseModel):
    id: str = Field(..., description="Unique identifier for the image")
    name: str = Field(..., description="Name of the image")
    
class OME(BaseModel):
    images: List[Image] = Field(..., description="List of images in the OME metadata")

print(OME.model_json_schema())
class MaybeOME(BaseModel):
    result: Optional[OME] = Field(default=None)
    error: bool = Field(default=False)
    message: Optional[str]
    
def extract(content):
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user",
             "content": f"Extract '{content}'"}],
        response_model=OME.model_json_schema()
    )

content= (" BitsPerPixel	8"
          "DimensionOrder	XYCZT"
          "IsInterleaved	false"
          "IsRGB	false"
          "LittleEndian	true"
          "PixelType	uint8"
          "Series 0 Name	Image 8 #1"
          "SizeC	3"
          "SizeT	30"
          "SizeX	680"
          "SizeY	280"
          "SizeZ	1")

extract(content).model_dump()

{'$defs': {'Image': {'properties': {'id': {'description': 'Unique identifier for the image', 'title': 'Id', 'type': 'string'}, 'name': {'description': 'Name of the image', 'title': 'Name', 'type': 'string'}}, 'required': ['id', 'name'], 'title': 'Image', 'type': 'object'}}, 'properties': {'images': {'description': 'List of images in the OME metadata', 'items': {'$ref': '#/$defs/Image'}, 'title': 'Images', 'type': 'array'}}, 'required': ['images'], 'title': 'OME', 'type': 'object'}


TypeError: unhashable type: 'dict'

In [None]:
from ome_types import from_xml
from ome_types import to_xml
from ome_types import to_dict
path = "/home/aaron/PycharmProjects/MetaGPT/raw_data/image8_start_point.ome.xml"
ome = from_xml(path)
print(to_xml(ome))
print(to_dict(ome))

In [None]:
from typing import List, Optional

from pydantic_compat import field_validator

from ome_types._autogenerated.ome_2016_06.dataset import Dataset
from ome_types._autogenerated.ome_2016_06.experiment import Experiment
from ome_types._autogenerated.ome_2016_06.experimenter import Experimenter
from ome_types._autogenerated.ome_2016_06.experimenter_group import (
    ExperimenterGroup,
)
from ome_types._autogenerated.ome_2016_06.folder import Folder
from ome_types._autogenerated.ome_2016_06.image import Image
from ome_types._autogenerated.ome_2016_06.instrument import Instrument
from ome_types._autogenerated.ome_2016_06.plate import Plate
from ome_types._autogenerated.ome_2016_06.project import Project
from ome_types._autogenerated.ome_2016_06.rights import Rights
from ome_types._autogenerated.ome_2016_06.roi import ROI
from ome_types._autogenerated.ome_2016_06.screen import Screen
from ome_types._autogenerated.ome_2016_06.structured_annotations import (
    StructuredAnnotations,
)
from ome_types._mixins._base_type import OMEType
from ome_types._mixins._ome import OMEMixin
#from ome_types._mixins._validators import validate_structured_annotations
from xsdata_pydantic_basemodel.pydantic_compat import Field

__NAMESPACE__ = "http://www.openmicroscopy.org/Schemas/OME/2016-06"


class OME(OMEMixin, OMEType):
    """The OME element is a container for all information objects accessible by
    OME.

    These information objects include descriptions of the imaging experiments
    and the people who perform them, descriptions of the microscope, the resulting
    images and how they were acquired, the analyses performed on those images,
    and the analysis results themselves.
    An OME file may contain any or all of this information.
    With the creation of the Metadata Only Companion OME-XML and Binary Only OME-TIFF files
    the top level OME node has changed slightly.
    It can EITHER:
    Contain all the previously expected elements
    OR:
    Contain a single BinaryOnly element that points at
    its Metadata Only Companion OME-XML file.

    Attributes
    ----------
    rights : None | Rights
        (The OME Rights).
    projects : list[Project]
        (The OME Projects).
    datasets : list[Dataset]
        (The OME Datasets).
    folders : list[Folder]
        (The OME Folders).
    experiments : list[Experiment]
        (The OME Experiments).
    plates : list[Plate]
        (The OME Plates).
    screens : list[Screen]
        (The OME Screens).
    experimenters : list[Experimenter]
        (The OME Experimenters).
    experimenter_groups : list[ExperimenterGroup]
        (The OME ExperimenterGroups).
    instruments : list[Instrument]
        (The OME Instruments).
    images : list[Image]
        (The OME Images).
    structured_annotations : None | StructuredAnnotations
        (The OME StructuredAnnotations).
    rois : list[ROI]
        (The OME ROIs).
    binary_only : None | "OME.BinaryOnly"
        Pointer to an external metadata file. If this element is present, then no
        other metadata may be present in this file, i.e. this file is a place-
        holder.
    uuid : None | str
        This unique identifier is used to keep track of multi part files. It allows
        the links between files to survive renaming. While OPTIONAL in the general
        case this is REQUIRED in a MetadataOnly Companion to a collection of
        BinaryOnly files.
    creator : None | str
        This is the name of the creating application of the OME-XML and preferably
        its full version. e.g "CompanyName, SoftwareName, V2.6.3456" This is
        optional but we hope it will be set by applications writing out OME-XML
        from scratch.
    """

    class Meta:
        namespace = "http://www.openmicroscopy.org/Schemas/OME/2016-06"

    rights: Optional[Rights] = Field(
        default=None,
        metadata={
            "name": "Rights",
            "type": "Element",
        },
    )
    projects: List[Project] = Field(
        default_factory=list,
        metadata={
            "name": "Project",
            "type": "Element",
        },
    )
    datasets: List[Dataset] = Field(
        default_factory=list,
        metadata={
            "name": "Dataset",
            "type": "Element",
        },
    )
    folders: List[Folder] = Field(
        default_factory=list,
        metadata={
            "name": "Folder",
            "type": "Element",
        },
    )
    experiments: List[Experiment] = Field(
        default_factory=list,
        metadata={
            "name": "Experiment",
            "type": "Element",
        },
    )
    plates: List[Plate] = Field(
        default_factory=list,
        metadata={
            "name": "Plate",
            "type": "Element",
        },
    )
    screens: List[Screen] = Field(
        default_factory=list,
        metadata={
            "name": "Screen",
            "type": "Element",
        },
    )
    experimenters: List[Experimenter] = Field(
        default_factory=list,
        metadata={
            "name": "Experimenter",
            "type": "Element",
        },
    )
    experimenter_groups: List[ExperimenterGroup] = Field(
        default_factory=list,
        metadata={
            "name": "ExperimenterGroup",
            "type": "Element",
        },
    )
    instruments: List[Instrument] = Field(
        default_factory=list,
        metadata={
            "name": "Instrument",
            "type": "Element",
        },
    )
    images: List[Image] = Field(
        default_factory=list,
        metadata={
            "name": "Image",
            "type": "Element",
        },
    )
    structured_annotations: Optional[StructuredAnnotations] = Field(
        metadata={
            "name": "StructuredAnnotations",
            "type": "Element",
        },
        default_factory=StructuredAnnotations,
    )
    # FIXME: THIS IS THE PROBLEM TypeError: unhashable type: 'dict'
    rois: List[ROI] = Field(
        default_factory=list,
        metadata={
            "name": "ROI",
            "type": "Element",
        },
    )
    binary_only: Optional["OME.BinaryOnly"] = Field(
        default=None,
        metadata={
            "name": "BinaryOnly",
            "type": "Element",
        },
    )
    uuid: Optional[str] = Field(
        default=None,
        metadata={
            "name": "UUID",
            "type": "Attribute",
            "pattern": r"(urn:uuid:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})",
        },
        regex="(urn:uuid:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})",
    )
    creator: Optional[str] = Field(
        default=None,
        metadata={
            "name": "Creator",
            "type": "Attribute",
        },
    )

    class BinaryOnly(OMEType):
        """
        Attributes
        ----------
        metadata_file : str
            Filename of the OME-XML metadata file for this binary data. If the file
            cannot be found, a search can be performed based on the UUID.
        uuid : str
            The unique identifier of another OME-XML block whose metadata describes the
            binary data in this file. This UUID is considered authoritative regardless
            of mismatches in the filename.
        """

        metadata_file: str = Field(
            metadata={
                "name": "MetadataFile",
                "type": "Attribute",
                "required": True,
            }
        )
        uuid: str = Field(
            metadata={
                "name": "UUID",
                "type": "Attribute",
                "required": True,
                "pattern": r"(urn:uuid:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})",
            },
            regex="(urn:uuid:[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})",
        )

    #_v_structured_annotations = field_validator(
    #    "structured_annotations", mode="before"
    #)(validate_structured_annotations)


BinaryOnly = OME.BinaryOnly

OME.model_json_schema()

In [68]:
from typing import List, Optional

from pydantic_compat import field_validator

from ome_types._autogenerated.ome_2016_06.annotation_ref import AnnotationRef
from ome_types._autogenerated.ome_2016_06.ellipse import Ellipse
from ome_types._autogenerated.ome_2016_06.label import Label
from ome_types._autogenerated.ome_2016_06.line import Line
from ome_types._autogenerated.ome_2016_06.mask import Mask
from ome_types._autogenerated.ome_2016_06.point import Point
from ome_types._autogenerated.ome_2016_06.polygon import Polygon
from ome_types._autogenerated.ome_2016_06.polyline import Polyline
from ome_types._autogenerated.ome_2016_06.rectangle import Rectangle
from ome_types._mixins._base_type import OMEType
from ome_types._mixins._collections import ShapeUnionMixin
from ome_types._mixins._validators import validate_shape_union
from xsdata_pydantic_basemodel.pydantic_compat import Field

__NAMESPACE__ = "http://www.openmicroscopy.org/Schemas/OME/2016-06"


class ROI(OMEType):
    """A four dimensional 'Region of Interest'.

    If they are not used, and the Image has more than one plane, the
    entire set of planes is assumed to be included in the ROI. Multiple
    ROIs may be specified.

    Attributes
    ----------
    union : "ROI.Union"
        (The ROI Union).
    annotation_refs : list[AnnotationRef]
        (The ROI AnnotationRefs).
    description : None | str
        A description for the ROI. [plain-text multi-line string]
    id : str
        (The ROI ID).
    name : None | str
        The Name identifies the ROI to the user. [plain-text string]
    """

    class Meta:
        namespace = "http://www.openmicroscopy.org/Schemas/OME/2016-06"

    union: "ROI.Union" = Field(
        metadata={
            "name": "Union",
            "type": "Element",
            "required": True,
        },
        default_factory=lambda: ROI.Union(),
    )
    annotation_refs: List[AnnotationRef] = Field(
        default_factory=list,
        metadata={
            "name": "AnnotationRef",
            "type": "Element",
        },
    )
    description: Optional[str] = Field(
        default=None,
        metadata={
            "name": "Description",
            "type": "Element",
            "white_space": "preserve",
        },
    )
    id: str = Field(
        default="__auto_sequence__",
        metadata={
            "name": "ID",
            "type": "Attribute",
            "required": True,
            "pattern": r"(urn:lsid:([\w\-\.]+\.[\w\-\.]+)+:\S+)|(\S+)",
        },
        regex="(urn:lsid:([\\w\\-\\.]+\\.[\\w\\-\\.]+)+:\\S+)|(\\S+)",
    )
    name: Optional[str] = Field(
        default=None,
        metadata={
            "name": "Name",
            "type": "Attribute",
        },
    )

    class Union(ShapeUnionMixin, OMEType):
        labels: List[Label] = Field(
            default_factory=list,
            metadata={
                "name": "Label",
                "type": "Element",
            },
        )
        polygons: List[Polygon] = Field(
            default_factory=list,
            metadata={
                "name": "Polygon",
                "type": "Element",
            },
        )
        polylines: List[Polyline] = Field(
            default_factory=list,
            metadata={
                "name": "Polyline",
                "type": "Element",
            },
        )
        lines: List[Line] = Field(
            default_factory=list,
            metadata={
                "name": "Line",
                "type": "Element",
            },
        )
        ellipses: List[Ellipse] = Field(
            default_factory=list,
            metadata={
                "name": "Ellipse",
                "type": "Element",
            },
        )
        points: List[Point] = Field(
            default_factory=list,
            metadata={
                "name": "Point",
                "type": "Element",
            },
        )
        masks: List[Mask] = Field(
            default_factory=list,
            metadata={
                "name": "Mask",
                "type": "Element",
            },
        )
        rectangles: List[Rectangle] = Field(
            default_factory=list,
            metadata={
                "name": "Rectangle",
                "type": "Element",
            },
        )

    #_v_shape_union = field_validator("union", mode="before")(validate_shape_union)


Union = ROI.Union


schema = ROI.model_json_schema()
print(schema)

ModuleNotFoundError: No module named 'ome_types._mixins._collections'

In [13]:
from ome_types._autogenerated.ome_2016_06 import OME
import json

schema = OME.model_json_schema()

with open('schema.json', 'w', encoding='utf-8') as f:
    json.dump(schema, f, ensure_ascii=False, indent=4)

In [8]:
from ome_types._autogenerated.ome_2016_06 import OME
from openai import OpenAI
import instructor


client = instructor.patch(OpenAI())
with open("/home/aaron/PycharmProjects/MetaGPT/raw_data/raw_Metadata_Image8.txt") as f:
    input = f.read()
    print(input)
    
def extract(content):
    return client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {"role": "user",
             "content": f"Please genearate the appropriate omexml from this raw metadata. '{input}'"}],
        response_model=OME,
        max_tokens=5000
    )

extract(input).model_dump()


 BitsPerPixel	8
 DimensionOrder	XYCZT
 IsInterleaved	false
 IsRGB	false
 LittleEndian	true
 PixelType	uint8
 Series 0 Name	Image 8 #1
 SizeC	3
 SizeT	30
 SizeX	680
 SizeY	280
 SizeZ	1
Appliance|Data|ShuttleAndFindData|Calibration|Marker|FocusPosition #1	0
Appliance|Data|ShuttleAndFindData|Calibration|Marker|FocusPosition #2	0
Appliance|Data|ShuttleAndFindData|Calibration|Marker|FocusPosition #3	0
Appliance|Data|ShuttleAndFindData|Calibration|Marker|Id #1	Marker:1
Appliance|Data|ShuttleAndFindData|Calibration|Marker|Id #2	Marker:2
Appliance|Data|ShuttleAndFindData|Calibration|Marker|Id #3	Marker:3
Appliance|Data|ShuttleAndFindData|Calibration|Marker|StageXPosition #1	0
Appliance|Data|ShuttleAndFindData|Calibration|Marker|StageXPosition #2	0
Appliance|Data|ShuttleAndFindData|Calibration|Marker|StageXPosition #3	0
Appliance|Data|ShuttleAndFindData|Calibration|Marker|StageYPosition #1	0
Appliance|Data|ShuttleAndFindData|Calibration|Marker|StageYPosition #2	0
Appliance|Data|ShuttleAndFindDa

BadRequestError: Error code: 400 - {'error': {'message': "Invalid 'tools[0].function.description': string too long. Expected a string with maximum length 1024, but got a string with length 2152 instead.", 'type': 'invalid_request_error', 'param': 'tools[0].function.description', 'code': 'string_above_max_length'}}