## Build an Extraction Chain

https://js.langchain.com/docs/tutorials/extraction/

#### The Schema

In [1]:
import { z } from "zod";

// Schema describing the attributes to extract for a single person.
//
// Each field is `nullish` (string, `null`, or `undefined`) rather than only
// `optional`: the extraction prompt tells the model to return `null` for any
// attribute it cannot determine, and an `optional`-only field would reject an
// explicit `null` during validation.
const personSchema = z.object({
  name: z.string().nullish().describe("The name of the person"),
  hair_color: z
    .string()
    .nullish()
    .describe("The color of the person's hair if known"),
  height_in_meters: z
    .string()
    .nullish()
    .describe("Height measured in meters"),
});

#### The Extractor

In [2]:
import { ChatPromptTemplate } from "@langchain/core/prompts";

// Prompt for the extraction chain: a system message carrying the extraction
// instructions, followed by a human message holding the text to process.
// Two ways to extend it:
//   1) add examples to the template to improve extraction quality;
//   2) add parameters that carry extra context (e.g. metadata about the
//      document the text was extracted from).
const systemInstructions = [
  "You are an expert extraction algorithm.",
  "Only extract relevant information from the text.",
  "If you do not know the value of an attribute asked to extract,",
  "return null for the attribute's value.",
].join("\n");

const promptTemplate = ChatPromptTemplate.fromMessages([
  ["system", systemInstructions],
  // See the how-to guide on improving performance with reference examples.
  // ["placeholder", "{examples}"],
  ["human", "{text}"],
]);

* Instantiate the model

In [3]:
// Load environment variables
import * as dotenv from "dotenv";
dotenv.config();
// Abort early when the OpenAI key is unset or empty.
if ((process.env.OPENAI_API_KEY ?? "") === "") {
  throw new Error("Missing OPENAI_API_KEY environment variable");
}

import { ChatOpenAI } from "@langchain/openai";
// Chat model shared by every extraction call in this notebook.
// NOTE(review): temperature 0 is presumably chosen to keep extraction output
// as repeatable as possible — confirm if the model is changed.
const llm = new ChatOpenAI({ model: "gpt-4o-mini", temperature: 0 });

* We enable structured output by creating a new object with the `.withStructuredOutput` method:

In [4]:
// Derive a model object configured with personSchema for structured output;
// `llm` itself is left as-is and is reused with a different schema later on.
const structured_llm = llm.withStructuredOutput(personSchema);

* We can then invoke it normally:

In [6]:
// Fill the template's {text} slot with the passage to extract from.
const prompt = await promptTemplate.invoke({
  text: "Alan Smith is 6 feet tall and has blond hair.",
});
// Last expression of the cell: its resolved value is what the notebook shows
// as the cell output.
await structured_llm.invoke(prompt);

fetch failed

Context: trace=fa360d9a-0a0e-47bb-8e5c-5ce714eb57a9,id=fa360d9a-0a0e-47bb-8e5c-5ce714eb57a9; trace=a8cc7dff-7390-498d-9187-9c26079e303a,id=a8cc7dff-7390-498d-9187-9c26079e303a; trace=a8cc7dff-7390-498d-9187-9c26079e303a,id=4ceaf427-fe2e-4c03-bc44-f4e0d381b783


{ name: 'Alan Smith', hair_color: 'blond', height_in_meters: '1.83' }


#### Multiple Entities

In [7]:
import { z } from "zod";

// Schema for one extracted person.
//
// All fields are `nullish` (value, `null`, or `undefined`): the extraction
// prompt asks the model to return `null` for attributes it cannot determine,
// and an `optional`-only field would reject an explicit `null`. Here the
// height is a number rather than a string.
const person = z.object({
  name: z.string().nullish().describe("The name of the person"),
  hair_color: z
    .string()
    .nullish()
    .describe("The color of the person's hair if known"),
  height_in_meters: z.number().nullish().describe("Height measured in meters"),
});

// Top-level container so that several people can be extracted from one text.
const dataSchema = z.object({
  people: z.array(person).describe("Extracted data about people"),
});

In [9]:
// Same model, this time configured with the multi-person schema.
const structured_llm3 = llm.withStructuredOutput(dataSchema);
// The passage mentions two people; a height is stated for only one of them.
const prompt3 = await promptTemplate.invoke({
  text: "My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me.",
});
// Last expression of the cell: its resolved value is what the notebook shows
// as the cell output.
await structured_llm3.invoke(prompt3);

{
  people: [
    { name: 'Jeff', hair_color: 'black', height_in_meters: 1.83 },
    { name: 'Anna', hair_color: 'black', height_in_meters: 0 }
  ]
}
