In [None]:

// NuGet packages used by this notebook: HTML download/parsing, HTML -> Markdown
// conversion, JSON configuration for secrets, and Semantic Kernel.
#r "nuget: HtmlAgilityPack, 1.11.65"
#r "nuget: ReverseMarkdown, 4.6.0"
#r "nuget: Microsoft.Extensions.Configuration, 8.0.0"
#r "nuget: Microsoft.Extensions.Configuration.FileExtensions, 8.0.1"
#r "nuget: Microsoft.Extensions.Configuration.Json, 8.0.0"
#r "nuget: Microsoft.SemanticKernel, 1.18.2"

using HtmlAgilityPack;
using ReverseMarkdown;
using Microsoft.Extensions.Configuration;
using System.IO;


using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;

// Configuration loaded from secrets.json, resolved against the current working
// directory. The file is required (optional: false).
public static IConfigurationRoot config = new ConfigurationBuilder()
    .AddJsonFile(Path.GetFullPath("secrets.json"), optional: false, reloadOnChange: true)
    .Build();

// OpenAI API key read from the "openai-key" entry. A missing or blank entry is
// rejected here so the problem is reported where it originates instead of
// surfacing later when the chat-completion service is configured.
public static string OpenAIKey = string.IsNullOrWhiteSpace(config["openai-key"])
    ? throw new InvalidOperationException("secrets.json does not contain a non-empty \"openai-key\" entry.")
    : config["openai-key"];

In [2]:
// Recipe page to download.
var url = "https://www.onceuponachef.com/recipes/pesto-pasta-salad.html";
// Constructing the Uri rejects a malformed URL before any network call is made.
var uri = new Uri(url);

// Download and parse the page.
var webGet = new HtmlWeb();
var document = await webGet.LoadFromWebAsync(url);

// SelectSingleNode returns null when the XPath matches nothing; stop here with a
// clear message rather than hitting a NullReferenceException when the body is used.
var body = document.DocumentNode.SelectSingleNode("/html/body")
    ?? throw new InvalidOperationException($"No <body> element was found in the document loaded from {url}.");



In [4]:
// Converter settings. Named markdownConfig so it does not shadow the `config`
// configuration root declared in the first cell.
var markdownConfig = new ReverseMarkdown.Config
{
    // Use the library's "Drop" policy for tags it does not recognize.
    UnknownTags = Config.UnknownTagsOption.Drop
};

var converter = new ReverseMarkdown.Converter(markdownConfig);
string html = body.OuterHtml;

// Markdown rendering of the page body; embedded in the prompt sent to the model.
string textToSummarize = converter.Convert(html);


In [8]:
// SKEXP0010: suppressed because the execution settings below use an API that
// Semantic Kernel marks as experimental under this diagnostic id.
#pragma warning disable SKEXP0010

static string ModelId = "gpt-4o-mini";

// Create a kernel with OpenAI chat completion
var builder = Kernel.CreateBuilder()
    .AddOpenAIChatCompletion(ModelId, OpenAIKey);

Kernel kernel = builder.Build();

// Build the prompt. This is an interpolated verbatim string, so literal quotes
// are written as "" and literal braces as {{ / }}; {textToSummarize} is the only
// interpolation hole.
// NOTE(review): the prompt is passed to InvokePromptAsync as a prompt template;
// confirm that page content containing template delimiters cannot break rendering.
string prompt = @$"
Extract the main details of a recipe from the webpage below and convert them into a structured JSON format. The JSON object should have the following fields: title, description, ingredients, instructions, prepTime, cookTime, totalTime, servings, calories, and imageURL. For each field:

title: Capture the recipe title as a string.
description: Capture the recipe's description or introductory text, if available, as a string.
ingredients: Capture the list of ingredients as an array of strings, each string being one ingredient.
instructions: Capture the step-by-step cooking instructions as an array of strings, each string being one step.
prepTime: Capture preparation time as a string (e.g., '15 minutes').
cookTime: Capture cooking time as a string.
totalTime: Capture total time (prep + cook) as a string.
servings: Capture the number of servings as an integer or string.
calories: Capture the calories per serving as an integer or string.
imageURL: Capture the main image URL for the recipe, if available.
The resulting JSON should look like this:

```json
{{
  ""title"": ""Recipe Title"",
  ""description"": ""Brief description of the recipe"",
  ""ingredients"": [""Ingredient 1"", ""Ingredient 2"", ""Ingredient 3""],
  ""instructions"": [""Step 1"", ""Step 2"", ""Step 3""],
  ""prepTime"": ""10 minutes"",
  ""cookTime"": ""30 minutes"",
  ""totalTime"": ""40 minutes"",
  ""servings"": ""4"",
  ""calories"": ""250"",
  ""imageURL"": ""https://example.com/image.jpg""
}}
```
If any field is not available on the page, leave it out of the JSON object. Make sure all extracted text is clean and free from HTML tags, extraneous punctuation, or other irrelevant content.

Webpage:
{textToSummarize}
    ";

// Execution settings: cap the completion length and request a JSON object response.
var executionSettings = new OpenAIPromptExecutionSettings
{
    MaxTokens = 5000,
    ResponseFormat = "json_object"
};

// Submit the prompt and capture the model's reply.
string response = await kernel.InvokePromptAsync<string>(
    prompt,
    new KernelArguments(executionSettings));

// Trailing expression: the notebook displays its value as the cell output.
response

{
  "title": "22 Quick and Easy Recipes in 30 Minutes (or less)",
  "description": "5 Chef Secrets To Make You A Better Cook!",
  "ingredients": [],
  "instructions": [],
  "prepTime": "",
  "cookTime": "",
  "totalTime": "",
  "servings": "",
  "calories": "",
  "imageURL": ""
}