In [16]:

#r "nuget:HtmlAgilityPack, 1.11.65"
#r "nuget:ReverseMarkdown, 4.6.0"
#r "nuget: Microsoft.Extensions.Configuration, 8.0.0"
#r "nuget: Microsoft.Extensions.Configuration.FileExtensions, 8.0.1"
#r "nuget: Microsoft.Extensions.Configuration.Json, 8.0.0"
#r "nuget: Microsoft.SemanticKernel, 1.18.2"

using HtmlAgilityPack;
using ReverseMarkdown;
using Microsoft.Extensions.Configuration;
using System.IO;


using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;

// Load the notebook's secrets from secrets.json; the relative name is expanded
// to a full path by Path.GetFullPath. The file is required (optional: false).
// The type is spelled out because `var` is not valid on fields in regular C#.
public static IConfigurationRoot config = new ConfigurationBuilder()
    .AddJsonFile(Path.GetFullPath("secrets.json"), optional: false, reloadOnChange: true)
    .Build();

// OpenAI API key taken from the "openai-key" entry. Fail fast with a clear
// message here instead of passing a null/blank key on to the kernel builder.
public static string OpenAIKey = !string.IsNullOrWhiteSpace(config["openai-key"])
    ? config["openai-key"]
    : throw new InvalidOperationException("secrets.json must contain a non-empty \"openai-key\" entry.");

In [17]:
var url = "https://bradjolicoeur.com/article/html-to-json-semantic-kernel";
// Constructing the Uri validates the address up front (it throws on a malformed URL).
var uri = new Uri(url);

// Download and parse the page at the URL specified
var webGet = new HtmlWeb();
var document = await webGet.LoadFromWebAsync(url);

// SelectSingleNode returns null when nothing matches. Stop here with a clear
// message rather than hitting a NullReferenceException when body.OuterHtml is
// read during the markdown conversion.
var body = document.DocumentNode.SelectSingleNode("/html/body")
    ?? throw new InvalidOperationException($"No <body> element was found in the document loaded from {url}.");

// May be null when the page has no <meta> tags; it is null-checked before use.
var metaTags = document.DocumentNode.SelectNodes("//meta");
var metaText = string.Empty;


In [18]:
// Flatten the page's <meta> tags into one "key|content" line per tag.
if (metaTags != null)
{
    var sb = new StringBuilder();
    foreach (var item in metaTags)
    {
        // Open Graph style tags carry their key in "property"; standard tags
        // (author, description, keywords, ...) carry it in "name" instead.
        var key = item.GetAttributeValue("property", "");
        if (key.Length == 0)
        {
            key = item.GetAttributeValue("name", "");
        }

        var content = item.GetAttributeValue("content", "");

        // A tag with neither a key nor content (e.g. <meta charset="...">)
        // would only add a bare "|" line, so leave it out.
        if (key.Length == 0 && content.Length == 0)
        {
            continue;
        }

        sb.Append(key);
        sb.Append('|');
        sb.Append(content);
        sb.AppendLine();
    }
    metaText = sb.ToString();
}


In [19]:
// Named markdownConfig so it does not hide the notebook's existing `config`
// variable (the secrets configuration) from later cells.
var markdownConfig = new ReverseMarkdown.Config
{
    // Drop tags the converter does not recognise instead of passing them through.
    UnknownTags = ReverseMarkdown.Config.UnknownTagsOption.Drop
};

// Convert the page body's HTML into markdown.
var converter = new ReverseMarkdown.Converter(markdownConfig);
string html = body.OuterHtml;

string markdownText = converter.Convert(html);


In [20]:
// Join the meta tag lines and the markdown body, each terminated by a line
// break, into the single text block that is embedded in the prompt.
var sbAllText = new StringBuilder()
    .AppendLine(metaText)
    .AppendLine(markdownText);
var textToSummarize = sbAllText.ToString();

In [21]:
// NOTE(review): presumably required because ResponseFormat is flagged experimental — confirm.
#pragma warning disable SKEXP0010

static string ModelId = "gpt-4o-mini";

// Create a kernel with OpenAI chat completion
var builder = Kernel.CreateBuilder()
    .AddOpenAIChatCompletion(ModelId, OpenAIKey);

Kernel kernel = builder.Build();

// Build the prompt: the target JSON shape, the summarization instruction, and
// the scraped page text (meta tag lines + markdown body).
string prompt = $"""
    Consider a JSON schema for Article Summary that includes the following  properties: Author:string, PublishDate:datetime, Title:string, Summary:string, KeyWords:string, ImageUrl:string 
    
    Please summarize the the following text in 50 words or less for an summary to use in LinkedIn Featured section:
    {textToSummarize}

    # How to respond to this prompt
        - No other text, just the JSON data
    """;

var executionSettings = new OpenAIPromptExecutionSettings
{
    MaxTokens = 1000,
    ResponseFormat = "json_object"
};

// Send the finished prompt straight to the chat completion service as a single
// user message. InvokePromptAsync would treat the string as a Semantic Kernel
// prompt template, so any "{{ ... }}" sequence inside the scraped page text
// would be parsed as template syntax instead of being sent verbatim.
var chatService = kernel.GetRequiredService<IChatCompletionService>();
var chatResult = await chatService.GetChatMessageContentAsync(prompt, executionSettings);
string response = chatResult.Content ?? string.Empty;

// Trailing expression: displayed as the cell's output
response

{"Author":"Brad Jolicoeur","PublishDate":"2024-09-28T00:00:00Z","Title":"Convert HTML into JSON using Semantic Kernel and OpenAI","Summary":"Utilizing Semantic Kernel and OpenAI, I enhanced page scanning accuracy by converting HTML to markdown and extracting metatags, resulting in reduced input size and improved efficiency.","KeyWords":"Semantic Kernel, OpenAI, HTML, JSON, page scanning","ImageUrl":"https://storage.googleapis.com/blastcms-prod/blog-blastcms/3d223ea9-420e-4622-90b1-b8beba986840-20240928183627.jpg"}