In [61]:
#r "nuget:HtmlAgilityPack, 1.11.65"
#r "nuget:ReverseMarkdown, 4.6.0"
#r "nuget: Microsoft.Extensions.Configuration, 8.0.0"
#r "nuget: Microsoft.Extensions.Configuration.FileExtensions, 8.0.1"
#r "nuget: Microsoft.Extensions.Configuration.Json, 8.0.0"
#r "nuget: Microsoft.SemanticKernel, 1.17.2"

using HtmlAgilityPack;
using ReverseMarkdown;
using Microsoft.Extensions.Configuration;
using System.IO;


using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;

public static var config = new ConfigurationBuilder()
          .AddJsonFile(Path.GetFullPath("secrets.json"), optional: false, reloadOnChange: true)
          .Build();

public static string OpenAIKey =  config["openai-key"];

In [75]:
var url = "https://www.bradjolicoeur.com/article/fabricate-sample-data";
var uri = new Uri(url);

// Get the URL specified
var webGet = new HtmlWeb();
var document = webGet.Load(uri);
var body =  document.DocumentNode.SelectSingleNode("/html/body");
var metaTags = document.DocumentNode.SelectNodes("//meta");
var hasMeta = metaTags != null;
var metaText = string.Empty;

body.OuterHtml

<body>
    <nav class="navbar navbar-expand-sm navbar-fixed-top navbar-toggleable-sm navbar-custom custom-toggler">
        <div class="container">
            <a class="navbar-brand" href="/">
                <img src="/images/android-chrome-192x192.png" class="nav-bar-image" alt="logo">
                Brad Jolicoeur
            </a>

            <button class="navbar-toggler text-white" type="button" data-bs-toggle="collapse" data-bs-target=".navbar-collapse" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
                <span class="navbar-toggler-icon"></span>
            </button>
            <div class="navbar-collapse collapse d-sm-inline-flex justify-content-between">
                <ul class="navbar-nav flex-grow-1">
                    <li class="nav-item ms-5 me-3"><a class="nav-link text-dark fw-semibold" href="/">Home</a></li>
                    <li class="nav-item me-3"><a class="nav-link text-dark fw-semibold" href="/Blog"

In [63]:
body.OuterHtml

<body class="post-template-default single single-post postid-53826 single-format-standard wp-embed-responsive et-tb-has-template et-tb-has-body et-tb-has-footer category-products et_pb_button_helper_class et_transparent_nav et_non_fixed_nav et_show_nav et_pb_show_title et_primary_nav_dropdown_animation_fade et_secondary_nav_dropdown_animation_fade et_header_style_left et_cover_background et_pb_gutter windows et_pb_gutters3 et_divi_theme et-db">
	<div id="page-container">
<div id="et-boc" class="et-boc">
			
		<header class="docker-menu">
<nav>

    <div class="menu-alt">
        <div class="menu-alt-wrap frow fgap-small type-h6 ff-roboto">
            <a href="https://docs.docker.com/build/cloud/">Docs</a>
            <a href="/support/">Get support</a>
            <a href="/pricing/contact-sales/">Contact sales</a>
        </div>
    </div>
    <ul class="main-menu">
        <li class="logo"><a href="/"><?xml version="1.0" encoding="UTF-8"?><svg class="docker_logo " id="Layer_1" data-

In [85]:
if(hasMeta)
{
   var sb = new StringBuilder();
   foreach(var item in metaTags)
   {
    sb.AppendLine(item.GetAttributeValue("property", "") + "|"+ item.GetAttributeValue("content", ""));
   }
   metaText = sb.ToString();
}

In [86]:
metaText

|I was recently working on learning more about using Dataframes and Polyglot Notebooks with C# and needed some sample data to work with.  At first I started by looking for some examples online, but I wanted to write a blog about my experience and wanted my own sample of data.  
og:title|Fabricate Sample Data with ChatGPT
og:type|website
og:url|http://www.bradjolicoeur.com/article/fabricate-sample-data
og:image|https://storage.googleapis.com/blastcms-prod/blog-blastcms/75093115-d77a-4399-a766-4341544dd235-20240824152929.jpg
og:site_name|Brad Jolicoeur
og:description|I was recently working on learning more about using Dataframes and Polyglot Notebooks with C# and needed some sample data to work with.  At first I started by looking for some examples online, but I wanted to write a blog about my experience and wanted my own sample of data.  


In [77]:
var config = new ReverseMarkdown.Config{
    UnknownTags = Config.UnknownTagsOption.Drop
};

var converter = new ReverseMarkdown.Converter(config);
string html = body.OuterHtml;

string result = converter.Convert(html);
result

# Fabricate Sample Data with ChatGPT

![Fabricate Sample Data with ChatGPT](https://storage.googleapis.com/blastcms-prod/blog-blastcms/75093115-d77a-4399-a766-4341544dd235-20240824152929.jpg)

##### by Brad Jolicoeur

###### 08/24/2024

- tags:
- [Data Analysis](/blog/Data%20Analysis)

I was recently working on learning more about using Dataframes and Polyglot Notebooks with C# and needed some sample data to work with.  At first I started by looking for some examples online, but I wanted to write an article about my experience and needed my own sample set of data.

My first thought was I can generate a sample set with a small script, but that would take valuable time away from what I was trying to learn.  Then I remembered ChatGPT.  Could it fabricate a sample dataset for me that is somewhat realistic?

I was pleasantly surprised to find that ChatGPT is super easy and efficient at generating sample data files.   My first attempts were very basic, but I was getting good r

In [84]:
#pragma warning disable SKEXP0010

static string ModelId = "gpt-4o-mini";

// Create a kernel with OpenAI chat completion
var builder = Kernel.CreateBuilder()
.AddOpenAIChatCompletion(ModelId, OpenAIKey);

Kernel kernel = builder.Build();

var textToSummarize = hasMeta ? metaText : result;


// Create and print out the prompt
string promptBody = $"""
    Consider a JSON schema for Article Summary that includes the following  properties: Author:string, PublishDate:datetime, Title:string, Summary:string 
    
    Please summarize the the following text in 30 words or less for software engineers as the audience and output in json:
    {textToSummarize}

    # How to respond to this prompt
        - No other text, just the JSON data
    """;

// Create and print out the prompt
string promptMeta = $@"""
    Consider a JSON schema for Article Summary that includes the following  properties: Author:string, PublishDate:datetime, Title:string, Summary:string 
    
    Extract the Article Summary from the key value pairs below and output in json:
    {textToSummarize}

    # How to respond to this prompt
        - No other text, just the JSON data
    """;

string prompt = hasMeta ? promptMeta : promptBody;


// Submit the prompt and print out the response
string response = await kernel.InvokePromptAsync<string>(
    prompt,
    new(new OpenAIPromptExecutionSettings() 
        { 
            MaxTokens = 1000,
            ResponseFormat = "json_object"
        })
    );

response

{
  "Author": "Brad Jolicoeur",
  "PublishDate": null,
  "Title": "Fabricate Sample Data with ChatGPT",
  "Summary": "I was recently working on learning more about using Dataframes and Polyglot Notebooks with C# and needed some sample data to work with. At first I started by looking for some examples online, but I wanted to write a blog about my experience and wanted my own sample of data."
}