In [None]:
#r "nuget:System.Text.Json, 8.0.1"
#r "nuget:Azure.AI.DocumentIntelligence, 1.0.0-beta.2"
#r "nuget:Azure.AI.OpenAI, 1.0.0-beta.16"
#r "nuget:Azure.Identity, 1.10.4"
#r "nuget:DotNetEnv, 3.0.0"

In [None]:
using System.Net;
using System.Net.Http;
using System.Text.Json.Nodes;
using System.Text.Json;
using System.IO; 

using Azure;
using Azure.AI.DocumentIntelligence;
using Azure.AI.OpenAI;
using Azure.Identity;
using DotNetEnv;

In [None]:
// Load settings from the repository-level .env file. The values are read back
// through Environment.GetEnvironmentVariable below, so DotNetEnv is expected to
// publish them as process environment variables.
Env.Load("../../.env");

// Reads a required setting from the environment and fails fast, naming the
// missing variable. Without this check a missing endpoint only surfaces as an
// ArgumentNullException from `new Uri(null)`, and a missing deployment name only
// fails later when the chat request is sent.
string GetRequiredSetting(string variableName)
{
    var value = Environment.GetEnvironmentVariable(variableName);
    if (string.IsNullOrWhiteSpace(value))
    {
        throw new InvalidOperationException(
            $"Environment variable '{variableName}' is not set. Define it in ../../.env or in the process environment before running this notebook.");
    }

    return value;
}

string documentIntelligenceEndpoint = GetRequiredSetting("DOCUMENT_INTELLIGENCE_ENDPOINT");
string openAIEndpoint = GetRequiredSetting("COMPLETIONS_OPENAI_ENDPOINT");
string openAIModelDeployment = GetRequiredSetting("COMPLETIONS_OPENAI_COMPLETION_MODEL_DEPLOYMENT");

// NOTE(review): not referenced by the cells visible here; kept because later
// cells may rely on it.
string openAIApiVersion = "2024-03-01-preview";

// Credential chain for local development: environment, managed identity, shared
// token cache, interactive browser and Azure PowerShell sources are switched off,
// while Visual Studio Code and Azure CLI are explicitly kept enabled. Options not
// listed here keep the library defaults.
var credential = new DefaultAzureCredential(new DefaultAzureCredentialOptions
{
    ExcludeEnvironmentCredential = true,
    ExcludeManagedIdentityCredential = true,
    ExcludeSharedTokenCacheCredential = true,
    ExcludeInteractiveBrowserCredential = true,
    ExcludeAzurePowerShellCredential = true,
    ExcludeVisualStudioCodeCredential = false,
    ExcludeAzureCliCredential = false
});

// Sample invoice analysed by the following cell (path is relative to the
// notebook's working directory).
var documentName = "../Assets/DocumentTrainingData/Invoice_1.pdf";

// Both service clients authenticate with the same token credential.
var documentIntelligenceClient = new DocumentIntelligenceClient(new Uri(documentIntelligenceEndpoint), credential);
var openAIClient = new OpenAIClient(new Uri(openAIEndpoint), credential);

In [None]:
// Step 1: send the document bytes to Document Intelligence using the
// "prebuilt-layout" model and request the content as Markdown.
// The file is read asynchronously so the cell does not block on disk I/O.
var markdownAnalysisContent = new AnalyzeDocumentContent()
{
    Base64Source = BinaryData.FromBytes(await File.ReadAllBytesAsync(documentName))
};

Operation<AnalyzeResult> markdownAnalysisOperation = await documentIntelligenceClient.AnalyzeDocumentAsync(WaitUntil.Completed, "prebuilt-layout", markdownAnalysisContent, outputContentFormat: ContentFormat.Markdown);
var markdown = markdownAnalysisOperation.Value.Content;

// Step 2: describe the target JSON shape. The anonymous object is serialized
// into the prompt as a template; the empty strings are placeholders only.
var jsonStructure = new {
    customer = "",
    invoice_date = "",
    products = new [] {
        new {
            id = "",
            unit_price = "",
            quantity = "",
            total = ""
        }
    },
    total_amount = "",
    signatures = new [] {
        new {
            type = "",
            has_signature = "",
            signed_on = ""
        }
    }
};

// Step 3: build the chat request. Temperature and nucleus sampling are both set
// to 0.1.
ChatCompletionsOptions options = new ChatCompletionsOptions()
{
    DeploymentName = openAIModelDeployment,
    MaxTokens = 4096,
    Temperature = 0.1f,
    NucleusSamplingFactor = 0.1f
};

// System prompt, then the extraction instruction with the JSON template, then
// the Markdown rendering of the invoice as a separate user message.
options.Messages.Add(new ChatRequestSystemMessage("You are an AI assistant that extracts data from documents and returns them as structured JSON objects. Do not return as a code block."));
options.Messages.Add(new ChatRequestUserMessage($"Extract the data from this invoice. If a value is not present, provide null. Use the following structure: {JsonSerializer.Serialize(jsonStructure)}"));
options.Messages.Add(new ChatRequestUserMessage(markdown));

var response = await openAIClient.GetChatCompletionsAsync(options);

// Guard against a response without choices instead of failing with an
// index-out-of-range error on Choices[0].
if (response.Value.Choices.Count == 0)
{
    throw new InvalidOperationException("The chat completion response did not contain any choices.");
}

var content = response.Value.Choices[0].Message.Content;

// A null or blank message would otherwise be written as an empty file while
// the cell still reports success.
if (string.IsNullOrWhiteSpace(content))
{
    throw new InvalidOperationException("The chat completion response did not contain any message content.");
}

// Step 4: persist the model's raw reply; it is written as returned, without
// validating that it is well-formed JSON.
await File.WriteAllTextAsync("MarkdownDataExtraction-ContentResponse.json", content);

Console.WriteLine("MarkdownDataExtraction-ContentResponse.json has been created with the content from the response from the OpenAI API.");