Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prototype of editing bus message to include more blob meta #91

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 78 additions & 58 deletions PipelineCommon/Helpers/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
using USFMToolsSharp;
using USFMToolsSharp.Models.Markers;
using YamlDotNet.Serialization;
using System.Security.Cryptography;
using System.Text;


namespace PipelineCommon.Helpers
{
Expand All @@ -29,12 +32,12 @@
{
MaxConnectionsPerServer = 20
};

private static HttpClient azureStorageHttpClient = new HttpClient(azureStorageHttpHandler);

private static HttpPipelineTransport azureStorageTransport = new HttpClientTransport(azureStorageHttpClient);

public static BlobContainerClient GetOutputClient()
public static BlobContainerClient GetOutputClient()
{
var connectionString = Environment.GetEnvironmentVariable("ScripturePipelineStorageConnectionString");
var outputContainer = Environment.GetEnvironmentVariable("ScripturePipelineStorageOutputContainer");
Expand All @@ -43,7 +46,7 @@
Transport = azureStorageTransport,
});
}

public static BlobContainerClient GetTemplateClient()
{
var connectionString = Environment.GetEnvironmentVariable("ScripturePipelineStorageConnectionString");
Expand All @@ -53,7 +56,7 @@
Transport = azureStorageTransport
});
}

/// <summary>
/// Generates a download link for a given repository.
/// </summary>
Expand All @@ -66,7 +69,7 @@
var downloadUri = new Uri(htmlUrl);
return $"{downloadUri.Scheme}://{downloadUri.Host}/api/v1/repos/{user}/{repo}/archive/master.zip";
}

public static void DownloadRepo(string url, string repoDir, ILogger log)
{
string repoZipFile = Path.Join(CreateTempFolder(), url.Substring(url.LastIndexOf("/")));
Expand All @@ -76,7 +79,7 @@
File.Delete(repoZipFile);
}

using (WebClient client = new WebClient())

Check warning on line 82 in PipelineCommon/Helpers/Utils.cs

View workflow job for this annotation

GitHub Actions / build-and-test

'WebClient.WebClient()' is obsolete: 'WebRequest, HttpWebRequest, ServicePoint, and WebClient are obsolete. Use HttpClient instead.'

Check warning on line 82 in PipelineCommon/Helpers/Utils.cs

View workflow job for this annotation

GitHub Actions / build-and-test

'WebClient.WebClient()' is obsolete: 'WebRequest, HttpWebRequest, ServicePoint, and WebClient are obsolete. Use HttpClient instead.'
{
log.LogInformation("Downloading {Url} to {RepoZipFile}", url, repoZipFile);
client.DownloadFile(new Uri(url), repoZipFile);
Expand Down Expand Up @@ -358,7 +361,7 @@
log.LogDebug("Uploading {Path}", file);
var tmp = outputClient.GetBlobClient(Path.Join(basePath, file).Replace("\\", "/"));
var contentType = ExtensionsToMimeTypesMapping.TryGetValue(extension, out var value) ? value : "application/octet-stream";
uploadTasks.Add(Task.Run(async ()=>
uploadTasks.Add(Task.Run(async () =>
{
await using var content = outDir.OpenRead(file);
await tmp.UploadAsync(content,
Expand Down Expand Up @@ -484,80 +487,97 @@
resourceName = resourceName,
};
}

public static async Task<List<USFMDocument>> LoadUsfmFromDirectoryAsync(ZipFileSystem directory)
{
var parser = new USFMParser(new List<string> { "s5" }, true);
var output = new List<USFMDocument>();
foreach (var f in directory.GetAllFiles(".usfm"))

public static async Task<List<USFMDocument>> LoadUsfmFromDirectoryAsync(ZipFileSystem directory)
{
var tmp = parser.ParseFromString(await directory.ReadAllTextAsync(f));
// If we don't have an abbreviation then try to figure it out from the file name
var tableOfContentsMarkers = tmp.GetChildMarkers<TOC3Marker>();
if (tableOfContentsMarkers.Count == 0)
var parser = new USFMParser(new List<string> { "s5" }, true);
var output = new List<USFMDocument>();
foreach (var f in directory.GetAllFiles(".usfm"))
{
var bookAbbreviation = GetBookAbbreviationFromFileName(f);
if (bookAbbreviation != null)
var tmp = parser.ParseFromString(await directory.ReadAllTextAsync(f));
// If we don't have an abbreviation then try to figure it out from the file name
var tableOfContentsMarkers = tmp.GetChildMarkers<TOC3Marker>();
if (tableOfContentsMarkers.Count == 0)
{
tmp.Insert(new TOC3Marker() { BookAbbreviation = bookAbbreviation });
var bookAbbreviation = GetBookAbbreviationFromFileName(f);
if (bookAbbreviation != null)
{
tmp.Insert(new TOC3Marker() { BookAbbreviation = bookAbbreviation });
}
}
}
else if (Utils.GetBookNumber(tableOfContentsMarkers[0].BookAbbreviation) == 0)
{
var bookAbbreviation = GetBookAbbreviationFromFileName(f);
if (bookAbbreviation != null)
else if (Utils.GetBookNumber(tableOfContentsMarkers[0].BookAbbreviation) == 0)
{
tableOfContentsMarkers[0].BookAbbreviation = bookAbbreviation;
var bookAbbreviation = GetBookAbbreviationFromFileName(f);
if (bookAbbreviation != null)
{
tableOfContentsMarkers[0].BookAbbreviation = bookAbbreviation;
}
}
output.Add(tmp);
}
output.Add(tmp);
return output;
}
return output;
}
public static int CountUniqueVerses(CMarker chapter)
{
var verseSelection = new HashSet<int>();
var verses = chapter.GetChildMarkers<VMarker>();
foreach (var verse in verses)
public static int CountUniqueVerses(CMarker chapter)
{
if (verse.StartingVerse == verse.EndingVerse)
var verseSelection = new HashSet<int>();
var verses = chapter.GetChildMarkers<VMarker>();
foreach (var verse in verses)
{
verseSelection.Add(verse.StartingVerse);
continue;
}
if (verse.StartingVerse == verse.EndingVerse)
{
verseSelection.Add(verse.StartingVerse);
continue;
}

for (var i = verse.StartingVerse; i <= verse.EndingVerse; i++)
{
verseSelection.Add(i);
for (var i = verse.StartingVerse; i <= verse.EndingVerse; i++)
{
verseSelection.Add(i);
}
}
}

return verseSelection.Count;
}
public static string GetBookAbbreviationFromFileName(string f)
{
string bookAbbreviation = null;
var fileNameSplit = Path.GetFileNameWithoutExtension(f).Split('-');
if (fileNameSplit.Length == 2)
return verseSelection.Count;
}
public static string GetBookAbbreviationFromFileName(string f)
{
if (Utils.BibleBookOrder.Contains(fileNameSplit[1].ToUpper()))
string bookAbbreviation = null;
var fileNameSplit = Path.GetFileNameWithoutExtension(f).Split('-');
if (fileNameSplit.Length == 2)
{
bookAbbreviation = fileNameSplit[1].ToUpper();
if (Utils.BibleBookOrder.Contains(fileNameSplit[1].ToUpper()))
{
bookAbbreviation = fileNameSplit[1].ToUpper();
}
}
else if (fileNameSplit.Length == 1)
{
if (Utils.BibleBookOrder.Contains(fileNameSplit[0].ToUpper()))
{
bookAbbreviation = fileNameSplit[0].ToUpper();
}
}

return bookAbbreviation;
}
else if (fileNameSplit.Length == 1)

public static string ComputeSha256Hash(string rawData)
{
if (Utils.BibleBookOrder.Contains(fileNameSplit[0].ToUpper()))
// Create a SHA256
using SHA256 sha256Hash = SHA256.Create();
// ComputeHash - returns byte array
byte[] bytes = sha256Hash.ComputeHash(Encoding.UTF8.GetBytes(rawData));

// Convert byte array to a string
StringBuilder builder = new StringBuilder();
for (int i = 0; i < bytes.Length; i++)
{
bookAbbreviation = fileNameSplit[0].ToUpper();
builder.Append(bytes[i].ToString("x2"));
}
return builder.ToString();
}

return bookAbbreviation;
}
}



public enum RepoType
{
Unknown,
Expand Down
23 changes: 23 additions & 0 deletions PipelineCommon/Models/BusMessages/BlobMeta.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
using System;

namespace PipelineCommon.Models.BusMessages;

/// <summary>
/// Metadata describing a single rendered output blob, carried on the bus message
/// (see <c>RenderingResultMessage.blobMetaList</c>).
/// </summary>
public class BlobMeta
{
/// <summary>
/// SHA-256 digest of the blob content. BibleRenderer fills this from
/// <c>Utils.ComputeSha256Hash</c>, which hashes the UTF-8 bytes of the text and
/// formats each byte as two lowercase hex digits.
/// </summary>
public string Sha256 { get; set; }
/// <summary>
/// Location of the blob. NOTE(review): BibleRenderer stores the output path handed to
/// <c>WriteAllTextAsync</c> (e.g. book abbreviation joined with the file name), not an
/// absolute URL - confirm what consumers expect.
/// </summary>
public string Url { get; set; }
/// <summary>
/// Size of the blob in bytes; BibleRenderer uses the UTF-8 encoded length of the content.
/// Nullable, so it may be left unset.
/// </summary>
public long? ByteCount { get; set; }
/// <summary>
/// When the blob was rendered, as a string. NOTE(review): BibleRenderer assigns its
/// <c>lastRendered</c> value here; the exact format is not visible from this type - confirm.
/// </summary>
public string TimeRendered { get; set; }

/// <summary>
/// True when the blob covers the whole repository (BibleRenderer sets this for
/// print_all.html); false for per-chapter HTML and per-book whole.json blobs.
/// </summary>
public bool BlobDoesRepresentWholeRepo { get; set; }

/// <summary>
/// Short file-type tag for the blob; BibleRenderer uses "html" and "json".
/// </summary>
public string FileType { get; set; }
}
/// <summary>
/// <see cref="BlobMeta"/> extended with scripture-specific fields (book and chapter) for
/// blobs produced by the Bible renderer.
/// </summary>
public class BlobMetaScripture : BlobMeta
{
/// <summary>
/// Chapter number as a string. BibleRenderer writes the chapter's number for per-chapter
/// blobs and "-1" for a blob that spans all chapters of a book (whole.json).
/// </summary>
public string ChapterNum { get; set; }

/// <summary>
/// Book identifier; BibleRenderer sets this to the book abbreviation that is also used
/// as the output folder name.
/// </summary>
public string Slug { get; set; }

/// <summary>
/// Display title of the book the blob belongs to.
/// </summary>
public string BookTitle { get; set; }
}
7 changes: 5 additions & 2 deletions PipelineCommon/Models/BusMessages/RenderingResultMessage.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;

namespace PipelineCommon.Models.BusMessages;

Expand All @@ -8,13 +9,15 @@ public class RenderingResultMessage
public string Message { get; set; }
public string User { get; set; }
public string Repo { get; set; }

public string RepoUrl { get; set; }
public string LanguageCode { get; set; }
public string LanguageName { get; set; }
public string ResourceType { get; set; }
public DateTime RenderedAt { get; set; }


public List<BlobMeta> blobMetaList { get; set; }

public int RepoId { get; set; }

public RenderingResultMessage(WACSMessage source)
Expand Down
71 changes: 59 additions & 12 deletions ScriptureRenderingPipelineWorker/Renderers/BibleRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
using USFMToolsSharp.Models.Markers;
using USFMToolsSharp.Renderers.HTML;
using USFMToolsSharp.Renderers.USFM;
using PipelineCommon.Models.BusMessages;


namespace ScriptureRenderingPipelineWorker.Renderers
{
public class BibleRenderer: IRenderer
public class BibleRenderer : IRenderer

Check failure on line 16 in ScriptureRenderingPipelineWorker/Renderers/BibleRenderer.cs

View workflow job for this annotation

GitHub Actions / build-and-test

'BibleRenderer' does not implement interface member 'IRenderer.RenderAsync(RendererInput, IOutputInterface)'. 'BibleRenderer.RenderAsync(RendererInput, IOutputInterface)' cannot implement 'IRenderer.RenderAsync(RendererInput, IOutputInterface)' because it does not have the matching return type of 'Task'.

Check failure on line 16 in ScriptureRenderingPipelineWorker/Renderers/BibleRenderer.cs

View workflow job for this annotation

GitHub Actions / build-and-test

'BibleRenderer' does not implement interface member 'IRenderer.RenderAsync(RendererInput, IOutputInterface)'. 'BibleRenderer.RenderAsync(RendererInput, IOutputInterface)' cannot implement 'IRenderer.RenderAsync(RendererInput, IOutputInterface)' because it does not have the matching return type of 'Task'.
{
private static readonly string ChapterFormatString = "ch-{0}";

Expand All @@ -28,9 +30,10 @@
/// <param name="textDirection">The direction of the script being used (either rtl or ltr)</param>
/// <param name="isBTTWriterProject">Whether or not this is a BTTWriter project</param>
/// <param name="languageCode">The language code for the project</param>
public async Task RenderAsync(RendererInput input, IOutputInterface output)
public async Task<List<BlobMetaScripture>> RenderAsync(RendererInput input, IOutputInterface output)
{
List<USFMDocument> documents;
var blobMeta = new List<BlobMetaScripture>();
var downloadLinks = new List<DownloadLink>();
if (input.IsBTTWriterProject)
{
Expand Down Expand Up @@ -98,16 +101,29 @@
foreach (var chapter in chapters)
{
// If we've already written this chapter then skip it
if (alreadyWrittenChapters.Contains(chapter.Number))
{
continue;
}

if (alreadyWrittenChapters.Contains(chapter.Number))
{
continue;
}
var tmp = new USFMDocument();
tmp.Insert(chapter);
var renderedContent = renderer.Render(tmp);
var byteCount = System.Text.Encoding.UTF8.GetBytes(renderedContent).Length;
outputTasks.Add(output.WriteAllTextAsync(Path.Join(abbreviation, $"{chapter.Number.ToString()}.html"), renderedContent));
var byteCount = Encoding.UTF8.GetBytes(renderedContent).Length;
var sha256 = Utils.ComputeSha256Hash(renderedContent);
var outputPath = Path.Join(abbreviation, $"{chapter.Number.ToString()}.html");
outputTasks.Add(output.WriteAllTextAsync(outputPath, renderedContent));
blobMeta.Add(new BlobMetaScripture()
{
BlobDoesRepresentWholeRepo = false,
Url = outputPath,
Sha256 = sha256,
ByteCount = byteCount,
FileType = "html",
TimeRendered = lastRendered,
ChapterNum = chapter.Number.ToString(),
Slug = abbreviation,
BookTitle = title,
});
outputBook.Chapters.Add(new OutputChapters()
{
Number = chapter.Number.ToString(),
Expand All @@ -122,17 +138,34 @@
ByteCount = byteCount
});

alreadyWrittenChapters.Add(chapter.Number);
alreadyWrittenChapters.Add(chapter.Number);
}
index.Bible.Add(outputBook);
downloadIndex.Content.Add(bookWithContent);

// Add whole.json for each chapter for book level fetching
outputTasks.Add(output.WriteAllTextAsync(Path.Join(abbreviation, "whole.json"), JsonSerializer.Serialize(bookWithContent, WorkerJsonContext.Default.OutputBook)));
var wholeJsonFile = JsonSerializer.Serialize(bookWithContent, WorkerJsonContext.Default.OutputBook);
var wholeJsonPath = Path.Join(abbreviation, "whole.json");
outputTasks.Add(output.WriteAllTextAsync(wholeJsonPath, wholeJsonFile));
var jsonFileSize = Encoding.UTF8.GetBytes(wholeJsonFile).Length;
// pass along json blob to language api too
blobMeta.Add(new BlobMetaScripture()
{
BlobDoesRepresentWholeRepo = false, //these are chapter, not repo level
Url = wholeJsonPath,
Sha256 = Utils.ComputeSha256Hash(wholeJsonFile),
ByteCount = jsonFileSize,
FileType = "json",
TimeRendered = lastRendered,
ChapterNum = "-1", // the api this is being sent to uses ints for chapter nums as data, or -1 to indicate that it spans all the chapters for this book
Slug = abbreviation,
BookTitle = title,
});


// Since the print all page isn't going to be broken up, just write stuff out here
printBuilder.AppendLine(content);
// end chapters here
}
long totalByteCount = downloadIndex.Content
.SelectMany(outputBook => outputBook.Chapters)
Expand All @@ -143,12 +176,26 @@
// If we have something then create the print_all.html page and the index.html page
if (documents.Count > 0)
{
outputTasks.Add(output.WriteAllTextAsync("print_all.html", input.PrintTemplate.Render(Hash.FromAnonymousObject(new { content = printBuilder.ToString(), heading = input.Title }))));
// Render the combined print page once so the same text is written, hashed and measured.
var printAllBlob = input.PrintTemplate.Render(Hash.FromAnonymousObject(new { content = printBuilder.ToString(), heading = input.Title }));
var printAllPath = "print_all.html";
// UTF-8 length, matching how the per-chapter and whole.json byte counts are computed.
var printAllBlobSize = Encoding.UTF8.GetByteCount(printAllBlob);

// WriteAllTextAsync takes (path, content) everywhere else in this method; the arguments
// were previously swapped, which would use the rendered HTML as the file name.
outputTasks.Add(output.WriteAllTextAsync(printAllPath, printAllBlob));

// Pass along print_all.html to the language API too. This blob covers the whole repo,
// so the book/chapter fields (ChapterNum, Slug, BookTitle) are intentionally left unset.
blobMeta.Add(new BlobMetaScripture()
{
    BlobDoesRepresentWholeRepo = true,
    Url = printAllPath,
    Sha256 = Utils.ComputeSha256Hash(printAllBlob),
    ByteCount = printAllBlobSize,
    FileType = "html",
});
}

outputTasks.Add(output.WriteAllTextAsync("index.json", JsonSerializer.Serialize(index, WorkerJsonContext.Default.OutputIndex)));
outputTasks.Add(output.WriteAllTextAsync("download.json", JsonSerializer.Serialize(downloadIndex, WorkerJsonContext.Default.DownloadIndex)));

await Task.WhenAll(outputTasks);

}
/// <summary>
/// Load all USFM files in a directory inside of the ZipFileSystem
Expand Down
Loading
Loading