Skip to content

Commit

Permalink
Merge pull request #34 from alopezlago/alopezlago/public_v042
Browse files Browse the repository at this point in the history
Merged PR 226: v0.4.2 - .Net 6 support, minor perf fixes
  • Loading branch information
alopezlago committed Nov 30, 2021
2 parents 0683fc4 + 4a983aa commit a5c744a
Show file tree
Hide file tree
Showing 24 changed files with 622 additions and 87 deletions.
16 changes: 11 additions & 5 deletions YetAnotherPacketParser/YetAnotherPacketParser.sln
@@ -1,15 +1,17 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.30320.27
# Visual Studio Version 17
VisualStudioVersion = 17.0.31912.275
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "YetAnotherPacketParser", "YetAnotherPacketParser\YetAnotherPacketParser.csproj", "{99472784-A154-49F0-9E85-10F4A5300DE0}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "YetAnotherPacketParserCommandLine", "YetAnotherPacketParserCommandLine\YetAnotherPacketParserCommandLine.csproj", "{EC1C3290-BFAE-4E31-BF4E-A2A2D736DC6B}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "YetAnotherPacketParserCommandLine", "YetAnotherPacketParserCommandLine\YetAnotherPacketParserCommandLine.csproj", "{EC1C3290-BFAE-4E31-BF4E-A2A2D736DC6B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "YetAnotherPacketParserTests", "YetAnotherPacketParserTests\YetAnotherPacketParserTests.csproj", "{811B7F2E-1E9C-486C-9B26-62B99C239CE0}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "YetAnotherPacketParserTests", "YetAnotherPacketParserTests\YetAnotherPacketParserTests.csproj", "{811B7F2E-1E9C-486C-9B26-62B99C239CE0}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "YetAnotherPacketParserAzureFunction", "YetAnotherPacketParserAzureFunction\YetAnotherPacketParserAzureFunction.csproj", "{B594E98C-9772-40F8-93BA-F641AE397140}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "YetAnotherPacketParserAzureFunction", "YetAnotherPacketParserAzureFunction\YetAnotherPacketParserAzureFunction.csproj", "{B594E98C-9772-40F8-93BA-F641AE397140}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "YetAnotherPacketParserAPI", "YetAnotherPacketParserAPI\YetAnotherPacketParserAPI.csproj", "{90AD57C5-763C-494E-AE92-F90BB0BB5A88}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand All @@ -33,6 +35,10 @@ Global
{B594E98C-9772-40F8-93BA-F641AE397140}.Debug|Any CPU.Build.0 = Debug|Any CPU
{B594E98C-9772-40F8-93BA-F641AE397140}.Release|Any CPU.ActiveCfg = Release|Any CPU
{B594E98C-9772-40F8-93BA-F641AE397140}.Release|Any CPU.Build.0 = Release|Any CPU
{90AD57C5-763C-494E-AE92-F90BB0BB5A88}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{90AD57C5-763C-494E-AE92-F90BB0BB5A88}.Debug|Any CPU.Build.0 = Debug|Any CPU
{90AD57C5-763C-494E-AE92-F90BB0BB5A88}.Release|Any CPU.ActiveCfg = Release|Any CPU
{90AD57C5-763C-494E-AE92-F90BB0BB5A88}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
Expand Up @@ -50,7 +50,9 @@ private static void WriteTossup(TossupNode tossup, StringBuilder builder)
WriteQuestion(tossup.Question, builder);
if (!string.IsNullOrEmpty(tossup.Metadata))
{
builder.Append($"&lt;{tossup.Metadata}&gt;<br>");
builder.Append("&lt;");
builder.Append(tossup.Metadata);
builder.Append("&gt;<br>");
}

builder.Append("</p>");
Expand All @@ -70,7 +72,9 @@ private static void WriteBonus(BonusNode bonus, StringBuilder builder)

if (!string.IsNullOrEmpty(bonus.Metadata))
{
builder.Append($"&lt;{bonus.Metadata}&gt;<br>");
builder.Append("&lt;");
builder.Append(bonus.Metadata);
builder.Append($"&gt;<br>");
}

builder.Append("</p>");
Expand Down
@@ -1,4 +1,5 @@
using System.Diagnostics.CodeAnalysis;
using System;
using System.Diagnostics.CodeAnalysis;
using System.Text.Json;

namespace YetAnotherPacketParser.Compiler.Json
Expand All @@ -14,7 +15,9 @@ public override string ConvertName(string name)
Verify.IsNotNull(name, nameof(name));

// Names will not be null or empty
return name.Substring(0, 1).ToLowerInvariant() + name.Substring(1);
Span<char> firstLetter = new Span<char>(new char[1]);
name.AsSpan(0, 1).ToLowerInvariant(firstLetter);
return string.Concat(firstLetter, name.AsSpan(1));
}
}
}
Expand Up @@ -58,7 +58,9 @@ private TossupNode SanitizeTossup(TossupNode node)
// We want to escape rather than just Sanitize
string? sanitizedMetadata = node.Metadata == null ?
null :
this.Sanitizer.Sanitize(node.Metadata.Replace("<", "&lt;").Replace(">", "&gt;"));
this.Sanitizer.Sanitize(node.Metadata
.Replace("<", "&lt;", StringComparison.Ordinal)
.Replace(">", "&gt;", StringComparison.Ordinal));

return new TossupNode(node.Number, sanitizedQuestion, sanitizedMetadata);
}
Expand All @@ -83,7 +85,9 @@ private BonusNode SanitizeBonus(BonusNode node)
sanitizedBonusParts.Add(this.SanitizeBonusPart(bonusPart));
}
string? sanitizedMetadata = node.Metadata != null ?
this.Sanitizer.Sanitize(node.Metadata.Replace("<", "&lt;").Replace(">", "&gt;")) :
this.Sanitizer.Sanitize(node.Metadata
.Replace("<", "&lt;", StringComparison.Ordinal)
.Replace(">", "&gt;", StringComparison.Ordinal)) :
null;

return new BonusNode(node.Number, sanitizedLeadin, sanitizedBonusParts, sanitizedMetadata);
Expand Down
Expand Up @@ -44,7 +44,7 @@ public override bool Equals(object? obj)

public override int GetHashCode()
{
return (this.Text?.GetHashCode() ?? 0) ^
return (this.Text?.GetHashCode(StringComparison.Ordinal) ?? 0) ^
this.Bolded.GetHashCode() ^
(this.Italic.GetHashCode() << 1) ^
(this.Underlined.GetHashCode() << 2);
Expand Down
18 changes: 9 additions & 9 deletions YetAnotherPacketParser/YetAnotherPacketParser/Lexer/DocxLexer.cs
Expand Up @@ -25,7 +25,7 @@ public class DocxLexer : ILexer
/// <param name="stream">Stream whose contents are a .docx Microsoft Word file</param>
/// <returns>If we were unable to open the stream, then the result is a FailureResult. Otherwise, it is a
/// SuccessResult with a collection of lines from the document.</returns>
public Task<IResult<IEnumerable<ILine>>> GetLines(Stream stream)
public async Task<IResult<IEnumerable<ILine>>> GetLines(Stream stream)
{
Verify.IsNotNull(stream, nameof(stream));

Expand All @@ -39,32 +39,32 @@ public Task<IResult<IEnumerable<ILine>>> GetLines(Stream stream)
{
IResult<IEnumerable<ILine>> nullBodyLines = new FailureResult<IEnumerable<ILine>>(
Strings.UnableToOpenDocx("Couldn't find the body of the document."));
return Task.FromResult(nullBodyLines);
return nullBodyLines;
}

IResult<IEnumerable<ILine>> lines = new SuccessResult<IEnumerable<ILine>>(GetLinesFromBody(body));
return Task.FromResult(lines);
return lines;
}
}
catch (ArgumentNullException ex)
{
Console.Error.WriteLine(ex);
await Console.Error.WriteLineAsync(ex.ToString()).ConfigureAwait(false);
IResult<IEnumerable<ILine>> lines = new FailureResult<IEnumerable<ILine>>(Strings.UnexpectedNullValue);
return Task.FromResult(lines);
return lines;
}
catch (OpenXmlPackageException ex)
{
Console.Error.WriteLine(ex);
await Console.Error.WriteLineAsync(ex.ToString()).ConfigureAwait(false);
IResult<IEnumerable<ILine>> lines = new FailureResult<IEnumerable<ILine>>(
Strings.UnableToOpenDocx(ex.Message));
return Task.FromResult(lines);
return lines;
}
catch (FileFormatException ex)
{
Console.Error.WriteLine(ex);
await Console.Error.WriteLineAsync(ex.ToString()).ConfigureAwait(false);
IResult<IEnumerable<ILine>> lines = new FailureResult<IEnumerable<ILine>>(
Strings.UnableToOpenDocx(ex.Message));
return Task.FromResult(lines);
return lines;
}
}

Expand Down
22 changes: 14 additions & 8 deletions YetAnotherPacketParser/YetAnotherPacketParser/Lexer/HtmlLexer.cs
Expand Up @@ -18,29 +18,35 @@ public async Task<IResult<IEnumerable<ILine>>> GetLines(Stream stream)
// Should be surrounded by a try/catch, in case parsing fails
try
{
BrowsingContext context = new BrowsingContext(Configuration.Default);
IDocument document = await context.OpenAsync((request) => request.Content(stream));
IHtmlElement? body = document.Body;
if (body == null)
IHtmlElement? body;
using (BrowsingContext context = new BrowsingContext(Configuration.Default))
{
return new FailureResult<IEnumerable<ILine>>(Strings.HtmlFileNeedsBodyElement);
IDocument document = await context.OpenAsync((request) => request.Content(stream)).ConfigureAwait(false);
body = document.Body;
if (body == null)
{
return new FailureResult<IEnumerable<ILine>>(Strings.HtmlFileNeedsBodyElement);
}
}

IList<FormattedText> textLines = this.GetTextLines(body);
IList<FormattedText> textLines = GetTextLines(body);
return ClassifyLines(textLines);
}
// Unfortunately, we don't know what AngleSharp can throw, so we have to catch-all from here
#pragma warning disable CA1031 // Do not catch general exception types
catch (Exception ex)
#pragma warning restore CA1031 // Do not catch general exception types
{
// This is bad form, but I'll try to narrow down the exceptions later
Console.Error.WriteLine(ex);
await Console.Error.WriteLineAsync(ex.ToString()).ConfigureAwait(false);
IResult<IEnumerable<ILine>> lines = new FailureResult<IEnumerable<ILine>>(
Strings.UnableToOpenHtml(ex.Message));
return lines;
}
}

// We get the root paragraphs, then get all of the lines included in the root paragraph
private IList<FormattedText> GetTextLines(IHtmlElement body)
private static IList<FormattedText> GetTextLines(IHtmlElement body)
{
IList<FormattedText> formattedTexts = new List<FormattedText>();
Formatting previousFormatting = new Formatting();
Expand Down
46 changes: 27 additions & 19 deletions YetAnotherPacketParser/YetAnotherPacketParser/MagicWordDetector.cs
Expand Up @@ -18,7 +18,6 @@ internal static class MagicWordDetector
zipSpannedMagicWords
};


// Returns a successful result if it is a zip file. It returns a read-only stream
public static async Task<Tuple<bool, Stream>> IsZipFile(Stream stream)
{
Expand All @@ -34,7 +33,7 @@ internal static class MagicWordDetector
}

byte[] buffer = new byte[zipMagicWords.Length];
await stream.ReadAsync(buffer, 0, zipMagicWords.Length);
await stream.ReadAsync(buffer, CancellationToken.None).ConfigureAwait(false);
Stream peekableStream = new PeekableStream(stream, buffer);
peekableStream.Position = 0;
return new Tuple<bool, Stream>(
Expand Down Expand Up @@ -95,7 +94,7 @@ public override int Read(byte[] buffer, int offset, int count)
{
if (offset + count > buffer.Length)
{
throw new ArgumentOutOfRangeException();
throw new ArgumentOutOfRangeException(nameof(offset));
}

int index = offset;
Expand Down Expand Up @@ -126,44 +125,52 @@ public override int Read(byte[] buffer, int offset, int count)
}
}

public override async Task<int> ReadAsync(
byte[] buffer, int offset, int count, CancellationToken cancellationToken)
public override async ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default)
{
cancellationToken.ThrowIfCancellationRequested();

if (offset + count > buffer.Length)
{
throw new ArgumentOutOfRangeException();
}

int index = offset;
for (long i = this.position; i < this.peekBuffer.Length; i++)
int count = buffer.Length;
int index = 0;
for (long i = this.position; i < this.peekBuffer.Length && index < count; i++)
{
buffer[index] = this.peekBuffer[i];
buffer.Span[index] = this.peekBuffer[i];
index++;
this.position++;
}

// truncation from long to int is safe if the long is less than int.MaxValue
int bytesFromBuffer = index - offset;
int bytesFromStream = count - bytesFromBuffer;
int bytesFromStream = count - index;
if (bytesFromStream > 0)
{
int readCount = await this.stream.ReadAsync(buffer, index, bytesFromStream);
int readCount = await this.stream.ReadAsync(buffer.Slice(index, bytesFromStream), cancellationToken)
.ConfigureAwait(false);

if (this.CanSeek)
{
this.position = this.stream.Position;
}

return readCount + bytesFromBuffer;
return readCount + index;
}
else
{
return count;
}
}

/// <summary>
/// Array-based asynchronous read. Validates the requested range and then forwards to the
/// <see cref="ReadAsync(Memory{byte}, CancellationToken)"/> overload, so both overloads share
/// one implementation.
/// </summary>
/// <param name="buffer">Array that receives the bytes that are read.</param>
/// <param name="offset">Index in <paramref name="buffer"/> at which to start storing bytes.</param>
/// <param name="count">Maximum number of bytes to read.</param>
/// <param name="cancellationToken">Token checked before any work is done and passed on to the
/// Memory-based overload.</param>
/// <returns>The number of bytes read, as reported by the Memory-based overload.</returns>
/// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or
/// <paramref name="count"/> is negative, or the range they describe does not fit in
/// <paramref name="buffer"/>.</exception>
/// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was already
/// canceled when the method was called.</exception>
public override async Task<int> ReadAsync(
    byte[] buffer, int offset, int count, CancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    if (buffer == null)
    {
        // Fail with an argument exception instead of a NullReferenceException on buffer.Length
        throw new ArgumentNullException(nameof(buffer));
    }

    if (offset < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(offset));
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count));
    }

    // Compare against the remaining space rather than computing offset + count, which could
    // overflow for large values and slip past the check.
    if (count > buffer.Length - offset)
    {
        throw new ArgumentOutOfRangeException(nameof(offset));
    }

    // Library code: don't capture the caller's synchronization context
    return await this.ReadAsync(buffer.AsMemory(offset, count), cancellationToken).ConfigureAwait(false);
}

public override long Seek(long offset, SeekOrigin origin)
{
long result = this.stream.Seek(offset, origin);
Expand Down Expand Up @@ -192,9 +199,10 @@ public override void Close()
this.stream.Close();
}

public override ValueTask DisposeAsync()
public override async ValueTask DisposeAsync()
{
return this.stream.DisposeAsync();
await base.DisposeAsync().ConfigureAwait(false);
await this.stream.DisposeAsync().ConfigureAwait(false);
}
}
}
Expand Down
Expand Up @@ -32,7 +32,7 @@ public static class PacketConverter
Verify.IsNotNull(options, nameof(options));
Verify.IsNotNull(stream, nameof(stream));

Tuple<bool, Stream> readStreamResult = await MagicWordDetector.IsZipFile(stream);
Tuple<bool, Stream> readStreamResult = await MagicWordDetector.IsZipFile(stream).ConfigureAwait(false);
stream = readStreamResult.Item2;

try
Expand All @@ -42,7 +42,7 @@ public static class PacketConverter
// Assume it's HTML for now, and refactor if we need to support more input formats
return new ConvertResult[]
{
await CompilePacketAsync(options.StreamName, stream, options, FileType.Html)
await CompilePacketAsync(options.StreamName, stream, options, FileType.Html).ConfigureAwait(false)
};
}

Expand Down
Expand Up @@ -128,7 +128,7 @@ public IResult<PacketNode> Parse(IEnumerable<ILine> lines)
string metadata = metadataLine.Text.UnformattedText;
if (metadata.Length > 2)
{
int metadataStart = metadata.IndexOf('<');
int metadataStart = metadata.IndexOf('<', StringComparison.Ordinal);
int metadataEnd = metadata.LastIndexOf('>');
if (metadataStart >= 0 && metadataStart < metadata.Length + 1 && metadataEnd > metadataStart)
{
Expand Down Expand Up @@ -163,7 +163,7 @@ private static string GetFailureMessage(LinesEnumerator lines, string message)
}
else
{
snippet.Append(segment.Text.Substring(0, remainingLength));
snippet.Append(segment.Text.AsSpan(0, remainingLength));
}

remainingLength = checked(FailureSnippetCharacterLimit - snippet.Length);
Expand Down
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<OutputType>Library</OutputType>
<TargetFramework>netcoreapp3.1</TargetFramework>
<TargetFrameworks>netcoreapp3.1;net6.0</TargetFrameworks>
<Nullable>enable</Nullable>
<AssemblyName>YetAnotherPacketParser</AssemblyName>
<Description>Yet Another Packet Parser parses .docx quiz bowl packets and translates them to different formats like JSON or HTML</Description>
Expand All @@ -17,18 +17,17 @@
<PackageTags>quizbowl packetparser quizbowlpacketparser</PackageTags>
<PackageProjectUrl>https://github.com/alopezlago/YetAnotherPacketParser</PackageProjectUrl>
<PackageLicenseFile>LICENSE.txt</PackageLicenseFile>
<AssemblyVersion>0.4.1.0</AssemblyVersion>
<FileVersion>0.4.1.0</FileVersion>
<Version>0.4.1.0</Version>
<AssemblyVersion>0.4.2.0</AssemblyVersion>
<FileVersion>0.4.2.0</FileVersion>
<Version>0.4.2.0</Version>
<EnableNETAnalyzers>true</EnableNETAnalyzers>
<AnalysisMode>Recommended</AnalysisMode>
<AnalysisModeSecurity>All</AnalysisModeSecurity>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="DocumentFormat.OpenXml" Version="2.13.1" />
<PackageReference Include="HtmlSanitizer" Version="6.0.441" />
<PackageReference Include="Microsoft.CodeAnalysis.NetAnalyzers" Version="5.0.3">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="HtmlSanitizer" Version="6.0.453" />
</ItemGroup>

<ItemGroup>
Expand Down
@@ -0,0 +1,12 @@
{
"version": 1,
"isRoot": true,
"tools": {
"dotnet-ef": {
"version": "6.0.0",
"commands": [
"dotnet-ef"
]
}
}
}

0 comments on commit a5c744a

Please sign in to comment.