Skip to content

Commit

Permalink
Add MultiDawg.MatchTree()
Browse files Browse the repository at this point in the history
  • Loading branch information
bzaar committed Feb 15, 2022
1 parent 32a44b6 commit 291156c
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 32 deletions.
29 changes: 27 additions & 2 deletions DawgSharp.UnitTests/MultiDawgTests.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System.Collections;
using System.Collections.Generic;
using System.Collections.Generic;
using System.Linq;
using NUnit.Framework;

Expand All @@ -8,6 +7,32 @@ namespace DawgSharp.UnitTests
[TestFixture]
public class MultiDawgTests
{
/// <summary>
/// Inserts a word under two spellings (with 'ё', and with every 'ё' replaced by 'е')
/// and checks which payloads <c>MatchTree</c> returns when each position of the query
/// is expanded by <c>YeToYeYo</c>.
/// </summary>
[Test]
[TestCase("ё")]
[TestCase("сё")]
[TestCase("сёл")]
[TestCase("ёж")]
public void MatchTreeTest(string yo)
{
    // The 'е' spelling carries payload 1, the 'ё' spelling carries payload 2.
    string ye = yo.Replace('ё', 'е');

    var dawgBuilder = new MultiDawgBuilder<int>();
    dawgBuilder.Insert(ye, new[] { 1 });
    dawgBuilder.Insert(yo, new[] { 2 });

    MultiDawg<int> dawg = dawgBuilder.BuildMultiDawg();

    // Querying with the 'е' spelling offers both letters at each 'е' position,
    // so both stored spellings are expected to match.
    IEnumerable<int> payloadsForYe = dawg
        .MatchTree(ye.Select(YeToYeYo))
        .SelectMany(match => match.Value);
    AssertSequenceEquals(payloadsForYe, 1, 2);

    // 'ё' is not expanded, so only the 'ё' spelling is expected to match.
    IEnumerable<int> payloadsForYo = dawg
        .MatchTree(yo.Select(YeToYeYo))
        .SelectMany(match => match.Value);
    AssertSequenceEquals(payloadsForYo, 2);
}

/// <summary>
/// Lists the characters that may stand in for <paramref name="c"/>:
/// always <paramref name="c"/> itself, followed by 'ё' when <paramref name="c"/> is 'е'.
/// </summary>
static IEnumerable<char> YeToYeYo(char c)
{
    yield return c;

    // Only 'е' has an alternative spelling; every other character stands for itself alone.
    if (c != 'е')
    {
        yield break;
    }

    yield return 'ё';
}

[Test]
public void Test1()
{
Expand Down
8 changes: 6 additions & 2 deletions DawgSharp/DawgSharp.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@

This package is provided under the terms of the GNU GPL v3. Source code and documentation are available on GitHub: https://github.com/bzaar/DawgSharp. Commercial licenses are also available at http://morpher.co.uk
</Description>
<Copyright>2014 - 2021 Sergey Slepov</Copyright>
<Copyright>2014 - 2022 Sergey Slepov</Copyright>
<PackageTags>DAWG;string;dictionary;lookup</PackageTags>
<VersionSuffix>$(VersionSuffix)</VersionSuffix>
<RepositoryUrl>https://github.com/bzaar/DawgSharp</RepositoryUrl>
<PackageProjectUrl>https://github.com/bzaar/DawgSharp</PackageProjectUrl>
<PackageLicenseUrl>https://github.com/bzaar/DawgSharp/blob/master/LICENSE</PackageLicenseUrl>
<PackageIconUrl>https://github.com/bzaar/DawgSharp/blob/master/icon.png?raw=true</PackageIconUrl>
<PackageReleaseNotes>Add MultiDawg.MatchPrefix()
<PackageReleaseNotes>Add MultiDawg.MatchTree()
</PackageReleaseNotes>
<DocumentationFile>bin\$(Configuration)\$(TargetFramework)\DawgSharp.xml</DocumentationFile>
<NoWarn>1591</NoWarn> <!--"Missing XML comment for publicly visible type or member."-->
Expand All @@ -39,4 +39,8 @@
<FrameworkPathOverride Condition="'$(TargetFramework)' == 'net35'">$(MSBuildProgramFiles32)\Reference Assemblies\Microsoft\Framework\.NETFramework\v3.5\Profile\Client</FrameworkPathOverride>
</PropertyGroup>

<ItemGroup>
<Compile Remove="IsExternalInit.cs" />
</ItemGroup>

</Project>
6 changes: 6 additions & 0 deletions DawgSharp/MultiDawg.cs
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,11 @@ public IEnumerable<KeyValuePair<string, IEnumerable<TPayload>>> MatchPrefix(IEnu
public int GetNodeCount() => yaleGraph.NodeCount;

public int MaxPayloads => payloads.Length;

/// <summary>
/// Forwards <paramref name="tree"/> to <c>yaleGraph.MatchTree</c> and, for every match it
/// reports, pairs the matched string with the result of <c>GetPayloads</c> for the matched value.
/// </summary>
/// <param name="tree">One sequence of candidate characters per position of the word.</param>
/// <returns>A lazily evaluated sequence of (matched string, payloads) pairs.</returns>
public IEnumerable<KeyValuePair<string, IEnumerable<TPayload>>> MatchTree(IEnumerable<IEnumerable<char>> tree) =>
    from match in yaleGraph.MatchTree(tree)
    select new KeyValuePair<string, IEnumerable<TPayload>>(match.Key, GetPayloads(match.Value));
}
}
134 changes: 107 additions & 27 deletions DawgSharp/YaleGraph.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,53 +45,133 @@ public IEnumerable<int> GetPath(IEnumerable<char> word)

foreach (char c in word)
{
if (c >= firstChar && c <= lastChar)
{
ushort charIndexPlusOne = charToIndexPlusOne[c - firstChar];

if (charIndexPlusOne != 0)
{
int child_i = GetChildIndex(node_i, charIndexPlusOne);
int child_i = GetChildIndex(node_i, c);

if (child_i >= 0)
{
node_i = children[child_i].Index;
if (child_i >= 0)
{
node_i = children[child_i].Index;

yield return node_i;
continue;
}
}
yield return node_i;
continue;
}

yield return -1;
yield break;
}
}

/// <summary>
/// Looks up the child of node <paramref name="node_i"/> that is labelled with character
/// <paramref name="c"/>.
/// </summary>
/// <param name="node_i">Index of the parent node; used to index <c>firstChildForNode</c>.</param>
/// <param name="c">The character to follow from the parent node.</param>
/// <returns>
/// The position of the matching entry in <c>children</c>, or a negative value when there is none
/// (-1 from the early exits, or the negative result of <see cref="Array.BinarySearch(Array, int, int, object)"/>).
/// </returns>
private int GetChildIndex(int node_i, char c)
{
    // Characters outside [firstChar, lastChar] have no slot in charToIndexPlusOne.
    if (c < firstChar || c > lastChar)
    {
        return -1;
    }

    ushort charIndexPlusOne = charToIndexPlusOne[c - firstChar];

    // Zero marks a character that has no index assigned.
    if (charIndexPlusOne == 0)
    {
        return -1;
    }

    // The children of node_i occupy the half-open range [firstChild_i, lastChild_i) of 'children'.
    int firstChild_i = firstChildForNode[node_i];
    int lastChild_i = firstChildForNode[node_i + 1];
    int nChildren = lastChild_i - firstChild_i;

    var charIndex = (ushort)(charIndexPlusOne - 1);

    // Single-child nodes are compared directly instead of going through the binary search.
    if (nChildren == 1)
    {
        return children[firstChild_i].CharIndex == charIndex ? firstChild_i : -1;
    }

    var searchValue = new YaleChild(-1, charIndex);

    return Array.BinarySearch(children, firstChild_i, nChildren, searchValue, childComparer);
}

/// <summary>
/// Walks the graph depth-first, choosing at depth <c>i</c> one character from the
/// <c>i</c>-th sequence of <paramref name="tree"/>, and reports every combination
/// for which a child exists at every level.
/// </summary>
/// <param name="tree">
/// One sequence of candidate characters per position. It is copied into a list when
/// enumeration starts; the inner sequences are enumerated lazily, possibly several times.
/// </param>
/// <returns>
/// Pairs of (the string formed by the chosen characters, the index of the node reached).
/// An empty <paramref name="tree"/> yields a single pair of the empty string and the root node index.
/// </returns>
/// <remarks>
/// All character enumerators opened during the walk are disposed even when the caller
/// stops enumerating before the sequence is exhausted.
/// </remarks>
public IEnumerable<KeyValuePair<string, int>> MatchTree(IEnumerable<IEnumerable<char>> tree)
{
    int node_i = rootNodeIndex;

    // One frame per level already descended into: the parent node and the
    // (partially consumed) enumerator of candidate characters for that level.
    var stack = new Stack<Frame>();

    var enums = tree.ToList();

    var sb = new StringBuilder(enums.Count);

    // Candidates still to be tried at the current depth; null means "not opened yet".
    IEnumerator<char> enumerator = null;

    try
    {
        for (;;)
        {
            if (enumerator != null)
            {
                // Advance to the next candidate that has a child under the current node.
                int childIndex = -1;
                while (enumerator.MoveNext())
                {
                    childIndex = GetChildIndex(node_i, enumerator.Current);

                    if (childIndex >= 0)
                    {
                        break;
                    }
                }

                if (childIndex >= 0)
                {
                    // Descend: remember where to resume on this level, then move to the child.
                    sb.Append(enumerator.Current);
                    stack.Push(new Frame(node_i, enumerator));
                    node_i = children[childIndex].Index;
                    enumerator = null;
                }
                else
                {
                    // Level exhausted: release it and backtrack to the parent level.
                    enumerator.Dispose();
                    enumerator = null; // keeps the finally block from disposing it a second time

                    if (stack.Count == 0)
                    {
                        yield break;
                    }

                    (node_i, enumerator) = stack.Pop();
                    --sb.Length;
                }
            }
            else if (stack.Count < enums.Count)
            {
                // Just arrived at a new depth: open its candidate characters.
                enumerator = enums[stack.Count].GetEnumerator();
            }
            else
            {
                // A character has been chosen for every position: report the match.
                yield return new KeyValuePair<string, int>(sb.ToString(), node_i);

                if (stack.Count == 0)
                {
                    yield break;
                }

                // Backtrack to try the remaining candidates of the last level.
                (node_i, enumerator) = stack.Pop();
                --sb.Length;
            }
        }
    }
    finally
    {
        // Runs on normal completion and when the consumer disposes the iterator early
        // (e.g. First(), Take(), or a break out of foreach).
        enumerator?.Dispose();

        while (stack.Count > 0)
        {
            stack.Pop().Enumerator.Dispose();
        }
    }
}

/// <summary>
/// One suspended level of the depth-first walk performed by <c>MatchTree</c>:
/// the node the walk descended from and the enumerator of candidate characters
/// that was being consumed at that level.
/// </summary>
sealed class Frame
{
    /// <summary>Captures a node index together with the enumerator to resume from.</summary>
    public Frame(int nodeIndex, IEnumerator<char> enumerator)
    {
        NodeIndex = nodeIndex;
        Enumerator = enumerator;
    }

    /// <summary>The node index stored in this frame.</summary>
    public int NodeIndex { get; }

    /// <summary>The enumerator stored in this frame.</summary>
    public IEnumerator<char> Enumerator { get; }

    /// <summary>Enables <c>(nodeIndex, enumerator) = frame;</c> deconstruction syntax.</summary>
    public void Deconstruct(out int nodeIndex, out IEnumerator<char> enumerator)
    {
        nodeIndex = NodeIndex;
        enumerator = Enumerator;
    }
}

private static readonly ChildComparer childComparer = new();
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ image: Visual Studio 2019
configuration: Release
environment:
major_version: 1
minor_version: 5
minor_version: 6
patch_version: 0
version: '$(major_version).$(minor_version).$(patch_version).{build}'
dotnet_csproj:
Expand Down

0 comments on commit 291156c

Please sign in to comment.