-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
1,029 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
<?xml version="1.0" encoding="utf-8"?> | ||
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | ||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" /> | ||
<PropertyGroup> | ||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> | ||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> | ||
<ProjectGuid>{A80F8E1D-1A71-4C58-9436-8F332555CC20}</ProjectGuid> | ||
<OutputType>Exe</OutputType> | ||
<AppDesignerFolder>Properties</AppDesignerFolder> | ||
<RootNamespace>CollinsTagger</RootNamespace> | ||
<AssemblyName>CollinsTagger</AssemblyName> | ||
<TargetFrameworkVersion>v3.5</TargetFrameworkVersion> | ||
<FileAlignment>512</FileAlignment> | ||
<TargetFrameworkProfile /> | ||
</PropertyGroup> | ||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> | ||
<PlatformTarget>AnyCPU</PlatformTarget> | ||
<DebugSymbols>true</DebugSymbols> | ||
<DebugType>full</DebugType> | ||
<Optimize>false</Optimize> | ||
<OutputPath>bin\Debug\</OutputPath> | ||
<DefineConstants>DEBUG;TRACE</DefineConstants> | ||
<ErrorReport>prompt</ErrorReport> | ||
<WarningLevel>4</WarningLevel> | ||
</PropertyGroup> | ||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> | ||
<PlatformTarget>AnyCPU</PlatformTarget> | ||
<DebugType>pdbonly</DebugType> | ||
<Optimize>true</Optimize> | ||
<OutputPath>bin\Release\</OutputPath> | ||
<DefineConstants>TRACE</DefineConstants> | ||
<ErrorReport>prompt</ErrorReport> | ||
<WarningLevel>4</WarningLevel> | ||
</PropertyGroup> | ||
<ItemGroup> | ||
<Reference Include="System" /> | ||
<Reference Include="System.Core" /> | ||
<Reference Include="System.Xml.Linq" /> | ||
<Reference Include="System.Data.DataSetExtensions" /> | ||
<Reference Include="System.Data" /> | ||
<Reference Include="System.Xml" /> | ||
</ItemGroup> | ||
<ItemGroup> | ||
<Compile Include="MurMur3.cs" /> | ||
<Compile Include="Program.cs" /> | ||
<Compile Include="Tagger.cs" /> | ||
<Compile Include="Trainer.cs" /> | ||
<Compile Include="Viterbi.cs" /> | ||
</ItemGroup> | ||
<ItemGroup> | ||
<Folder Include="Properties\" /> | ||
</ItemGroup> | ||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> | ||
<!-- To modify your build process, add your task inside one of the targets below and uncomment it. | ||
Other similar extension points exist, see Microsoft.Common.targets. | ||
<Target Name="BeforeBuild"> | ||
</Target> | ||
<Target Name="AfterBuild"> | ||
</Target> | ||
--> | ||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio 2012 | ||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CollinsTagger", "CollinsTagger.csproj", "{A80F8E1D-1A71-4C58-9436-8F332555CC20}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Release|Any CPU = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{A80F8E1D-1A71-4C58-9436-8F332555CC20}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{A80F8E1D-1A71-4C58-9436-8F332555CC20}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{A80F8E1D-1A71-4C58-9436-8F332555CC20}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{A80F8E1D-1A71-4C58-9436-8F332555CC20}.Release|Any CPU.Build.0 = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
EndGlobal |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
/* | ||
This code is public domain. | ||
The MurmurHash3 algorithm was created by Austin Appleby and put into the public domain. See http://code.google.com/p/smhasher/ | ||
This C# variant was authored by | ||
Elliott B. Edwards and was placed into the public domain as a gist | ||
Status...Working on verification (Test Suite) | ||
The original gist was set up to run as a LinqPad (linqpad.net) script and ended with a ".Dump()" call; that call is not part of this file. | ||
*/ | ||
|
||
using System.IO; | ||
namespace CollinsTagger | ||
{ | ||
/// <summary>
/// 32-bit MurmurHash3 over a byte array, using a fixed seed.
/// All hash arithmetic is intentionally modulo 2^32 and is therefore performed
/// inside <c>unchecked</c> regions so that the result does not depend on the
/// project's overflow-checking compiler setting.
/// </summary>
public static class MurMurHash3
{
    //Change to suit your needs
    const uint seed = 144;

    // Multipliers applied to every input word before it is folded into the state.
    const uint c1 = 0xcc9e2d51;
    const uint c2 = 0x1b873593;

    /// <summary>
    /// Computes the hash of <paramref name="data"/>.
    /// </summary>
    /// <param name="data">Bytes to hash; may be empty but not null.</param>
    /// <returns>The 32-bit hash, reinterpreted as a signed integer.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="data"/> is null.
    /// </exception>
    public static int Hash(byte[] data)
    {
        if (data == null)
        {
            throw new System.ArgumentNullException("data");
        }

        int length = data.Length;
        // Index of the first byte that is not part of a complete 4-byte word.
        int tailStart = length - (length % 4);

        unchecked // wrap-around is part of the algorithm
        {
            uint h1 = seed;

            // Body: consume the input one little-endian 32-bit word at a time.
            for (int i = 0; i < tailStart; i += 4)
            {
                // Widen to uint before shifting so the top byte never lands in
                // the sign bit of an int.
                uint k1 = (uint)data[i]
                          | ((uint)data[i + 1] << 8)
                          | ((uint)data[i + 2] << 16)
                          | ((uint)data[i + 3] << 24);

                h1 ^= scramble(k1);
                h1 = rotl32(h1, 13);
                h1 = h1 * 5 + 0xe6546b64;
            }

            // Tail: the remaining 1-3 bytes are mixed in without the extra
            // rotate/multiply step applied to full words.
            if (tailStart < length)
            {
                uint k1 = 0;
                for (int i = length - 1; i >= tailStart; i--)
                {
                    k1 = (k1 << 8) | data[i];
                }

                h1 ^= scramble(k1);
            }

            // Finalization: fold in the total length, then avalanche the bits.
            h1 ^= (uint)length;
            return (int)fmix(h1);
        }
    }

    // Pre-mixes one input word (multiply, rotate, multiply) before it is XORed into the state.
    private static uint scramble(uint k1)
    {
        unchecked
        {
            k1 *= c1;
            k1 = rotl32(k1, 15);
            k1 *= c2;
            return k1;
        }
    }

    // Rotates x left by r bits (callers pass 13 or 15).
    private static uint rotl32(uint x, byte r)
    {
        return (x << r) | (x >> (32 - r));
    }

    // Final avalanche step: alternating xor-shifts and multiplications.
    private static uint fmix(uint h)
    {
        unchecked
        {
            h ^= h >> 16;
            h *= 0x85ebca6b;
            h ^= h >> 13;
            h *= 0xc2b2ae35;
            h ^= h >> 16;
            return h;
        }
    }
}
} | ||
|
||
|
Oops, something went wrong.