Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
ashish01 committed Oct 16, 2013
1 parent 1b07835 commit b1893b4
Show file tree
Hide file tree
Showing 8 changed files with 1,029 additions and 1 deletion.
61 changes: 61 additions & 0 deletions CollinsTagger/CollinsTagger.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- MSBuild project file for the CollinsTagger assembly (OutputType is Exe). -->
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Import the shared Microsoft.Common.props defaults, but only when that file exists. -->
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<!-- Settings shared by every configuration. Configuration and Platform fall back to
     Debug and AnyCPU when the caller leaves them empty. -->
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{A80F8E1D-1A71-4C58-9436-8F332555CC20}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>CollinsTagger</RootNamespace>
<AssemblyName>CollinsTagger</AssemblyName>
<TargetFrameworkVersion>v3.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<TargetFrameworkProfile />
</PropertyGroup>
<!-- Debug|AnyCPU: optimization off, full debug symbols, DEBUG and TRACE defined, output in bin\Debug\. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<!-- Release|AnyCPU: optimization on, pdbonly debug info, only TRACE defined, output in bin\Release\. -->
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<!-- Assembly references available to the project. -->
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<!-- C# source files compiled into the assembly. -->
<ItemGroup>
<Compile Include="MurMur3.cs" />
<Compile Include="Program.cs" />
<Compile Include="Tagger.cs" />
<Compile Include="Trainer.cs" />
<Compile Include="Viterbi.cs" />
</ItemGroup>
<!-- Declares the Properties folder as a project item (it matches AppDesignerFolder above). -->
<ItemGroup>
<Folder Include="Properties\" />
</ItemGroup>
<!-- Import the standard C# build targets. -->
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>
20 changes: 20 additions & 0 deletions CollinsTagger/CollinsTagger.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CollinsTagger", "CollinsTagger.csproj", "{A80F8E1D-1A71-4C58-9436-8F332555CC20}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{A80F8E1D-1A71-4C58-9436-8F332555CC20}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A80F8E1D-1A71-4C58-9436-8F332555CC20}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A80F8E1D-1A71-4C58-9436-8F332555CC20}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A80F8E1D-1A71-4C58-9436-8F332555CC20}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
111 changes: 111 additions & 0 deletions CollinsTagger/MurMur3.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
This code is public domain.
The MurmurHash3 algorithm was created by Austin Appleby and put into the public domain. See http://code.google.com/p/smhasher/
This C# variant was authored by
Elliott B. Edwards and was placed into the public domain as a gist
Status...Working on verification (Test Suite)
Originally set up to run as a LinqPad (linqpad.net) script; the ".Dump()" call from that version is not present in this class.
*/

using System.IO;
namespace CollinsTagger
{
/// <summary>
/// MurmurHash3 (32-bit result) over a byte array, using a fixed seed.
/// </summary>
public static class MurMurHash3
{
    //Change to suit your needs
    const uint seed = 144;

    // Multipliers applied to every 32-bit word before it is folded into the state.
    const uint c1 = 0xcc9e2d51;
    const uint c2 = 0x1b873593;

    /// <summary>
    /// Hashes <paramref name="data"/> and returns the 32-bit result as a signed int.
    /// </summary>
    /// <param name="data">Bytes to hash. May be empty; must not be null.</param>
    /// <returns>The 32-bit hash, reinterpreted as <see cref="int"/> (may be negative).</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="data"/> is null.</exception>
    public static int Hash(byte[] data)
    {
        if (data == null)
        {
            throw new System.ArgumentNullException("data");
        }

        // All arithmetic here relies on 32-bit wrap-around, so the whole computation
        // (not just the final cast) must be immune to a /checked build.
        unchecked
        {
            uint h1 = seed;
            int length = data.Length;
            int blockEnd = length & ~3; // index just past the last complete 4-byte block

            // Body: consume complete 4-byte blocks, little-endian.
            for (int i = 0; i < blockEnd; i += 4)
            {
                uint k1 = (uint)
                    (data[i]
                    | data[i + 1] << 8
                    | data[i + 2] << 16
                    | data[i + 3] << 24);

                h1 ^= scramble(k1);
                h1 = rotl32(h1, 13);
                h1 = h1 * 5 + 0xe6546b64;
            }

            // Tail: the 1-3 leftover bytes are packed little-endian and mixed in
            // without the rotate/multiply step that follows a full block.
            int remaining = length & 3;
            if (remaining != 0)
            {
                uint tail = data[blockEnd];
                if (remaining > 1)
                {
                    tail |= (uint)data[blockEnd + 1] << 8;
                }
                if (remaining > 2)
                {
                    tail |= (uint)data[blockEnd + 2] << 16;
                }
                h1 ^= scramble(tail);
            }

            // finalization: fold in the total byte count, then avalanche
            h1 ^= (uint)length;
            return (int)fmix(h1);
        }
    }

    // Pre-mixes one 32-bit word: multiply by c1, rotate left 15, multiply by c2.
    private static uint scramble(uint k)
    {
        unchecked
        {
            k *= c1;
            k = rotl32(k, 15);
            k *= c2;
            return k;
        }
    }

    // Rotates x left by r bits (callers pass r in 1..31).
    private static uint rotl32(uint x, byte r)
    {
        return (x << r) | (x >> (32 - r));
    }

    // Final avalanche: alternating xor-shift and multiply steps over the state.
    private static uint fmix(uint h)
    {
        unchecked
        {
            h ^= h >> 16;
            h *= 0x85ebca6b;
            h ^= h >> 13;
            h *= 0xc2b2ae35;
            h ^= h >> 16;
            return h;
        }
    }
}
}


Loading

0 comments on commit b1893b4

Please sign in to comment.