@@ -1,25 +1,18 @@
// Compiler implementation of the D programming language
// Copyright (c) 1999-2015 by Digital Mars
// All Rights Reserved
// written by Walter Bright
// http://www.digitalmars.com
// Distributed under the Boost Software License, Version 1.0.
// http://www.boost.org/LICENSE_1_0.txt
/* Copyright (c) 2010-2014 by Digital Mars
* All Rights Reserved, written by Walter Bright
* http://www.digitalmars.com
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
* https://github.com/D-Programming-Language/dmd/blob/master/src/root/speller.c
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>
module ddmd.root.speller ;
#if __sun || _MSC_VER
#include <alloca.h>
#endif
import core.stdc.limits , core.stdc.stdlib , core.stdc.string ;
#include "speller.h"
// Callback type taken by speller(), spellerX() and spellerY().
// It is invoked as fp(fparg, candidate, &cost): the first argument is the
// caller-supplied fparg passed through unchanged, the second is the
// 0-terminated candidate spelling, and the third receives a cost.
// The returned pointer and that cost are handed to combineSpellerResult().
extern ( C++ ) alias fp_speller_t = void * function ( void * , const ( char ) * , int * );
const char idchars [] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_" ;
// Lower-case letters, upper-case letters, digits and underscore (63 characters).
// The unit test in this file passes this as the `charset` argument of speller(),
// where each character is tried as a substitution/insertion candidate.
// The literal must start at 'a': a leading space would add ' ' to the candidate set.
extern ( C++ ) __gshared const ( char ) * idchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_" ;
/* *************************************************
* combine a new result from the spell checker to
Expand All
@@ -35,7 +28,7 @@ const char idchars[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123
* true if the cost is less than or equal to 0
* false otherwise
*/
bool combineSpellerResult (void * & p , int & cost , void * np , int ncost )
extern ( C++ ) bool combineSpellerResult(ref void * p, ref int cost, void * np, int ncost)
{
if (np && ncost < cost)
{
Expand All
@@ -47,29 +40,25 @@ bool combineSpellerResult(void*& p, int& cost, void* np, int ncost)
return false ;
}
void * spellerY (const char * seed , size_t seedlen , fp_speller_t fp , void * fparg ,
const char * charset , size_t index , int * cost )
extern (C++ ) void * spellerY(const (char )* seed, size_t seedlen, fp_speller_t fp, void * fparg, const (char )* charset, size_t index, int * cost)
{
if (! seedlen)
return NULL ;
return null ;
assert (seed[seedlen] == 0 );
char tmp [30 ];
char * buf ;
if (seedlen <= sizeof (tmp ) - 2 )
buf = tmp ;
char [30 ] tmp;
char * buf;
if (seedlen <= tmp.sizeof - 2 )
buf = tmp.ptr;
else
{
buf = (char * )alloca (seedlen + 2 ); // leave space for extra char
buf = cast (char * )alloca(seedlen + 2 ); // leave space for extra char
if (! buf)
return NULL ; // no matches
return null ; // no matches
}
memcpy(buf, seed, index);
* cost = INT_MAX ;
void * p = NULL ;
void * p = null ;
int ncost;
/* Delete at seed[index] */
if (index < seedlen)
{
Expand All
@@ -79,62 +68,53 @@ void *spellerY(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg,
if (combineSpellerResult(p, * cost, np, ncost))
return p;
}
if (charset && * charset)
{
/* Substitutions */
if (index < seedlen)
{
memcpy(buf, seed, seedlen + 1 );
for (const char * s = charset ; * s ; s ++ )
for (const ( char ) * s = charset; * s; s++ )
{
buf[index] = * s;
// printf("sub buf = '%s'\n", buf);
void * np = (* fp)(fparg, buf, &ncost);
if (combineSpellerResult(p, * cost, np, ncost))
return p;
}
assert (buf[seedlen] == 0 );
}
/* Insertions */
memcpy (buf + index + 1 , seed + index , seedlen + 1 - index );
for (const char * s = charset ; * s ; s ++ )
memcpy(buf + index + 1 , seed + index, seedlen + 1 - index);
for (const (char )* s = charset; * s; s++ )
{
buf[index] = * s;
// printf("ins buf = '%s'\n", buf);
void * np = (* fp)(fparg, buf, &ncost);
if (combineSpellerResult(p, * cost, np, ncost))
return p;
}
assert (buf[seedlen + 1 ] == 0 );
}
return p ; // return "best" result
return p; // return "best" result
}
void * spellerX (const char * seed , size_t seedlen , fp_speller_t fp , void * fparg ,
const char * charset , int flag )
extern (C++ ) void * spellerX(const (char )* seed, size_t seedlen, fp_speller_t fp, void * fparg, const (char )* charset, int flag)
{
if (! seedlen)
return NULL ;
char tmp [30 ];
char * buf ;
if (seedlen <= sizeof (tmp ) - 2 )
buf = tmp ;
return null ;
char [30 ] tmp;
char * buf;
if (seedlen <= tmp.sizeof - 2 )
buf = tmp.ptr;
else
{
buf = (char * )alloca (seedlen + 2 ); // leave space for extra char
buf = cast (char * )alloca(seedlen + 2 ); // leave space for extra char
if (! buf)
return NULL ; // no matches
return null ; // no matches
}
int cost = INT_MAX , ncost;
void * p = NULL , * np ;
void * p = null , np;
/* Deletions */
memcpy(buf, seed + 1 , seedlen);
for (size_t i = 0 ; i < seedlen; i++ )
Expand All
@@ -146,10 +126,8 @@ void *spellerX(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg,
np = (* fp)(fparg, buf, &ncost);
if (combineSpellerResult(p, cost, np, ncost))
return p;
buf[i] = seed[i];
}
/* Transpositions */
if (! flag)
{
Expand All
@@ -159,25 +137,21 @@ void *spellerX(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg,
// swap [i] and [i + 1]
buf[i] = seed[i + 1 ];
buf[i + 1 ] = seed[i];
// printf("tra buf = '%s'\n", buf);
if (combineSpellerResult(p, cost, (* fp)(fparg, buf, &ncost), ncost))
return p;
buf[i] = seed[i];
}
}
if (charset && * charset)
{
/* Substitutions */
memcpy(buf, seed, seedlen + 1 );
for (size_t i = 0 ; i < seedlen; i++ )
{
for (const char * s = charset ; * s ; s ++ )
for (const ( char ) * s = charset; * s; s++ )
{
buf[i] = * s;
// printf("sub buf = '%s'\n", buf);
if (flag)
np = spellerY(buf, seedlen, fp, fparg, charset, i + 1 , &ncost);
Expand All
@@ -188,15 +162,13 @@ void *spellerX(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg,
}
buf[i] = seed[i];
}
/* Insertions */
memcpy(buf + 1 , seed, seedlen + 1 );
for (size_t i = 0 ; i <= seedlen ; i ++ ) // yes, do seedlen+1 iterations
for (size_t i = 0 ; i <= seedlen; i++ ) // yes, do seedlen+1 iterations
{
for (const char * s = charset ; * s ; s ++ )
for (const ( char ) * s = charset; * s; s++ )
{
buf[i] = * s;
// printf("ins buf = '%s'\n", buf);
if (flag)
np = spellerY(buf, seedlen + 1 , fp, fparg, charset, i + 1 , &ncost);
Expand All
@@ -205,11 +177,10 @@ void *spellerX(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg,
if (combineSpellerResult(p, cost, np, ncost))
return p;
}
buf [i ] = seed [i ]; // going past end of seed[] is ok, as we hit the 0
buf[i] = seed[i]; // going past end of seed[] is ok, as we hit the 0
}
}
return p ; // return "best" result
return p; // return "best" result
}
/* *************************************************
Expand All
@@ -225,71 +196,65 @@ void *spellerX(const char *seed, size_t seedlen, fp_speller_t fp, void *fparg,
* NULL no correct spellings found
* void* value returned by fp() for first possible correct spelling
*/
void * speller (const char * seed , fp_speller_t fp , void * fparg , const char * charset )
extern (C++ ) void * speller(const (char )* seed, fp_speller_t fp, void * fparg, const (char )* charset)
{
size_t seedlen = strlen(seed);
size_t maxdist = seedlen < 4 ? seedlen / 2 : 2 ;
for (int distance = 0 ; distance < maxdist; distance++ )
{ void * p = spellerX (seed , seedlen , fp , fparg , charset , distance );
{
void * p = spellerX(seed, seedlen, fp, fparg, charset, distance);
if (p)
return p;
// if (seedlen > 10)
// break;
// if (seedlen > 10)
// break;
}
return NULL ; // didn't find it
}
#if UNITTEST
#include <stdio.h>
#include <string.h>
#include <assert.h>
void * speller_test (void * fparg , const char * s , int * cost )
{
//printf("speller_test(%s, %s)\n", fparg, s);
* cost = 0 ;
if (strcmp ((char * )fparg , s ) == 0 )
return fparg ;
return NULL ;
return null ; // didn't find it
}
void unittest_speller ( )
version ( unittest )
{
static const char * cases [][ 3 ] =
extern ( C++ ) void * speller_test( void * fparg, const ( char ) * s, int * cost)
{
{ "hello" , "hell" , "y" },
{ "hello" , "hel" , "y" },
{ "hello" , "ello" , "y" },
{ "hello" , "llo" , "y" },
{ "hello" , "hellox" , "y" },
{ "hello" , "helloxy" , "y" },
{ "hello" , "xhello" , "y" },
{ "hello" , "xyhello" , "y" },
{ "hello" , "ehllo" , "y" },
{ "hello" , "helol" , "y" },
{ "hello" , "abcd" , "n" },
{ "hello" , "helxxlo" , "y" },
{ "hello" , "ehlxxlo" , "n" },
{ "hello" , "heaao" , "y" },
{ "_123456789_123456789_123456789_123456789" , "_123456789_123456789_123456789_12345678" , "y" },
{ NULL , NULL , NULL }
};
//printf("unittest_speller()\n");
const void * p = speller ("hello" , & speller_test , (void * )"hell" , idchars );
assert (p != NULL );
for (int i = 0 ; cases [i ][0 ]; i ++ )
// printf("speller_test(%s, %s)\n", fparg, s);
* cost = 0 ;
if (strcmp(cast (char * )fparg, s) == 0 )
return fparg;
return null ;
}
extern (C++ ) void unittest_speller()
{
//printf("case [%d]\n", i);
void * p = speller (cases [i ][0 ], & speller_test , (void * )cases [i ][1 ], idchars );
if (p )
assert (cases [i ][2 ][0 ] == 'y' );
else
assert (cases [i ][2 ][0 ] == 'n' );
static __gshared const (char )*** cases =
[
[" hello" , " hell" , " y" ],
[" hello" , " hel" , " y" ],
[" hello" , " ello" , " y" ],
[" hello" , " llo" , " y" ],
[" hello" , " hellox" , " y" ],
[" hello" , " helloxy" , " y" ],
[" hello" , " xhello" , " y" ],
[" hello" , " xyhello" , " y" ],
[" hello" , " ehllo" , " y" ],
[" hello" , " helol" , " y" ],
[" hello" , " abcd" , " n" ],
[" hello" , " helxxlo" , " y" ],
[" hello" , " ehlxxlo" , " n" ],
[" hello" , " heaao" , " y" ],
[" _123456789_123456789_123456789_123456789" , " _123456789_123456789_123456789_12345678" , " y" ],
[null , null , null ]
];
// printf("unittest_speller()\n");
const (void )* p = speller(" hello" , &speller_test, cast (void * )" hell" , idchars);
assert (p ! is null );
for (int i = 0 ; cases[i][0 ]; i++ )
{
// printf("case [%d]\n", i);
void * p = speller(cases[i][0 ], &speller_test, cast (void * )cases[i][1 ], idchars);
if (p)
assert (cases[i][2 ][0 ] == ' y' );
else
assert (cases[i][2 ][0 ] == ' n' );
}
// printf("unittest_speller() success\n");
}
//printf("unittest_speller() success\n");
}
#endif