Skip to content

Commit

Permalink
- Upgraded to MSVC2010.
Browse files Browse the repository at this point in the history
- Added a new function "check" that tests whether a similar string exists.
- Removed a compiler warning.
  • Loading branch information
Naoaki Okazaki committed Feb 14, 2011
1 parent 89449a1 commit e8b6adf
Show file tree
Hide file tree
Showing 6 changed files with 984 additions and 600 deletions.
2 changes: 1 addition & 1 deletion include/simstring/cdbpp.h
Expand Up @@ -211,7 +211,7 @@ class builder_base
*/
builder_base(std::ofstream& os) : m_os(os)
{
// Record the stream's current put position in m_begin.
// NOTE(review): the two assignments below look like the old and new
// versions of a single line from the commit diff (the second one adds a
// uint32_t cast); confirm that only the cast version belongs in the file.
m_begin = m_os.tellp();
m_begin = (uint32_t)m_os.tellp();
// Initialize m_cur from get_data_begin(), then move the stream's put
// position to m_begin + m_cur.
m_cur = get_data_begin();
m_os.seekp(m_begin + m_cur);
}
Expand Down
54 changes: 52 additions & 2 deletions include/simstring/simstring.h
Expand Up @@ -635,7 +635,7 @@ class ngramdb_reader_base
* @param results The SIDs that satisfy the overlap join.
*/
template <class measure_type, class query_type>
void overlapjoin(const query_type& query, double alpha, results_type& results)
bool overlapjoin(const query_type& query, double alpha, results_type& results, bool check)
{
int i;
const int qsize = query.size();
Expand Down Expand Up @@ -727,6 +727,9 @@ class ngramdb_reader_base

if (mmin <= num) {
// This candidate has sufficient matches.
if (check) {
return true;
}
results.push_back(itc->value);
} else if (num + (qsize - i - 1) >= mmin) {
// This candidate still has the chance.
Expand All @@ -746,11 +749,16 @@ class ngramdb_reader_base
typename candidates_type::const_iterator itc;
for (itc = cands.begin();itc != cands.end();++itc) {
if (mmin <= itc->num) {
if (check) {
return true;
}
results.push_back(itc->value);
}
}
}
}

return !results.empty();
}

protected:
Expand Down Expand Up @@ -970,7 +978,7 @@ class reader
gen(query, std::back_inserter(ngrams));

typename base_type::results_type results;
base_type::overlapjoin<measure_type>(ngrams, alpha, results);
base_type::overlapjoin<measure_type>(ngrams, alpha, results, false);

typename base_type::results_type::const_iterator it;
const char* strings = &m_strings[0];
Expand All @@ -980,6 +988,48 @@ class reader
}
}

/**
 * Checks whether a string similar to the query exists, selecting the
 * similarity measure at run time.
 *
 * Dispatches to the measure-specific check<measure_type>() overload and
 * returns its result.
 *
 *  @param  query       The query string.
 *  @param  measure     The similarity measure; one of the exact, dice,
 *                      cosine, jaccard, or overlap constants.
 *  @param  alpha       The threshold passed through to the
 *                      measure-specific check.
 *  @return bool        The value returned by the measure-specific check;
 *                      false when measure is none of the constants above.
 */
template <class string_type>
bool check(
    const string_type& query,
    int measure,
    double alpha
    )
{
    switch (measure) {
    case exact:
        return this->check<simstring::measure::exact>(query, alpha);
    case dice:
        return this->check<simstring::measure::dice>(query, alpha);
    case cosine:
        return this->check<simstring::measure::cosine>(query, alpha);
    case jaccard:
        return this->check<simstring::measure::jaccard>(query, alpha);
    case overlap:
        return this->check<simstring::measure::overlap>(query, alpha);
    default:
        // Unknown measure: nothing was checked, so report "not found".
        return false;
    }
}

/**
 * Checks whether a string similar to the query exists, using the
 * similarity measure given as a template argument.
 *
 *  @param  query       The query string.
 *  @param  alpha       The threshold passed through to overlapjoin().
 *  @return bool        The value returned by overlapjoin() when it is
 *                      called with its check flag set to true.
 */
template <class measure_type, class string_type>
bool check(
    const string_type& query,
    double alpha
    )
{
    typedef std::vector<string_type> ngrams_type;

    // Run the query through the n-gram generator, collecting its output.
    ngram_generator_type gen(m_ngram_unit, m_be);
    ngrams_type ngrams;
    gen(query, std::back_inserter(ngrams));

    // overlapjoin() requires a results container even in check mode.
    typename base_type::results_type results;

    // base_type is a dependent type, so the "template" keyword is required
    // for the call to parse as a member template on conforming compilers.
    return base_type::template overlapjoin<measure_type>(ngrams, alpha, results, true);
}

protected:
inline uint32_t read_uint32(const char* p) const
Expand Down
6 changes: 3 additions & 3 deletions simstring.sln
@@ -1,7 +1,7 @@

Microsoft Visual Studio Solution File, Format Version 10.00
# Visual Studio 2008
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "frontend", "frontend\frontend.vcproj", "{DE6A2D5D-8AFA-4471-A9A2-C8E671CCC301}"
Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "frontend", "frontend\frontend.vcxproj", "{DE6A2D5D-8AFA-4471-A9A2-C8E671CCC301}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down

0 comments on commit e8b6adf

Please sign in to comment.