This repository has been archived by the owner on Dec 14, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
weightedentry.e
141 lines (121 loc) · 3.54 KB
/
weightedentry.e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#include "slick.sh"
/**
 * A piece of text (the weighting treats it as a path: characters after the
 * last FILESEP count double) that can be scored against a pattern whose
 * characters must all occur in the text, in order.
 *
 * Typical use, as far as this class shows:
 *   set_text(&s); calc_intrinsic_weights(); weight_match(pattern);
 * and then read m_total_weight (0 means "no match").
 */
class WeightedEntry
{
   // Base weight of every character of *m_text, indexed by 1-based position.
   // Filled by calc_intrinsic_weights().
   int m_intrinsic_char_weight[] = null;
   // Weight of each matched character for the most recent weight_match(),
   // indexed by 1-based position; positions that were not matched stay unset.
   int m_char_weight[] = null;
   // Sum of the weights of all matched characters; 0 when nothing matched.
   int m_total_weight = 0;
   // Borrowed pointer to the text being scored (may be null).
   _str *m_text;
   // Position of the last FILESEP in *m_text, or 0 when there is none.
   int m_lastslash = 0;

   WeightedEntry()
   {
      m_text = null;
   }

   /**
    * Points this entry at 'text' and records where its last FILESEP is.
    * The intrinsic weights are NOT recomputed here; call
    * calc_intrinsic_weights() afterwards.
    *
    * @param text  pointer to the string to score; null is accepted and
    *              leaves the entry without text.
    */
   void set_text(_str *text)
   {
      m_text = text;
      // Do not dereference a null pointer; with no text there is no separator.
      m_lastslash = (m_text != null) ? lastpos(FILESEP, *m_text) : 0;
   }

   /**
    * Forgets the result of the previous weight_match().
    */
   void clear_weight()
   {
      m_char_weight = null;
      m_total_weight = 0;
   }

   /**
    * Computes the pattern-independent weight of every character of the text.
    * Does nothing (besides clearing old weights) when no text is set.
    */
   void calc_intrinsic_weights()
   {
      /*
      Each character matched is weighted according to the following heuristics:
      4 points for the first character, or for a character immediately
      following a FILESEP.
      3 points for a character following a '_', or a capital letter
      immediately following a lowercase letter.
      2 points for a character following a '.'.
      1 point for any other character.
      */
      // Drop weights left over from a previous text.
      m_intrinsic_char_weight = null;
      if (m_text == null) return;

      _str ch;
      int pchpos;
      int chpos, last = m_text->_length();
      for (chpos = 1; chpos <= last; ++chpos)
      {
         m_intrinsic_char_weight[chpos] = 1;
         pchpos = chpos-1;
         // Previous character, or '' for the first position.
         _str pch = (pchpos) ? substr(*m_text, pchpos, 1) : '';
         if (chpos == 1 || pch == FILESEP)
            m_intrinsic_char_weight[chpos] = 4;
         else if (pch == '.')
            m_intrinsic_char_weight[chpos] = 2;
         else
         {
            ch = substr(*m_text, chpos, 1);
            // A character is a lowercase letter iff upcase() changes it, and
            // an uppercase letter iff lowcase() changes it.  Testing it this
            // way keeps digits and punctuation (which neither call changes)
            // from being mistaken for a camelCase boundary.
            if (pch == '_' || (pch != upcase(pch) && ch != lowcase(ch)))
               m_intrinsic_char_weight[chpos] = 3;
         }
         // filenames are weighted double
         if (chpos > m_lastslash)
            m_intrinsic_char_weight[chpos] *= 2;
      }
   }

   /**
    * Stores and returns the match weight of the character at 'chpos'.
    *
    * The weight starts as the intrinsic weight.  When the character right
    * before it already has a match weight that is larger, that weight is
    * added on top, so runs of adjacent matches accumulate; one extra point
    * is added when the character after 'chpos' matches the "[.<FILESEP> ]"
    * pattern.
    *
    * @param chpos  1-based position in *m_text of a matched character.
    * @return the weight stored in m_char_weight[chpos].
    */
   private int weight_char_at_pos(int chpos)
   {
      m_char_weight[chpos] = m_intrinsic_char_weight[chpos];
      // give more weight to contiguous blocks
      int pchpos = chpos - 1;
      if (pchpos && m_char_weight[pchpos] != null &&
          m_char_weight[pchpos] > m_char_weight[chpos])
      {
         m_char_weight[chpos] += m_char_weight[pchpos];
         _str ch = substr(*m_text, chpos+1, 1);
         // give more weight to last char of contiguous block if at word end
         if (pos("[."FILESEP" ]", ch, 1, "U"))
            ++m_char_weight[chpos];
      }
      return m_char_weight[chpos];
   }

   /**
    * Scores the text against 'pattern' and leaves the result in
    * m_total_weight / m_char_weight.
    *
    * The pattern is matched from its last character backwards, each time
    * taking the right-most occurrence that lies before the previously
    * matched position (lastpos with the "I" option -- presumably
    * case-insensitive; confirm against the lastpos documentation).  If any
    * pattern character cannot be placed, or the pattern is empty, or no
    * text is set, the weights stay cleared.
    *
    * calc_intrinsic_weights() must have been called for the current text.
    *
    * @param pattern  characters to find, in order, in the text.
    */
   void weight_match(_str pattern)
   {
      clear_weight();
      if (m_text == null) return;
      int patlen = pattern._length();
      if (patlen == 0) return;

      // find 'pattern' in 'm_text', and store indices of
      // matching characters in 'matches' (slots 1..patlen).
      int matches[];
      int chpos = m_text->_length();
      _str ch;
      int i;
      for (i = patlen; i >= 1; --i)
      {
         if (chpos < 1) return;
         ch = substr(pattern, i, 1);
         chpos = lastpos(ch, *m_text, chpos, "I");
         if (!chpos) return;
         matches[i] = chpos;
         --chpos;
      }

      // all characters in 'pattern' have been found.
      // Calculate the weights and update the state.  Walk slots 1..patlen
      // explicitly: slot 0 of 'matches' is never assigned, and the weights
      // must be computed left to right so that a character can see the
      // weight of its predecessor.
      for (i = 1; i <= patlen; ++i)
         m_total_weight += weight_char_at_pos(matches[i]);
   }
};
/**
 * Reorders 'entries' in place by descending m_total_weight.
 *
 * Each entry is dropped into the bucket whose index equals its total
 * weight; the buckets are then read back from the highest index down to 0.
 * Entries that share a weight are appended in the order in which they were
 * put into their bucket.
 *
 * @param entries  array of entry pointers; replaced by the sorted sequence.
 */
void bucketsort(WeightedEntry* (&entries)[])
{
   WeightedEntry* buckets[][];

   // distribution
   WeightedEntry *entry = null;
   foreach (entry in entries)
   {
      WeightedEntry *(*bucket)[] = &buckets[entry->m_total_weight];
      (*bucket)[bucket->_length()] = entry;
   }

   entries = null;

   // aggregation
   // The last valid bucket index is _length()-1; starting at _length()
   // would touch a bucket one past the end of the array.
   int i;
   for (i = buckets._length() - 1; i >= 0; --i)
   {
      foreach (entry in buckets[i])
      {
         entries[entries._length()] = entry;
      }
   }
}