Skip to content

Commit

Permalink
Merge pull request #7 from saknarak/master
Browse files Browse the repository at this point in the history
add data2.json
  • Loading branch information
earthchie committed Apr 9, 2017
2 parents aacc1c3 + 904438d commit 96f0fe9
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 7 deletions.
2 changes: 1 addition & 1 deletion index.html
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ <h1>Auto Complete ที่อยู่ อย่างที่มันคว

<script type="text/javascript">
$.Thailand({
database: 'jquery.Thailand.js/data.json',
database: 'jquery.Thailand.js/data3.json',
onComplete: function(){
$('#loader, #address').toggle();
}
Expand Down
110 changes: 110 additions & 0 deletions jquery.Thailand.js/convert.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
Only these 6 entries have more than one zipcode:
ประจวบคีรีขันธ์ ปราณบุรี ปราณบุรี [ 77120, 77160 ]
ประจวบคีรีขันธ์ ปราณบุรี วังก์พง [ 77120, 77160 ]
ประจวบคีรีขันธ์ ปราณบุรี หนองตาแต้ม [ 77120, 77160 ]
ประจวบคีรีขันธ์ ปราณบุรี เขาจ้าว [ 77120, 77160 ]
ประจวบคีรีขันธ์ ปราณบุรี เขาน้อย [ 77120, 77160 ]
ประจวบคีรีขันธ์ สามร้อยยอด สามร้อยยอด [ 77120, 77160 ]
*/
// Source database. The passes below read every entry as [name, children],
// three levels deep, with the innermost level holding [name, zipcodes].
let tree = require('./data.json')
// Word segmenter; addToDict splits the result of wordcut.cut() on '|'.
let wordcut = require('wordcut')
const fs = require('fs')

// NOTE(review): cnt is not referenced anywhere else in the visible script.
let cnt = 0
// Occurrence count of every full name seen in the tree (filled by addToDict).
let dict = {}
// Occurrence count of every individual word produced by the segmenter.
let words = {}

// Presumably prepares the segmenter before the first cut() call -- confirm
// against the wordcut documentation.
wordcut.init()

// Pass 1: visit the name at each of the three tree levels and register it,
// so the name and word frequency tables are complete before anything is
// encoded.
for (const province of tree) {
  addToDict(province[0])
  for (const amphur of province[1]) {
    addToDict(amphur[0])
    for (const district of amphur[1]) {
      addToDict(district[0])
    }
  }
}


console.log('words=', Object.keys(words).length)

// Pick the 52 most frequent words and give each a one-letter code:
// ranks 0-25 map to 'A'-'Z' (char codes 65-90), ranks 26-51 to 'a'-'z'
// (char codes 97-122).
let newWords = {}
Object.keys(words)
  // most frequent first
  .sort((left, right) => words[right] - words[left])
  .slice(0, 52)
  .forEach((word, rank) => {
    const charCode = rank < 26 ? 65 + rank : 97 + rank - 26
    newWords[String.fromCharCode(charCode)] = word
  })

//console.log('most freq word=', Object.keys(newWords).map(idx => [newWords[idx], words[newWords[idx]]]))
// Write the selected words and their occurrence counts to words.txt as a
// Markdown table (one row per letter code, in code order).
const wordTableRows = Object.keys(newWords).map(code => {
  const word = newWords[code]
  return `|${word}|${words[word]}|`
})
fs.writeFileSync('words.txt',
  '|คำ|จำนวนครั้ง|\n' +
  '|---|---:|\n' +
  wordTableRows.join('\n')
)

// Build the shared lookup table. Only names that occur two or more times
// get a slot; each is stored in its word-encoded form, and newDict maps that
// encoded form to its position in lookup.
let newDict = {}
let lookup = []
for (const name of Object.keys(dict)) {
  if (dict[name] > 1) {
    const encoded = changeWord(name)
    newDict[encoded] = lookup.length
    lookup.push(encoded)
  }
}


// Pass 2: rebuild the tree with every name replaced by findIndex()'s result
// (a lookup index, or the encoded name itself). A zipcode list holding a
// single element is collapsed to that bare element.
let newTree = tree.map(province => {
  const provinceRef = findIndex(province[0])
  const amphurs = province[1].map(amphur => {
    const amphurRef = findIndex(amphur[0])
    const districts = amphur[1].map(district => {
      const districtRef = findIndex(district[0])
      const zipcodes = district[1]
      return [districtRef, zipcodes.length === 1 ? zipcodes[0] : zipcodes]
    })
    return [amphurRef, districts]
  })
  return [provinceRef, amphurs]
})

// Persist the compact database. lookup and words are each flattened into a
// single '|'-separated string; words are emitted in letter-code order, so the
// position of a word in the string identifies its letter.
const compactDatabase = {
  data: newTree,
  lookup: lookup.join('|'),
  words: Object.keys(newWords).map(code => newWords[code]).join('|'),
}
fs.writeFileSync('./data3.json', JSON.stringify(compactDatabase))

/**
 * Record one occurrence of a name.
 *
 * Increments the counter for the whole name in `dict`, then segments the name
 * with wordcut and increments the counter of every resulting word in `words`.
 *
 * @param {string} key - the name to register
 */
function addToDict(key) {
  // Add one to counter[entry], starting the count at 1 on first sight.
  const bump = (counter, entry) => {
    if (typeof counter[entry] !== 'undefined') {
      counter[entry]++
      return
    }
    counter[entry] = 1
  }

  bump(dict, key)
  // wordcut.cut() returns the segments joined by '|'
  for (const word of wordcut.cut(key).split('|')) {
    bump(words, word)
  }
}

/**
 * Translate a name into its compact representation.
 *
 * @param {string} key - the original name
 * @returns {number|string} the name's index in the lookup table when its
 *   encoded form is registered in newDict, otherwise the encoded name itself
 */
function findIndex(key) {
  const encoded = changeWord(key)
  const slot = newDict[encoded]
  if (typeof slot === 'number') {
    return slot
  }
  return encoded
}

/**
 * Encode a name by substituting every selected frequent word with its
 * one-letter code from newWords.
 *
 * Words are applied in newWords key order. Every occurrence of a word is
 * replaced, so a name that repeats a word is fully encoded.
 *
 * @param {string} text - the name to encode
 * @returns {string} the encoded name
 */
function changeWord(text) {
  let encoded = text
  Object.keys(newWords).forEach(ch => {
    const word = newWords[ch]
    // An empty word would match between every character; nothing to encode.
    if (word === '') {
      return
    }
    // String.prototype.replace with a string pattern only touches the first
    // match; split/join substitutes all of them.
    encoded = encoded.split(word).join(ch)
  })
  return encoded
}
1 change: 1 addition & 0 deletions jquery.Thailand.js/data2.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions jquery.Thailand.js/data3.json

Large diffs are not rendered by default.

34 changes: 29 additions & 5 deletions jquery.Thailand.js/jquery.Thailand.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ $.Thailand = function (options) {

options = $.extend({

database: './data.json',
database: './data3.json',
autocomplete_size: 20,
onComplete: function () {},

Expand Down Expand Up @@ -143,6 +143,30 @@ $.Thailand = function (options) {
});

function preprocess (data) {
var lookup = []
var words = []
var useLookup = false
if (data.lookup && data.words) {
// compact with dictionary and lookup
useLookup = true
lookup = data.lookup.split('|')
words = data.words.split('|')
data = data.data
}
// Decode one value from the compact database.
// When the lookup format is not in use the value is returned untouched.
// Otherwise a number is first resolved through the lookup table, and then
// every Latin letter is expanded to its word: 'A'-'Z' select words[0..25],
// 'a'-'z' select words[26..51].
function t(text) {
  if (!useLookup) {
    return text;
  }
  var encoded = typeof text === 'number' ? lookup[text] : text;
  return encoded.replace(/[A-Z]/ig, function (letter) {
    var code = letter.charCodeAt(0);
    // 65 is 'A', 97 is 'a'; lower-case letters continue after the 26 upper-case slots
    var slot = code < 97 ? code - 65 : code - 97 + 26;
    return words[slot];
  });
}

if (!data[0].length) {
// non-compacted database
return data;
Expand All @@ -158,12 +182,12 @@ $.Thailand = function (options) {
var districtList = amphurEntry[1];
districtList.forEach(function (districtEntry) {
var district = districtEntry[0];
var zipCodeList = districtEntry[1];
var zipCodeList = districtEntry[1] instanceof Array ? districtEntry[1] : [districtEntry[1]];
zipCodeList.forEach(function (zipCode) {
expanded.push({
d: district,
a: amphur,
p: province,
d: t(district),
a: t(amphur),
p: t(province),
z: zipCode
});
});
Expand Down
2 changes: 1 addition & 1 deletion jquery.Thailand.js/jquery.Thailand.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions jquery.Thailand.js/words.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
|คำ|จำนวนครั้ง|
|---|---:|
|หนอง|591|
|บ้าน|468|
|นา|371|
|บาง|346|
|ท่า|301|
|เมือง|226|
|วัง|215|
|แม่|165|
|คลอง|163|
|ทุ่ง|159|
|โคก|156|
|ห้วย|150|
|ใหญ่|150|
|ทอง|150|
|ศรี|129|
|เกาะ|126|
|น้ำ|125|
|โนน|122|
|เขา|120|
|ดอน|117|
|ยาง|114|
|ป่า|113|
|พระ|112|
|บัว|107|
|โพธิ์|105|
|คำ|97|
|ดง|93|
|แก้ว|87|
|หลวง|87|
|ช้าง|83|
|หัว|82|
|น้อย|81|
|บ่อ|76|
|ไผ่|75|
|ทราย|75|
|แดง|67|
|ม่วง|65|
|ตา|65|
|กุด|64|
|งาม|63|
|เหนือ|62|
|หิน|62|
|สาม|62|
|ปาก|61|
|โพน|59|
|ชัย|59|
|ใหม่|58|
|ลำ|57|
|กลาง|57|
|บึง|56|
|ไทร|53|
|เวียง|53|

0 comments on commit 96f0fe9

Please sign in to comment.