/
video_word.go
69 lines (58 loc) · 1.59 KB
/
video_word.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// Copyright 2023 The casbin Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package object
import (
"fmt"
"unicode"
"github.com/casibase/casibase/util"
"github.com/wangbin/jiebago"
)
// seg is the package-level jiebago segmenter shared by all word-count calls.
// It starts out nil and is created on first use by PopulateWordCountMap.
var seg *jiebago.Segmenter
// isPunctuation reports whether r is neither a letter nor a number.
//
// Despite the name, this is broader than Unicode punctuation: whitespace,
// symbols and control characters are also reported as true.
func isPunctuation(r rune) bool {
	alphanumeric := unicode.IsLetter(r) || unicode.IsNumber(r)
	return !alphanumeric
}
// isNumber reports whether every rune in s is a Unicode number character
// (as defined by unicode.IsNumber). A sign, decimal point or any other
// non-number rune makes the result false. The empty string yields true
// because it contains no offending rune.
func isNumber(s string) bool {
	allNumeric := true
	for _, r := range s {
		if !unicode.IsNumber(r) {
			allNumeric = false
			break
		}
	}
	return allNumeric
}
// PopulateWordCountMap rebuilds v.WordCountMap with the number of times each
// word occurs across the text of all of v.Segments.
//
// If v has no segments the method returns nil immediately and leaves
// v.WordCountMap untouched. Otherwise the map is replaced with a fresh one.
//
// The text is split into words by the shared jiebago segmenter, which is
// created and loaded from "data/dict.txt" on first use. A word is counted only
// when it is longer than 3 bytes, its first rune is a letter or number, and it
// does not consist solely of number characters.
//
// It returns an error when the dictionary file does not exist or when the
// dictionary cannot be loaded.
func (v *Video) PopulateWordCountMap() error {
	if len(v.Segments) == 0 {
		return nil
	}

	dictPath := "data/dict.txt"
	if !util.FileExist(dictPath) {
		return fmt.Errorf("Cannot generate word cloud, the dict file: [%s] does not exist", dictPath)
	}

	if seg == nil {
		// Load into a local first and publish it only on success. Assigning
		// the global before loading would leave a non-nil segmenter behind
		// after a failed load, so later calls would skip loading and cut
		// text with a dictionary that was never loaded successfully.
		loaded := &jiebago.Segmenter{}
		if err := loaded.LoadDictionary(dictPath); err != nil {
			return err
		}
		seg = loaded
	}

	v.WordCountMap = map[string]int{}
	for _, segment := range v.Segments {
		for word := range seg.Cut(segment.Text, true) {
			// len counts bytes, not runes: the threshold admits e.g. two
			// 3-byte CJK characters but requires at least four ASCII letters.
			if len(word) > 3 && !isPunctuation([]rune(word)[0]) && !isNumber(word) {
				v.WordCountMap[word]++
			}
		}
	}

	return nil
}