In [1]:
import cntext as ct

# Print the module-level help for cntext.stats to list the functions it exposes
# together with their docstrings.
help(ct.stats)

Help on module cntext.stats in cntext:

NAME
    cntext.stats

FUNCTIONS
    cn_seg_sent(text)
    
    dict_pkl_list()
        Get the list of cntext built-in dictionaries (pkl format)
    
    load_pkl_dict(file, is_builtin=True)
        load pkl dictionary file,
        :param file: pkl file path
        :param is_builtin: Whether it is the built-in pkl file of cntext. Default True
    
    readability(text, zh_adjconj=None, language='chinese')
        text readability, the larger the indicator, the higher the complexity of the article and the worse the readability.
        :param text: text string
        :param zh_adjconj Chinese conjunctions and adverbs, receive list data type. By default, the built-in dictionary of cntext is used
        :param language: "chinese" or "english"; default is "chinese"
        ------------
        【English readability】english_readability = 4.71 x (characters/words) + 0.5 x (words/sentences) - 21.43；
        【Chinese readability】  Refer 【徐巍,姚振晔,陈冬华.中

In [1]:
import cntext as ct

# Chinese sample sentence used for the word-frequency and readability demo.
text = '致力于以零文章处理费或订阅费发布优质研究软件。'

# Word frequencies of the sentence (Chinese tokenisation).
zh_term_counts = ct.term_freq(text, lang='chinese')
print(zh_term_counts)

# Readability indicators; no language argument is passed, so the library
# default is used. Per the library help, larger values mean harder text.
zh_readability = ct.readability(text)
print(zh_readability)

Building prefix dict from the default dictionary ...
Loading model from cache /var/folders/sc/3mnt5tgs419_hk7s16gq61p80000gn/T/jieba.cache
Loading model cost 0.596 seconds.
Prefix dict has been built successfully.


Counter({'致力于': 1, '文章': 1, '处理费': 1, '订阅费': 1, '发布': 1, '优质': 1, '研究': 1, '软件': 1})
{'readability1': 23.0, 'readability2': 0.15384615384615385, 'readability3': 11.576923076923077}


In [4]:
import cntext as ct

# English sample sentence used for the word-frequency and readability demo.
text = 'Committed to publishing quality research software with zero article processing charges or subscription fees.'

# Word frequencies of the sentence (English tokenisation).
en_term_counts = ct.term_freq(text, lang='english')
print(en_term_counts)

# Readability indicator for English text.
# NOTE(review): the help text recorded earlier in this notebook names this
# keyword `language`, while this call passes `lang`; the recorded output shows
# `lang` was accepted — confirm against the installed cntext version.
en_readability = ct.readability(text, lang='english')
print(en_readability)

Counter({'committed': 1, 'publishing': 1, 'quality': 1, 'research': 1, 'software': 1, 'zero': 1, 'article': 1, 'processing': 1, 'charges': 1, 'subscription': 1, 'fees.': 1})
{'readability': 19.982}


In [None]:
## Built-in dictionaries: listing and loading the pkl files shipped with cntext

In [8]:
import cntext as ct

# List the file names of the built-in (pkl format) dictionaries bundled with cntext.
ct.dict_pkl_list()

['DUTIR.pkl',
 'HOWNET.pkl',
 'sentiws.pkl',
 'ChineseFinancialFormalUnformalSentiment.pkl',
 'ANEW.pkl',
 'LSD2015.pkl',
 'NRC.pkl',
 'geninqposneg.pkl',
 'HuLiu.pkl',
 'AFINN.pkl',
 'ADV_CONJ.pkl',
 'LoughranMcDonald.pkl',
 'STOPWORDS.pkl']

In [None]:
# Load the built-in 'NRC.pkl' dictionary file and print the loaded object in full.
# NOTE(review): this prints the whole dictionary object, which may produce very
# long output — consider inspecting only its keys; confirm before sharing.
print(ct.load_pkl_dict('NRC.pkl'))

In [15]:
# Print the signature and docstring of ct.sentiment, which counts the
# occurrences of each emotion category's words in a text.
help(ct.sentiment)

Help on function sentiment in module cntext.stats:

sentiment(text, diction, language='chinese')
    calculate the occurrences of each emotional category words in text;
    the complex influence of intensity adverbs and negative words on emotion is not considered,
    :param text:  text sring
    :param diction:  emotion dictionary；
    :param language: 语言类型，"chinese"或"english"，默认"chinese"
    
    diction = {'category1':  'category1 emotion word list',
               'category2':  'category2 emotion word list',
               'category3':  'category3 emotion word list',
                ...
               }
    :return:



In [4]:
# Chinese sample sentence for dictionary-based emotion counting.
text = '我今天得奖了，很高兴，我要将快乐分享大家。'

# Category -> word-list mapping stored under the 'DUTIR' key of the built-in file.
dutir_diction = ct.load_pkl_dict('DUTIR.pkl')['DUTIR']

# Count how many words of each DUTIR emotion category occur in the sentence.
ct.sentiment(text=text, diction=dutir_diction, lang='chinese')

{'哀_num': 0,
 '好_num': 0,
 '惊_num': 0,
 '惧_num': 0,
 '乐_num': 2,
 '怒_num': 0,
 '恶_num': 0,
 'stopword_num': 8,
 'word_num': 14,
 'sentence_num': 1}

In [5]:
# Chinese sample sentence for the custom-dictionary example.
text = '我今天得奖了，很高兴，我要将快乐分享大家。'

# User-defined dictionary: each key is a category, each value its word list.
diction = dict(
    pos=['高兴', '快乐', '分享'],
    neg=['难过', '悲伤'],
    adv=['很', '特别'],
)

# Count the words of each custom category that occur in the sentence.
ct.sentiment(text=text, diction=diction, lang='chinese')

{'pos_num': 3,
 'neg_num': 0,
 'adv_num': 1,
 'stopword_num': 8,
 'word_num': 14,
 'sentence_num': 1}

In [13]:
# English sample sentence for dictionary-based emotion counting.
text = 'What a happy day!'

# Category -> word-list mapping stored under the 'NRC' key of the built-in file.
nrc_diction = ct.load_pkl_dict('NRC.pkl')['NRC']

# Count how many words of each NRC emotion category occur in the sentence.
ct.sentiment(text=text, diction=nrc_diction, lang='english')

{'anger_num': 0,
 'anticipation_num': 1,
 'disgust_num': 0,
 'fear_num': 0,
 'joy_num': 1,
 'negative_num': 0,
 'positive_num': 1,
 'sadness_num': 0,
 'surprise_num': 0,
 'trust_num': 1,
 'stopword_num': 1,
 'word_num': 5,
 'sentence_num': 1}

In [12]:
import cntext as ct

# English sample sentence for the custom-dictionary example.
text = 'What a happy day!'

# User-defined dictionary: each key is a category, each value its word list.
diction = dict(
    Pos=['happy', 'good'],
    Neg=['bad', 'terrible'],
    Adv=['very'],
)

# Count the words of each custom category that occur in the sentence.
ct.sentiment(text=text, diction=diction, lang='english')

{'Pos_num': 1,
 'Neg_num': 0,
 'Adv_num': 0,
 'stopword_num': 1,
 'word_num': 5,
 'sentence_num': 1}

In [1]:
import cntext as ct

# Load the concreteness.pkl dictionary file and preview its first rows.
# NOTE(review): 'concreteness.pkl' does not appear in the dict_pkl_list() output
# recorded earlier in this notebook — confirm the installed cntext version ships it.
# The loaded object is presumably a pandas DataFrame with `word` and `valence`
# columns (it is used with .head() below) — TODO confirm.
concreteness_df = ct.load_pkl_dict('concreteness.pkl')
concreteness_df.head()

Unnamed: 0,word,valence
0,roadsweeper,4.85
1,traindriver,4.54
2,tush,4.45
3,hairdress,3.93
4,pharmaceutics,3.77


In [2]:
# Score one reply against the word/valence table loaded into concreteness_df.
reply = "I'll go look for that"

score = ct.sentiment_by_valence(
    text=reply,
    diction=concreteness_df,
    lang='english',
)

# Display the resulting score as the cell output.
score

1.856

In [3]:
# Six variants of the same reply that differ in verb choice and in how
# specifically the requested item is named.
employee_replys = ["I'll go look for that",
                   "I'll go search for that",
                   "I'll go search for that top",
                   "I'll go search for that t-shirt",
                   "I'll go look for that t-shirt in grey",
                   "I'll go search for that t-shirt in grey"]

# Report line layout. Defined once, outside the loop, because it does not
# depend on the reply being scored.
template = "Concreteness Score: {score:.2f} | Example-{idx}: {example}"

# Score every reply against the concreteness table and print one line per reply.
for idx, reply in enumerate(employee_replys):
    score = ct.sentiment_by_valence(text=reply,
                                    diction=concreteness_df,
                                    lang='english')

    print(template.format(score=score,
                          idx=idx,
                          example=reply))

Concreteness Score: 1.86 | Example-0: I'll go look for that
Concreteness Score: 1.86 | Example-1: I'll go search for that
Concreteness Score: 2.21 | Example-2: I'll go search for that top
Concreteness Score: 2.04 | Example-3: I'll go search for that t-shirt
Concreteness Score: 2.37 | Example-4: I'll go look for that t-shirt in grey
Concreteness Score: 2.37 | Example-5: I'll go search for that t-shirt in grey
