In [8]:
#!pip install jieba

Collecting jieba
  Downloading jieba-0.42.1.tar.gz (19.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.2/19.2 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: jieba
  Building wheel for jieba (setup.py) ... [?25ldone
[?25h  Created wheel for jieba: filename=jieba-0.42.1-py3-none-any.whl size=19314458 sha256=87942c7b231ee3781a0d8438a4989d7738e844d050cf596ef94a2feb98196530
  Stored in directory: /Users/tmr1137/Library/Caches/pip/wheels/08/a1/a3/5c8ac52cc2f5782ffffc34c95c57c8e5ecb3063dc69541ee7c
Successfully built jieba
Installing collected packages: jieba
Successfully installed jieba-0.42.1


In [9]:
#!pip install liwc

Collecting liwc
  Downloading liwc-0.5.0-py2.py3-none-any.whl.metadata (2.7 kB)
Downloading liwc-0.5.0-py2.py3-none-any.whl (5.1 kB)
Installing collected packages: liwc
Successfully installed liwc-0.5.0


In [46]:
# Get the current working directory (i.e., the folder where this script is being run)
import os
os.getcwd()

'/Users/tmr1137/Documents'

Load the LIWC dictionary

In [47]:
# Import necessary libraries
import jieba # for Chinese word segmentation
from collections import Counter, defaultdict # for counting and managing dictionary data structures
import json # for loading the LIWC dictionary file (usually in JSON format)

In [48]:
# Category code → category name mapping
category_code_to_name = {
    "127": "negemo", # negative emotion
    "130": "sad", # sadness
    "121": "social", # social processes
    "125": "cogproc", # cognitive processes
    "1": "i" # first-person singular pronoun ("I")
}

In [49]:
# Load a .dic file and map category codes to category names
def load_mapped_liwc(filepath, category_map):
    result = defaultdict(list)
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 2:
                continue # skip lines that don’t have both a word and category codes
            word = parts[0]
            codes = parts[1:]
            for code in codes:
                if code in category_map:
                    result[category_map[code]].append(word)
    return dict(result)

# Usage (after running this, liwc_dict will be ready to use)
liwc_dict = load_mapped_liwc("sc_liwc.dic", category_code_to_name)

In [50]:
# Analyze a piece of text and calculate LIWC scores
def liwc_score(text, liwc_dict):
    tokens = list(jieba.cut(text)) # Perform Chinese word segmentation
    counter = Counter()
    for token in tokens:
        for cat, words in liwc_dict.items():
            if token in words:
                counter[cat] += 1 # Count the token for the corresponding LIWC category
    return counter # Returns a dictionary of category counts

In [51]:
# Multi-turn dialogue analysis (only analyze user input, assuming user speaks on even-numbered lines)
def analyze_dialogue_liwc(dialogue, liwc_dict):
    results = []
    for i, utterance in enumerate(dialogue):
        if i % 2 == 0: # Only process even-numbered lines (user input)
            score = liwc_score(utterance, liwc_dict)
            results.append((utterance, score)) # Store the utterance and its LIWC score
    return results # Returns a list of (utterance, score) tuples

In [52]:
# Display the analysis results
def render_results(results):
    for text, score in results:
        print(f"🗣 {text}") # Print the original user utterance
        for k, v in score.items():
            print(f"  {k}: {v}") # Print each LIWC category and its count
        print("———") # Separator between utterances

In [53]:
# Example dialogue (user speaks on even-numbered lines)
dialogue = [
    "我最近真的很累，什么都不想做。",
    "为什么会这样？",
    "也不知道，就是每天都不想起床。",
    "你有没有试过出去走走？",
    "走不动，感觉身体都沉重了。",
    "那你有没有和朋友聊聊？",
    "没有，我不想麻烦别人。",
    "我其实一直都觉得很孤独。",
    "也许你需要去看看心理医生。",
    "我怕别人觉得我很奇怪。"
]

# Run the LIWC analysis on the user input
results = analyze_dialogue_liwc(dialogue, liwc_dict)

# Display the results
render_results(results)

🗣 我最近真的很累，什么都不想做。
———
🗣 也不知道，就是每天都不想起床。
  i: 1
———
🗣 走不动，感觉身体都沉重了。
  cogproc: 1
  negemo: 1
  i: 1
———
🗣 没有，我不想麻烦别人。
  i: 1
  cogproc: 1
  negemo: 1
———
🗣 也许你需要去看看心理医生。
  social: 1
———


Debug and refind the C-LIWC dictionary

In [54]:
# optional debug function
def debug_liwc_hits(text, liwc_dict):
    tokens = list(jieba.cut(text))
    match = defaultdict(list)
    for token in tokens:
        for cat, words in liwc_dict.items():
            if token in words:
                match[cat].append(token)
    return dict(match)

In [55]:
debug_liwc_hits("走不动，感觉身体都沉重了。", liwc_dict)

{'cogproc': ['沉重'], 'negemo': ['沉重'], 'i': ['了']}

In [56]:
'了' in liwc_dict['i'] # stopword accidentally detect as first person singular pronoun

True