In [11]:
import pandas as pd
from collections import defaultdict

# Define file paths for each dictionary
synonym_file = './dict_synonym.txt'
antonym_file = './dict_antonym.txt'

# Load synonyms into a dictionary where each word is mapped to all its synonyms
synonym_dict = defaultdict(set)
with open(synonym_file, 'r', encoding='utf-8') as f:
    for line in f:
        parts = line.strip().split('--')
        if len(parts) == 2:
            _, words = parts
            word_list = words.split()
            for word in word_list:
                synonym_dict[word].update(w for w in word_list if w != word)

# Convert the synonym dictionary to a DataFrame
synonym_grouped = [{'Headword': word, 'Synonyms': ', '.join(sorted(syns))} 
                   for word, syns in synonym_dict.items()]
synonym_df = pd.DataFrame(synonym_grouped)

# Load antonyms into a dictionary where each word maps to its antonym
antonym_dict = defaultdict(set)
lines = 0
with open(antonym_file, 'r', encoding='utf-8') as f:
    for line in f:
        lines+=1
        parts = line.strip().split('——')
        if len(parts) == 2:
            word1, word2 = parts
            antonym_dict[word1].add(word2)
            antonym_dict[word2].add(word1)

print(f'antonym_dict len {len(antonym_dict)} vs line count: {lines}')
# Convert the antonym dictionary to a DataFrame
antonym_grouped = [{'Headword': word, 'Antonyms': ', '.join(sorted(ants))} 
                   for word, ants in antonym_dict.items()]
antonym_df = pd.DataFrame(antonym_grouped)

import jieba
import pandas as pd

# Define the path to dict_negative.txt
negative_file_path = './dict_negative.txt'

# Read and segment phrases on each line
negative_phrases = []
with open(negative_file_path, 'r', encoding='utf-8') as file:
    for line in file:
        phrase = line.strip().split('\t')[0]  # Extract only the phrase part
        segmented = list(jieba.cut(phrase))   # Segment the phrase with Jieba
        negative_phrases.append({'Phrase': phrase, 'Segments': ' '.join(segmented)})

# Convert to DataFrame for easy viewing
negative_phrases_df = pd.DataFrame(negative_phrases)
negative_phrases_df.to_csv('segmented_negative_phrases.csv', index=False, encoding='utf-8')

print("Segmented phrases saved to 'segmented_negative_phrases.csv'")



antonym_dict len 0 vs line count: 18797
Segmented phrases saved to 'segmented_negative_phrases.csv'


In [8]:
pip install jieba pandas


Collecting jieba
  Using cached jieba-0.42.1-py3-none-any.whl
Installing collected packages: jieba
Successfully installed jieba-0.42.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip
