In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Read the raw file. It is assumed to have no header row, so column names are
# supplied explicitly: the morpheme followed by up to four meaning columns.
meaning_columns = ["meaning_1", "meaning_2", "meaning_3", "meaning_4"]
df = pd.read_csv(
    "Medical_Word_Elements.csv",
    header=None,
    names=["morpheme", *meaning_columns],
)

# Merge all meaning columns into a single comma-separated string per row,
# skipping cells that are missing (NaN).
df["meaning"] = df[meaning_columns].apply(
    lambda row: ",".join(str(cell) for cell in row if pd.notna(cell)), axis=1
)

# Collapse any run of consecutive commas into a single comma. A literal
# ",," -> "," replacement is a single pass and still leaves ",," behind when
# three or more commas are adjacent. regex=True is passed explicitly because
# the default of `regex` is not the same in every pandas version.
df["meaning"] = df["meaning"].str.replace(r",{2,}", ",", regex=True)

# Keep only the morpheme and the merged meaning.
df = df[["morpheme", "meaning"]]

# Save the result as a new CSV file.
df.to_csv("Medical_Word_Elements_Cleaned.csv", index=False, encoding="utf-8")

print("处理完成！已保存为 Medical_Word_Elements_Cleaned.csv")


处理完成！已保存为 Medical_Word_Elements_Cleaned.csv


In [1]:
import pandas as pd
# Reload the cleaned table from disk and print its first ten rows as a quick
# sanity check of the saved file.
df = pd.read_csv("Medical_Word_Elements_Cleaned.csv")
print(df.iloc[:10])

     morpheme                                   meaning
0          a-                              not, without
1         ab-                                 away from
2   abdomin/o                                   abdomen
3         -ac                             pertaining to
4  acetabul/o               acetabulum, part of hipbone
5     acous/o                            sound, hearing
6       acr/o                          top, extremities
7    acromi/o  acromion, extension of the shoulder bone
8     -acusia                abnormal hearing condition
9     -acusis                abnormal hearing condition


In [3]:
import pandas as pd

# Load the cleaned morpheme table.
df = pd.read_csv("Medical_Word_Elements_Cleaned.csv")

# Morphemes to add, or whose meaning is overwritten when already present.
# NOTE(review): 'hermi/o' looks like a typo for 'herni/o' (hernia) — confirm
# against the source word list before relying on this entry.
morphemes_to_update = [
    {'morpheme': '-kinesia', 'meaning': 'movement'},
    {'morpheme': '-metry', 'meaning': 'process of measuring'},
    {'morpheme': '-tonia', 'meaning': 'tone, pressure'},
    {'morpheme': 'axill/o', 'meaning': 'armpit'},
    {'morpheme': 'cardiomy/o', 'meaning': 'heart muscle'},
    {'morpheme': 'faci/o', 'meaning': 'face'},
    {'morpheme': 'hermi/o', 'meaning': 'hernia'},
    {'morpheme': 'is/o', 'meaning': 'equal, same'},
    {'morpheme': 'myocardi/o', 'meaning': 'heart muscle'},
    {'morpheme': 'poplit/o', 'meaning': 'back of the knee'},
    {'morpheme': 'quadri-', 'meaning': 'four'},
    {'morpheme': 'sphincter/o', 'meaning': 'sphincter'},
    {'morpheme': 'spher/o', 'meaning': 'sphere, ball'},
    {'morpheme': 'troph/o', 'meaning': 'nourishment'}
]

# Collapse the list into a morpheme -> meaning lookup. If a morpheme were
# listed more than once the last meaning wins, and the dict keeps the order of
# first appearance, which is the order new rows are appended in.
updates = {item['morpheme']: item['meaning'] for item in morphemes_to_update}

# Overwrite the meaning of every row whose morpheme is already in the table.
already_present = df['morpheme'].isin(list(updates))
df.loc[already_present, 'meaning'] = df.loc[already_present, 'morpheme'].map(updates)

# Append all morphemes that are not in the table yet with a single concat,
# instead of re-allocating the whole frame once per new row inside a loop.
known_morphemes = set(df['morpheme'])
new_rows = pd.DataFrame(
    [
        {'morpheme': morpheme, 'meaning': meaning}
        for morpheme, meaning in updates.items()
        if morpheme not in known_morphemes
    ],
    columns=['morpheme', 'meaning'],
)
if not new_rows.empty:
    df = pd.concat([df, new_rows], ignore_index=True)

# Write the updated table back over the cleaned file.
df.to_csv("Medical_Word_Elements_Cleaned.csv", index=False)

print(df.tail(5))


        morpheme           meaning
730     poplit/o  back of the knee
731      quadri-              four
732  sphincter/o         sphincter
733      spher/o      sphere, ball
734      troph/o       nourishment


In [3]:
# Report and remove rows that repeat an already-seen morpheme.
# `df` is not created in this cell; it is the frame left by an earlier cell.
# duplicated() flags every occurrence after the first, so the rows printed
# here are exactly the rows dropped below.
duplicate_morphemes = df[df.duplicated(subset=['morpheme'])]
print("Duplicate morphemes:\n", duplicate_morphemes)

# Deduplicate on the same key used for the report. Without `subset`,
# drop_duplicates() only removes rows that are identical in every column, so a
# repeated morpheme with a different meaning would be reported but not removed.
df_no_duplicates = df.drop_duplicates(subset=['morpheme'], keep='first')
df_no_duplicates.to_csv('Medical_Word_Elements_Cleaned.csv', index=False)

Duplicate morphemes:
     morpheme                         meaning
560   spin/o  spine, the column of back bone


In [None]:
import pandas as pd
# Reload the cleaned table from disk; a comma is already read_csv's default
# field separator, so it does not need to be spelled out.
df = pd.read_csv('Medical_Word_Elements_Cleaned.csv')


Index(['morpheme', 'meaning'], dtype='object')
