In [3]:
# Install extra packages into the kernel's environment via the %pip magic.
# googletrans is pinned to 4.0.0-rc1; openpyxl is installed unpinned.
# NOTE(review): openpyxl is presumably the engine pandas uses to read the
# .xlsx workbook later in this notebook — confirm, and consider pinning it too.
# NOTE(review): pandas and requests are imported below but not installed here;
# they are assumed to already be present in the environment.
%pip install googletrans==4.0.0-rc1 openpyxl



In [None]:
import pandas as pd
import requests
from googletrans import Translator

def get_synonyms(word, max_results=3):
    """Look up words with a similar meaning via the Datamuse API.

    Sends a "means like" (``ml``) query to api.datamuse.com and returns the
    first ``max_results`` entries of the response.

    Args:
        word: The word (or phrase) to look up.
        max_results: Maximum number of synonyms to return.

    Returns:
        A list of at most ``max_results`` strings, in the order the API
        returned them. An empty list is returned when the request fails, the
        server answers with an error status, or the payload cannot be parsed,
        so that error text is never mistaken for (and stored as) a synonym.
    """
    try:
        response = requests.get(
            "https://api.datamuse.com/words",
            # Let requests build the query string so that spaces, '&', '#',
            # etc. in `word` are percent-encoded instead of corrupting the URL.
            params={"ml": word},
            # Without a timeout a stalled connection would block forever.
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
        return [item['word'] for item in data[:max_results]]
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Best effort: report the problem and let the caller continue with
        # no synonyms for this word.
        print(f"Synonym lookup failed for {word!r}: {e}")
        return []

In [6]:
import re

file_path = "./Greg Mat Vocab List (32 Groups, 960 Words).xlsx"

# Parse the first sheet. The context manager closes the workbook handle as
# soon as parsing is done instead of leaving it open for the kernel's lifetime.
with pd.ExcelFile(file_path) as xls:
    df_raw = xls.parse(xls.sheet_names[0])

# Boolean row mask: True for rows in which at least one cell is a string that
# starts with "Group <n>". The predicate returns a real bool per cell (not a
# Match object or None) so the mask does not depend on object truthiness.
group_start_indices = df_raw.map(
    lambda x: isinstance(x, str) and re.match(r"Group\s*\d+", x) is not None
).any(axis=1)
# Index labels of the header rows; they are used as positions with .iloc
# below, which is valid for the default 0..n-1 index produced by parse().
group_start_rows = df_raw[group_start_indices].index.tolist()

# Sentinel one past the last row so the final block has an end bound
group_start_rows.append(len(df_raw))

# One record (dict) per word; converted into a DataFrame in one go below
group_dfs = []

# Walk consecutive (start, end) pairs; each pair delimits one band of rows
# that begins with a row of group headers.
for start, end in zip(group_start_rows, group_start_rows[1:]):
    block = df_raw.iloc[start:end]

    # Only every second column is inspected for a group header
    for col in range(0, block.shape[1], 2):
        group_name = block.iloc[0, col]
        if not (isinstance(group_name, str) and group_name.startswith("Group")):
            continue

        # Skip the header row and the row right after it, then keep only
        # purely alphabetic strings (entries with spaces, hyphens or digits
        # are dropped by isalpha()).
        words = block.iloc[2:, col].dropna().tolist()
        words = [w for w in words if isinstance(w, str) and w.isalpha()]
        group_dfs.extend(
            {
                "Group": group_name,
                "Word": word,
                "Synonym1": "",
                "Synonym2": "",
                "Synonym3": "",
                "Korean": ""
            }
            for word in words
        )

# Convert to a single DataFrame
final_grouped_df = pd.DataFrame(group_dfs)

print(final_grouped_df)

         Group        Word Synonym1 Synonym2 Synonym3 Korean
0      Group 1      abound                                  
1      Group 1   amorphous                                  
2      Group 1     austere                                  
3      Group 1       belie                                  
4      Group 1  capricious                                  
...        ...         ...      ...      ...      ...    ...
1080  Group 37     wanting                                  
1081  Group 37     willful                                  
1082  Group 38   querulous                                  
1083  Group 38       phony                                  
1084  Group 38       check                                  

[1085 rows x 6 columns]


In [8]:
# Translate each word into Korean using googletrans and find synonyms using API.
# Results are collected in four parallel lists, one entry per processed word.
words = final_grouped_df['Word'].tolist()
translator = Translator()
translated, syn1, syn2, syn3 = [], [], [], []

for i, word in enumerate(words):
    # Progress: index, word, percentage of the list processed so far
    print(i, word, i/len(words)*100)

    # Translate. Only ordinary errors are downgraded to an empty translation;
    # KeyboardInterrupt / SystemExit propagate so this long-running loop can
    # still be stopped from the notebook.
    try:
        ko = translator.translate(word, src='en', dest='ko').text
    except Exception as e:
        print(f"  translation failed for {word!r}: {e}")
        ko = ""

    # Synonyms, padded with "" so there are always exactly three slots
    synonyms = get_synonyms(word)
    first, second, third = (list(synonyms) + ["", "", ""])[:3]

    # Append to all four lists together so they stay the same length
    syn1.append(first)
    syn2.append(second)
    syn3.append(third)
    translated.append(ko)

0 abound 0.0
1 amorphous 0.09216589861751152
2 austere 0.18433179723502305
3 belie 0.2764976958525346
4 capricious 0.3686635944700461
5 cerebral 0.4608294930875576
6 congenial 0.5529953917050692
7 conspicuous 0.6451612903225806
8 cursory 0.7373271889400922
9 daunting 0.8294930875576038
10 deify 0.9216589861751152
11 didactic 1.0138248847926268
12 disseminate 1.1059907834101383
13 feasible 1.19815668202765
14 flout 1.2903225806451613
15 homogeneous 1.3824884792626728
16 humdrum 1.4746543778801844
17 insipid 1.566820276497696
18 loquacious 1.6589861751152075
19 misanthropic 1.7511520737327189
20 misnomer 1.8433179723502304
21 negligent 1.935483870967742
22 obsequious 2.0276497695852536
23 placate 2.1198156682027647
24 proclivity 2.2119815668202767
25 puerile 2.3041474654377883
26 quixotic 2.3963133640553
27 spendthrift 2.488479262672811
28 taciturn 2.5806451612903225
29 wary 2.672811059907834
30 adulterate 2.7649769585253456
31 advocate 2.857142857142857
32 aggrandize 2.9493087557603688


In [18]:
# Copy the collected results into the frame. Only rows 0..len(translated)-1
# are written (label-based, inclusive slice), so rows for which no result was
# collected keep their existing values.
last_filled_label = len(translated) - 1
for column_name, column_values in (
    ('Synonym1', syn1),
    ('Synonym2', syn2),
    ('Synonym3', syn3),
    ('Korean', translated),
):
    final_grouped_df.loc[0:last_filled_label, column_name] = column_values

print(final_grouped_df)

# Persist the enriched table as UTF-8 CSV without the index column
final_grouped_df.to_csv('./GRE_Verbal.csv', index=False, encoding='utf-8')

         Group        Word     Synonym1        Synonym2      Synonym3 Korean
0      Group 1      abound        burst         bristle        thrive  많이 있다
1      Group 1   amorphous    shapeless  noncrystalline      formless    비정질
2      Group 1     austere        stern          severe         stark     남풍
3      Group 1       belie       negate      contradict  misrepresent    믿는다
4      Group 1  capricious     freakish   unpredictable     whimsical  변덕스러운
...        ...         ...          ...             ...           ...    ...
1080  Group 37     wanting       absent         missing     deficient   이 없이
1081  Group 37     willful  self-willed      headstrong     voluntary   고집 센
1082  Group 38   querulous  complaining    complaintive         whiny     쿼리
1083  Group 38       phony       phoney            fake         bogus    위조품
1084  Group 38       check        match           hitch     check out   확인하다

[1085 rows x 6 columns]
