# Follow-up filtering
This primarily exists so that it is unnecessary to rerun the full extraction process each time I find additional items to filter during the cleaning process.

In [1]:
import json
from pathlib import Path
import ujson
from dutchanalyzer.config import *
from dutchanalyzer.utilities.utils import *
from dutchanalyzer.utilities.json_utils import *
from dutchanalyzer.utilities.replacement_utils import *
from pathlib import Path
from dotenv import load_dotenv
import datetime
from pprint import pprint
from tqdm import tqdm

In [13]:
from dutchanalyzer.utilities.pandas_utils import *

In [2]:
# Dated output folder for this cleaning run; the date is formatted as dd-mm-yy.
today = datetime.date.today().strftime("%d-%m-%y")
current_save_folder = Path(INTERIM_DATA_DIR) / 'cleaning' / 'wikt' / today
folders = {'en': ['EEF', 'ENF'], 'nl': ['NEF', 'NNF']}

# Create <run folder>/<language key>/<subfolder> for every configured pair.
# Existing folders are left untouched (exist_ok=True).
for k, v in folders.items():
    for f in v:
        (current_save_folder / k / f).mkdir(parents=True, exist_ok=True)

In [3]:
# Paths to the NNR / NER / EER / ENR JSONL files inside their respective
# directories (the *_DIR constants are not defined in this notebook;
# presumably they come from dutchanalyzer.config -- verify).
NNR_file = Path(NNR_DIR) / 'NNR.jsonl'
NER_file = Path(NER_DIR) / 'NER.jsonl'
EER_file = Path(EER_DIR) / 'EER.jsonl'
ENR_file = Path(ENR_DIR) / 'ENR.jsonl'

In [7]:
def filter_obj(obj):
    """Strip unwanted keys from an entry in place.

    Currently only the ``'wikipedia'`` key is removed; an entry without
    that key is left unchanged. Nothing is returned.
    """
    if 'wikipedia' in obj:
        del obj['wikipedia']
    

In [8]:
def _write_jsonl(entries, handle):
    """Write each entry to ``handle`` as one JSON document per line."""
    for entry in entries:
        json.dump(entry, handle, ensure_ascii=False)
        handle.write('\n')


def process_obj(in_file, entries_out_file, wl_code, definitions_out_file=None, batch_size=1000, break_point=-1):
    """Re-filter a JSONL file of entries and write the results back out as JSONL.

    Every non-blank line of ``in_file`` is parsed as JSON, passed through
    ``sort_standardize_entry``, tagged with ``wl_code`` and cleaned with
    ``filter_obj``. Entries for which ``sort_standardize_entry`` returns a
    falsy value are dropped. Surviving entries are buffered and written to
    ``entries_out_file``. When ``definitions_out_file`` is given, the result
    of ``extract_words_senses`` for each entry is buffered and written there
    as well.

    Args:
        in_file: Path of the JSONL file to read (UTF-8).
        entries_out_file: Path of the JSONL file to write entries to. It is
            truncated when the function starts.
        wl_code: Value stored under the ``'wl_code'`` key of every entry.
        definitions_out_file: Optional path for the ``extract_words_senses``
            output. NOTE: this file is opened in append mode, so repeated
            runs add to an existing file instead of replacing it.
        batch_size: A buffer is flushed to disk once it holds more than this
            many items.
        break_point: When positive, stop once the zero-based line index
            exceeds this value and display the buffered entries. Non-positive
            values disable the limit.

    Returns:
        A tuple ``(entries_batch, batch, error_lines)``. ``entries_batch``
        and ``batch`` hold the items that were still buffered when reading
        stopped (they have already been written to disk too).
        ``error_lines`` is a list of ``(line_index, obj)`` pairs for the line
        that raised; ``obj`` is ``None`` when the line could not be parsed.
        Processing stops at the first failing line.
    """
    batch = []
    entries_batch = []
    error_lines = []
    with open(in_file, 'r', encoding='utf-8') as f, \
            open(entries_out_file, 'w+', encoding='utf-8') as out:
        for i, line in tqdm(enumerate(f)):
            if break_point > 0 and i > break_point:
                display(entries_batch)
                break
            # Lines read from a file keep their trailing newline, so a plain
            # truthiness check never skips blank lines; strip first.
            if not line.strip():
                continue
            # Reset per line so the error handler never sees an unbound name
            # or the object left over from a previous line.
            obj = None
            try:
                obj = json.loads(line)
                obj = sort_standardize_entry(obj)
                if not obj:
                    continue
                obj['wl_code'] = wl_code
                filter_obj(obj)
                entries_batch.append(obj)

                if len(entries_batch) > batch_size:
                    _write_jsonl(entries_batch, out)
                    entries_batch = []
                if definitions_out_file:
                    batch.append(extract_words_senses(obj))
                    if len(batch) > batch_size:
                        with open(definitions_out_file, 'a+', encoding='utf-8') as def_out:
                            _write_jsonl(batch, def_out)
                        batch = []
            except Exception as e:
                # Record the failure, show the offending line and stop; the
                # buffers are still flushed below.
                error_lines.append((i, obj))
                display(line)
                print("Error on line: ", i, " Error: ", e)
                break
        # Flush whatever is still buffered. The lists are deliberately not
        # cleared because they are returned to the caller.
        if entries_batch:
            _write_jsonl(entries_batch, out)
        if batch and definitions_out_file:
            with open(definitions_out_file, 'a+', encoding='utf-8') as def_out:
                _write_jsonl(batch, def_out)
    return entries_batch, batch, error_lines

In [44]:
# Trial run on the EEF file: break_point=100 stops after line index 100, so
# at most 101 lines are read. Entries go to a temporary JSONL file in the
# EEF folder; the definitions file sits directly under the 'en' folder.
file = EEF_FILE
wl_code = 'EEF'
en_folder = Path(current_save_folder, 'en')
temp_out = en_folder / 'EEF' / 'EEF_temp.jsonl'
defs_file = en_folder / 'EEF_definitions.jsonl'
entries_batch, batch, error_lines = process_obj(
    file,
    temp_out,
    wl_code,
    definitions_out_file=defs_file,
    batch_size=1000,
    break_point=100,
)

0it [00:00, ?it/s]

[{'word': 'dictionary',
  'pos': 'noun',
  'lang_code': 'en',
  'lang': 'english',
  'standard_lang': 'english',
  'categories': ['English countable nouns',
   'English entries with etymology trees',
   'English entries with incorrect language header',
   'English lemmas',
   'English nouns',
   'English terms derived from Latin',
   'English terms derived from Medieval Latin',
   'English terms derived from Middle English',
   'English terms derived from Proto-Indo-European',
   'English terms derived from Proto-Italic',
   'English terms derived from the Proto-Indo-European root *deyḱ-',
   'English terms inherited from Middle English',
   'English terms suffixed with -ary',
   'English verbs',
   'Entries with translation boxes',
   'Pages with 1 entry',
   'Pages with entries',
   'Pages with etymology trees',
   'Rhymes:English/ɪkʃənɛəɹi',
   'Rhymes:English/ɪkʃənɛəɹi/4 syllables',
   'Terms with Dutch Low Saxon translations',
   'Terms with Dutch translations',
   '

101it [00:00, 439.70it/s]


In [45]:
# Load the entries still buffered by process_obj into a DataFrame, one row per
# entry. `pd` is not imported explicitly in this notebook; presumably it is
# provided by one of the star imports above -- verify.
df = pd.DataFrame(entries_batch)

In [46]:
def sort_df_columns(df, start_cols=('word', 'pos', 'lang_code', 'standard_lang', 'senses'), end_cols=('translations', 'lang', 'wl_code'), groups=('forms', 'etymology', 'nyms', 'categories')):
    """Return ``df`` with its columns reordered into a fixed layout.

    The resulting column order is:

    1. the columns named in ``start_cols``, in that order;
    2. the members of each requested group, in the order ``groups`` lists
       them;
    3. all remaining columns, sorted;
    4. the columns named in ``end_cols``, in that order.

    Names that are not columns of ``df`` are skipped. Each column is emitted
    once: a name that qualifies for more than one section stays in the first
    section that claims it, with the leading sections (1-2) taking precedence
    over ``end_cols``.

    Args:
        df: DataFrame whose columns should be reordered. It is not modified.
        start_cols: Column names to place first.
        end_cols: Column names to place last.
        groups: Names of predefined column groups to place after
            ``start_cols``. Recognised names are ``'forms'``,
            ``'etymology'``, ``'nyms'`` and ``'categories'``; any other name
            is ignored. ``None`` or an empty sequence disables grouping.

    Returns:
        A DataFrame with the same columns as ``df`` in the new order.
    """
    group_members = {
        'forms': ('form_of', 'forms', 'alt_of', 'inflection_templates', 'derived'),
        'etymology': ('etymology_templates', 'etymology_text', 'etymology_tree'),
        'nyms': ('synonyms', 'antonyms', 'hypernyms', 'hyponyms', 'troponyms', 'holonyms', 'meronyms'),
        'categories': ('categories', 'links', 'related', 'topics'),
    }
    present = set(df.columns)

    # Leading section: start columns followed by the requested groups.
    leading = []
    claimed = set()
    candidates = list(start_cols or ())
    for group in groups or ():
        candidates.extend(group_members.get(group, ()))
    for col in candidates:
        if col in present and col not in claimed:
            leading.append(col)
            claimed.add(col)

    # Trailing section: end columns not already placed in the leading section.
    trailing = []
    for col in end_cols or ():
        if col in present and col not in claimed:
            trailing.append(col)
            claimed.add(col)

    # Everything else goes in the middle, sorted.
    middle = sorted(col for col in df.columns if col not in claimed)

    return df.loc[:, leading + middle + trailing]

In [47]:
# Reorder the columns with the default layout of sort_df_columns.
df = sort_df_columns(df)

In [48]:
def summarize_df(df: pd.DataFrame, name: str = None, num_top_values=10) -> pd.DataFrame:
    """Build a per-column summary of ``df``, similar in spirit to ``df.info()``.

    Args:
        df: DataFrame to summarize.
        name: Label stored in the ``df_name`` column of every summary row.
        num_top_values: How many of the most frequent values to list per
            column.

    Returns:
        A DataFrame with one row per column of ``df`` and these fields:
        ``df_name``, ``col_num`` (column position), ``column``, ``dtype``,
        ``non_null`` (count of non-null values), ``unique`` (count of
        distinct non-null values, or the string ``'List'`` when the values
        cannot be hashed), ``top_vals`` (the most frequent values, nulls
        included, as a ``"value : count"`` string joined by ``", "``),
        ``sh_rows`` and ``sh_cols`` (shape of ``df``).
    """
    n_rows, n_cols = df.shape
    summary_data = []

    for i, col in enumerate(df.columns):
        series = df[col]
        try:
            unique_vals = series.nunique(dropna=True)
        except TypeError:
            # Unhashable cell values (e.g. lists or dicts) cannot be counted
            # as distinct values; flag the column instead of failing.
            unique_vals = 'List'
        top_vals = series.value_counts(dropna=False).head(num_top_values)
        top_vals_str = ", ".join(f"{idx} : {val}" for idx, val in top_vals.items())

        summary_data.append({
            "df_name": name,
            'col_num': i,
            "column": col,
            "dtype": str(series.dtype),
            "non_null": series.notna().sum(),
            "unique": unique_vals,
            "top_vals": top_vals_str,
            "sh_rows": n_rows,
            "sh_cols": n_cols,
        })

    return pd.DataFrame(summary_data)

In [49]:
# Cap the displayed width of each column at 3 characters (presumably to keep
# the long list-valued cells from flooding the rendered table).
pd.set_option('display.max_colwidth', 3) 
# Summarize every column of df, labelled 'EEF', listing the 5 most frequent values.
summarize_df(df, 'EEF', 5)

Unnamed: 0,df_name,col_num,column,dtype,non_null,unique,top_vals,sh_rows,sh_cols
0,EEF,0,word,object,101,37,"pie : 8, cat : 6, pound : 6, A : 5, crow : 5",101,27
1,EEF,1,pos,object,101,8,"noun : 51, verb : 29, adj : 10, adv : 6, intj : 2",101,27
2,EEF,2,lang_code,object,101,1,en : 101,101,27
3,EEF,3,standard_lang,object,101,1,english : 101,101,27
4,EEF,4,senses,object,101,List,"[{'glosses': [""A reference work listing words or names from one or more languages, usually ordered alphabetically, explaining each word's meanings or senses, oftentimes also containing information on its etymology, pronunciation, usage, semantic relations, translations, as well as other relevant information.""], 'categories': ['English terms with quotations', 'English terms with usage examples'], 'coordinate_terms': [{'word': 'thesaurus'}], 'hypernyms': [{'word': 'wordbook'}], 'links': [['reference work', 'reference work'], ['word', 'word'], ['name', 'name'], ['language', 'language'], ['alphabetically', 'alphabetically'], ['meanings', 'meaning#Noun'], ['senses', 'sense#English:_any_particular_meaning_of_a_word'], ['etymology', 'etymology#Noun'], ['pronunciation', 'pronunciation#Noun'], ['usage', 'usage#English:_language'], ['translations', 'translation#Noun']]}, {'glosses': ['A reference work on a particular subject or activity in which the entries are arranged alphabetically; an alphabetical encyclopedia.'], 'categories': ['English terms with usage examples'], 'links': [['reference work', 'reference work'], ['subject', 'subject'], ['activity', 'activity'], ['alphabetical', 'alphabetical'], ['encyclopedia', 'encyclopedia']], 'raw_glosses': ['(by extension) A reference work on a particular subject or activity in which the entries are arranged alphabetically; an alphabetical encyclopedia.'], 'tags': ['broadly']}, {'glosses': ['A person or thing regarded as a repository or compendium of information.'], 'links': [['person', 'person'], ['thing', 'thing'], ['repository', 'repository'], ['compendium', 'compendium'], ['information', 'information']], 'raw_glosses': ['(figurative) A person or thing regarded as a repository or compendium of information.'], 'tags': ['figuratively']}, {'glosses': ['The collection of words used or understood by a particular person; vocabulary.'], 'categories': ['English derogatory terms'], 'links': [['of', 
'of#English'], ['derogatory', 'derogatory'], ['collection', 'collection'], ['word', 'word'], ['person', 'person'], ['vocabulary', 'vocabulary']], 'qualifier': 'frequently figurative', 'raw_glosses': ['(with of or possessive, frequently figurative, especially derogatory) The collection of words used or understood by a particular person; vocabulary.'], 'tags': ['derogatory', 'especially', 'possessive', 'with-of']}, {'glosses': ['A synchronic dictionary of a standardised language held to only contain words that are properly part of the language.'], 'categories': ['English terms with quotations'], 'links': [['the', 'the#English'], ['synchronic', 'synchronic']], 'qualifier': 'preceded by the', 'raw_glosses': ['(preceded by the) A synchronic dictionary of a standardised language held to only contain words that are properly part of the language.']}, {'glosses': ['An associative array, a data structure where each value is referenced by a particular key, analogous to words and definitions in a dictionary (sense 1).'], 'categories': ['English terms with quotations', 'en:Computing'], 'hyponyms': [{'word': 'hash table'}], 'links': [['computing', 'computing#Noun'], ['associative array', 'associative array']], 'raw_glosses': ['(computing) An associative array, a data structure where each value is referenced by a particular key, analogous to words and definitions in a dictionary (sense 1).'], 'topics': ['computing', 'engineering', 'mathematics', 'natural-sciences', 'physical-sciences', 'sciences']}] : 1, [{'glosses': ['To look up in a dictionary.'], 'categories': ['English transitive verbs'], 'links': [['look up', 'look up']], 'raw_glosses': ['(transitive) To look up in a dictionary.'], 'tags': ['transitive']}, {'glosses': ['To add to a dictionary.'], 'categories': ['English terms with quotations', 'English transitive verbs'], 'links': [['add', 'add']], 'raw_glosses': ['(transitive) To add to a dictionary.'], 'tags': ['transitive']}, {'glosses': ['To compile a dictionary.'], 
'categories': ['English intransitive verbs', 'English terms with quotations', 'English terms with rare senses'], 'links': [['compile', 'compile']], 'raw_glosses': ['(intransitive, rare) To compile a dictionary.'], 'tags': ['intransitive', 'rare']}] : 1, [{'glosses': ['Unconstrained.'], 'synonyms': [{'word': 'quit'}, {'word': 'unconstrained'}, {'word': 'unfettered'}, {'word': 'unhindered'}], 'antonyms': [{'word': 'constrained'}, {'word': 'restricted'}], 'categories': ['English terms with quotations', 'English terms with usage examples'], 'links': [['Unconstrained', 'unconstrained']], 'qualifier': 'social', 'raw_glosses': ['(social) Unconstrained.']}, {'glosses': ['Unconstrained.', 'Not imprisoned or enslaved.'], 'synonyms': [{'word': 'quit'}, {'word': 'unconstrained'}, {'word': 'unfettered'}, {'word': 'unhindered'}], 'antonyms': [{'word': 'constrained'}, {'word': 'restricted'}, {'word': 'bound'}, {'word': 'enslaved'}, {'word': 'imprisoned'}], 'categories': ['English terms with collocations', 'English terms with quotations', 'English terms with usage examples'], 'links': [['Unconstrained', 'unconstrained'], ['imprisoned', 'imprisoned'], ['enslaved', 'enslaved']], 'qualifier': 'social', 'raw_glosses': ['(social) Unconstrained.', 'Not imprisoned or enslaved.']}, {'glosses': ['Unconstrained.', 'Generous; liberal.'], 'synonyms': [{'word': 'quit'}, {'word': 'unconstrained'}, {'word': 'unfettered'}, {'word': 'unhindered'}], 'antonyms': [{'word': 'constrained'}, {'word': 'restricted'}], 'categories': ['English terms with quotations', 'English terms with usage examples'], 'links': [['Unconstrained', 'unconstrained'], ['Generous', 'generous'], ['liberal', 'liberal']], 'qualifier': 'social', 'raw_glosses': ['(social) Unconstrained.', 'Generous; liberal.']}, {'glosses': ['Unconstrained.', 'Clear of offence or crime; guiltless; innocent.'], 'synonyms': [{'word': 'quit'}, {'word': 'unconstrained'}, {'word': 'unfettered'}, {'word': 'unhindered'}], 'antonyms': [{'word': 
'constrained'}, {'word': 'restricted'}], 'categories': ['English terms with obsolete senses', 'English terms with quotations'], 'links': [['Unconstrained', 'unconstrained']], 'qualifier': 'social', 'raw_glosses': ['(social) Unconstrained.', '(obsolete) Clear of offence or crime; guiltless; innocent.'], 'tags': ['obsolete']}, {'glosses': ['Unconstrained.', 'Without obligations.'], 'synonyms': [{'word': 'quit'}, {'word': 'unconstrained'}, {'word': 'unfettered'}, {'word': 'unhindered'}], 'antonyms': [{'word': 'constrained'}, {'word': 'restricted'}], 'categories': ['English terms with quotations', 'English terms with usage examples'], 'links': [['Unconstrained', 'unconstrained'], ['obligation', 'obligation']], 'qualifier': 'social', 'raw_glosses': ['(social) Unconstrained.', 'Without obligations.']}, {'glosses': ['Unconstrained.', 'To be enjoyed by anyone freely.'], 'synonyms': [{'word': 'quit'}, {'word': 'unconstrained'}, {'word': 'unfettered'}, {'word': 'unhindered'}], 'antonyms': [{'word': 'constrained'}, {'word': 'restricted'}], 'categories': ['English terms with quotations', 'English terms with usage examples'], 'links': [['Unconstrained', 'unconstrained']], 'qualifier': 'social', 'raw_glosses': ['(social) Unconstrained.', 'To be enjoyed by anyone freely.']}, {'glosses': ['Unconstrained.', 'Upholding individual rights.'], 'synonyms': [{'word': 'nonauthoritarian'}, {'word': 'quit'}, {'word': 'unconstrained'}, {'word': 'unfettered'}, {'word': 'unhindered'}], 'antonyms': [{'word': 'constrained'}, {'word': 'restricted'}, {'word': 'unfree'}, {'word': 'authoritarian'}], 'categories': ['English terms with collocations', 'English terms with quotations', 'English terms with usage examples'], 'links': [['Unconstrained', 'unconstrained']], 'qualifier': 'social', 'raw_glosses': ['(social) Unconstrained.', '(of a government, country) Upholding individual rights.'], 'raw_tags': ['of a government'], 'topics': ['country', 'location', 'region']}, {'glosses': ['Unconstrained.', 
'With no or only freedom-preserving limitations on distribution or modification.'], 'synonyms': [{'word': 'free as in freedom'}, {'word': 'free as in speech'}, {'word': 'libre'}, {'word': 'quit'}, {'word': 'unconstrained'}, {'word': 'unfettered'}, {'word': 'unhindered'}], 'antonyms': [{'word': 'constrained'}, {'word': 'restricted'}, {'word': 'proprietary'}, {'word': 'nonfree'}], 'categories': ['English terms with quotations', 'English terms with usage examples', 'en:Software'], 'coordinate_terms': [{'word': 'gratis#Adjective'}, {'word': 'free as in beer'}], 'links': [['Unconstrained', 'unconstrained'], ['software', 'software'], ['limitation', 'limitation']], 'qualifier': 'social', 'raw_glosses': ['(social) Unconstrained.', '(software) With no or only freedom-preserving limitations on distribution or modification.'], 'topics': ['computing', 'engineering', 'mathematics', 'natural-sciences', 'physical-sciences', 'sciences', 'software']}, {'glosses': ['Unconstrained.', 'Intended for release, and omitting debugging diagnostics, as opposed to a checked version.'], 'synonyms': [{'word': 'quit'}, {'word': 'unconstrained'}, {'word': 'unfettered'}, {'word': 'unhindered'}], 'antonyms': [{'word': 'constrained'}, {'word': 'restricted'}], 'categories': ['English terms with quotations', 'en:Software'], 'links': [['Unconstrained', 'unconstrained'], ['software', 'software'], ['release', 'release'], ['debug', 'debug'], ['checked', 'checked']], 'qualifier': 'social', 'raw_glosses': ['(social) Unconstrained.', '(software) Intended for release, and omitting debugging diagnostics, as opposed to a checked version.'], 'topics': ['computing', 'engineering', 'mathematics', 'natural-sciences', 'physical-sciences', 'sciences', 'software']}, {'glosses': ['Obtainable without any payment.'], 'synonyms': [{'word': ';'}, {'word': 'costless'}, {'word': 'feeless'}, {'word': 'free as in beer'}, {'word': 'free of charge'}, {'word': 'gratis'}], 'antonyms': [{'word': 'see at nonfree'}], 'categories': 
['English terms with quotations', 'English terms with usage examples'], 'coordinate_terms': [{'word': 'libre#Adjective'}, {'word': 'free as in speech'}, {'word': 'free as in freedom'}], 'links': [['payment', 'payment']]}, {'glosses': ['Obtainable without any payment.', 'Complimentary.'], 'synonyms': [{'word': ';'}, {'word': 'costless'}, {'word': 'feeless'}, {'word': 'free as in beer'}, {'word': 'free of charge'}, {'word': 'gratis'}], 'antonyms': [{'word': 'see at nonfree'}], 'categories': ['English terms with quotations', 'English terms with usage examples'], 'coordinate_terms': [{'word': 'libre#Adjective'}, {'word': 'free as in speech'}, {'word': 'free as in freedom'}], 'links': [['payment', 'payment'], ['Complimentary', 'complimentary#English']], 'raw_glosses': ['Obtainable without any payment.', '(by extension, chiefly used in advertising) Complimentary.'], 'raw_tags': ['used in advertising'], 'tags': ['broadly']}, {'glosses': ['Unconstrained.', 'In any of various technical senses generic, universal.', 'Such that any map f from X to the underlying set of an object A in the same category as F induces a map ÃÑf from F to A which is compatible with f (i.e. such that f=ÃÑf‚àòi).'], 'categories': ['en:Algebra', 'en:Category theory'], 'links': [['Unconstrained', 'unconstrained'], ['algebra', 'algebra'], ['algebraic structure', 'algebraic structure'], ['generic', 'generic'], ['universal', 'universal'], ['category theory', 'category theory'], ['concrete', 'concrete']], 'qualifier': 'most generally', 'raw_glosses': ['(abstract) Unconstrained.', '(algebra, of an algebraic structure) In any of various technical senses generic, universal.', '(most generally, category theory, of an object F in a concrete category, with respect to a set X and a map i from X to the underlying set of F) Such that any map f from X to the underlying set of an object A in the same category as F induces a map ÃÑf from F to A which is compatible with f (i.e. 
such that f=ÃÑf‚àòi).'], 'raw_tags': ['of an object F in a concrete category', 'with respect to a set X and a map i from X to the underlying set of F', 'of an algebraic structure'], 'tags': ['abstract'], 'topics': ['algebra', 'category-theory', 'computing', 'engineering', 'mathematics', 'natural-sciences', 'physical-sciences', 'sciences']}, {'glosses': ['Unconstrained.', 'In any of various technical senses generic, universal.', 'Having a set of generators which satisfy no non-trivial relations; equivalently, being the group of reduced words on a set of generators.'], 'categories': ['English terms with usage examples', 'en:Algebra', 'en:Group theory'], 'links': [['Unconstrained', 'unconstrained'], ['algebra', 'algebra'], ['algebraic structure', 'algebraic structure'], ['generic', 'generic'], ['universal', 'universal'], ['group theory', 'group theory'], ['generators', 'generators'], ['non-trivial', 'non-trivial'], ['relation', 'relation'], ['reduced word', 'reduced word']], 'raw_glosses': ['(abstract) Unconstrained.', '(algebra, of an algebraic structure) In any of various technical senses generic, universal.', '(group theory, of a group) Having a set of generators which satisfy no non-trivial relations; equivalently, being the group of reduced words on a set of generators.'], 'raw_tags': ['of a group', 'of an algebraic structure'], 'tags': ['abstract'], 'topics': ['algebra', 'group-theory', 'mathematics', 'sciences']}, {'glosses': ['Unconstrained.', 'In any of various technical senses generic, universal.', 'Having a linearly independent set of generators (called a basis).'], 'categories': ['en:Algebra'], 'links': [['Unconstrained', 'unconstrained'], ['algebra', 'algebra'], ['algebraic structure', 'algebraic structure'], ['generic', 'generic'], ['universal', 'universal'], ['commutative algebra', 'commutative algebra'], ['module', 'module'], ['linearly independent', 'linearly independent'], ['basis', 'basis']], 'qualifier': 'commutative algebra', 'raw_glosses': 
['(abstract) Unconstrained.', '(algebra, of an algebraic structure) In any of various technical senses generic, universal.', '(commutative algebra, of a module) Having a linearly independent set of generators (called a basis).'], 'raw_tags': ['of a module', 'of an algebraic structure'], 'tags': ['abstract'], 'topics': ['algebra', 'mathematics', 'sciences']}, {'glosses': ['Unconstrained.', 'Unconstrained by quantifiers.'], 'antonyms': [{'word': 'bound'}], 'categories': ['English terms with usage examples', 'en:Logic'], 'links': [['Unconstrained', 'unconstrained'], ['logic', 'logic'], ['variable', 'variable'], ['quantifier', 'quantifier']], 'raw_glosses': ['(abstract) Unconstrained.', '(logic, of a variable) Unconstrained by quantifiers.'], 'raw_tags': ['of a variable'], 'tags': ['abstract'], 'topics': ['human-sciences', 'logic', 'mathematics', 'philosophy', 'sciences']}, {'glosses': ['Unconstrained.', 'Unconstrained of identifiers, not bound.'], 'synonyms': [{'word': 'unbound'}], 'antonyms': [{'word': 'bound'}], 'categories': ['en:Programming'], 'links': [['Unconstrained', 'unconstrained'], ['programming', 'programming#Noun'], ['identifier', 'identifier'], ['bound', 'bound']], 'raw_glosses': ['(abstract) Unconstrained.', '(programming) Unconstrained of identifiers, not bound.'], 'tags': ['abstract'], 'topics': ['computing', 'engineering', 'mathematics', 'natural-sciences', 'physical-sciences', 'programming', 'sciences']}, {'glosses': ['Unconstrained.', '(of a morpheme) That can be used by itself, unattached to another morpheme.'], 'categories': ['en:Linguistics'], 'links': [['Unconstrained', 'unconstrained'], ['linguistics', 'linguistics'], ['unattached', 'unattached'], ['morpheme', 'morpheme']], 'raw_glosses': ['(abstract) Unconstrained.', '(linguistics) (of a morpheme) That can be used by itself, unattached to another morpheme.'], 'tags': ['abstract'], 'topics': ['human-sciences', 'linguistics', 'sciences']}, {'glosses': ['Unconstrained.', 'Unobstructed, without 
blockages.'], 'synonyms': [{'word': 'clear'}, {'word': 'unobstructed'}], 'antonyms': [{'word': 'blocked'}, {'word': 'obstructed'}], 'categories': ['English terms with usage examples'], 'links': [['Unconstrained', 'unconstrained'], ['blockage', 'blockage']], 'raw_glosses': ['(physical) Unconstrained.', 'Unobstructed, without blockages.'], 'tags': ['physical']}, {'glosses': ['Unconstrained.', 'Unattached or uncombined.'], 'synonyms': [{'word': 'free', 'source': 'Thesaurus:loose'}, {'word': 'loose'}, {'word': 'loose', 'source': 'Thesaurus:loose'}, {'word': 'unfastened'}, {'word': 'unfastened', 'source': 'Thesaurus:loose'}, {'word': 'unsecured', 'source': 'Thesaurus:loose'}, {'word': 'unstapled', 'source': 'Thesaurus:loose'}], 'categories': ['English terms with usage examples'], 'links': [['Unconstrained', 'unconstrained']], 'raw_glosses': ['(physical) Unconstrained.', 'Unattached or uncombined.'], 'tags': ['physical']}, {'glosses': ['Unconstrained.', 'Not currently in use; not taken; unoccupied.'], 'categories': ['English terms with usage examples'], 'links': [['Unconstrained', 'unconstrained']], 'raw_glosses': ['(physical) Unconstrained.', 'Not currently in use; not taken; unoccupied.'], 'tags': ['physical']}, {'glosses': ['Unconstrained.', 'Not attached; loose.'], 'categories': ['English terms with quotations', 'English terms with usage examples', 'en:Botany', 'en:Mycology'], 'links': [['Unconstrained', 'unconstrained'], ['botany', 'botany'], ['mycology', 'mycology'], ['attached', 'attached'], ['loose', 'loose']], 'raw_glosses': ['(physical) Unconstrained.', '(botany, mycology) Not attached; loose.'], 'tags': ['physical'], 'topics': ['biology', 'botany', 'mycology', 'natural-sciences']}, {'glosses': ['Unconstrained.', 'Of a rocket or missile: not under the control of a guidance system after being launched.'], 'categories': ['en:Military'], 'links': [['Unconstrained', 'unconstrained'], ['military', 'military'], ['rocket', 'rocket'], ['missile', 'missile'], 
['guidance', 'guidance'], ['system', 'system']], 'raw_glosses': ['(physical) Unconstrained.', '(military) Of a rocket or missile: not under the control of a guidance system after being launched.'], 'tags': ['physical'], 'topics': ['government', 'military', 'politics', 'war']}, {'glosses': ['Without; not containing (what is specified); exempt; clear; liberated.'], 'synonyms': [{'word': 'without'}], 'categories': ['English terms with quotations', 'English terms with usage examples']}, {'glosses': ['Ready; eager; acting without spurring or whipping; spirited.'], 'categories': ['English dated terms', 'English terms with usage examples'], 'raw_glosses': ['(dated) Ready; eager; acting without spurring or whipping; spirited.'], 'tags': ['dated']}, {'glosses': ['Invested with a particular freedom or franchise; enjoying certain immunities or privileges; admitted to special rights; followed by of.'], 'categories': ['English dated terms', 'English terms with quotations'], 'raw_glosses': ['(dated) Invested with a particular freedom or franchise; enjoying certain immunities or privileges; admitted to special rights; followed by of.'], 'tags': ['dated']}, {'glosses': ['Certain or honourable; the opposite of base.'], 'categories': ['British English', 'English terms with obsolete senses', 'English terms with usage examples', 'en:Law'], 'links': [['law', 'law#English'], ['base', 'base']], 'raw_glosses': ['(UK, law, obsolete) Certain or honourable; the opposite of base.'], 'tags': ['UK', 'obsolete'], 'topics': ['law']}, {'glosses': ['Privileged or individual; proprietary.'], 'antonyms': [{'word': 'common'}, {'word': 'nonproprietary'}], 'categories': ['English terms with usage examples', 'en:Law'], 'links': [['law', 'law#English']], 'raw_glosses': ['(law) Privileged or individual; proprietary.'], 'topics': ['law']}] : 1, [{'glosses': ['Without needing to pay.'], 'synonyms': [{'word': 'for free'}, {'word': 'for nothing'}, {'word': 'gratis'}], 'categories': ['English terms with 
quotations', 'English terms with usage examples'], 'links': [['pay', 'pay']]}, {'glosses': ['Freely; willingly.'], 'categories': ['English terms with obsolete senses', 'English terms with quotations'], 'raw_glosses': ['(obsolete) Freely; willingly.'], 'tags': ['obsolete']}] : 1, [{'glosses': ['To make free; set at liberty; release.'], 'categories': ['English terms with quotations', 'English transitive verbs'], 'links': [['liberty', 'liberty'], ['release', 'release']], 'raw_glosses': ['(transitive) To make free; set at liberty; release.'], 'tags': ['transitive']}, {'glosses': ['To rid of something that confines or oppresses.'], 'categories': ['English terms with quotations', 'English transitive verbs', 'Quotation templates to be cleaned'], 'info_templates': [{'args': {'1': 'en', '2': ':from'}, 'name': '+obj', 'extra_data': {'words': ['from']}, 'expansion': '[with from]'}], 'links': [['rid', 'rid']], 'raw_glosses': ['(transitive) To rid of something that confines or oppresses. [with from]'], 'tags': ['transitive']}, {'glosses': ['To relinquish (previously allocated memory) to the system.'], 'categories': ['English terms with quotations', 'English transitive verbs', 'Quotation templates to be cleaned', 'en:Programming'], 'links': [['programming', 'programming#Noun'], ['relinquish', 'relinquish'], ['allocate', 'allocate'], ['memory', 'memory'], ['system', 'system']], 'raw_glosses': ['(transitive, programming) To relinquish (previously allocated memory) to the system.'], 'tags': ['transitive'], 'topics': ['computing', 'engineering', 'mathematics', 'natural-sciences', 'physical-sciences', 'programming', 'sciences']}] : 1",101,27
5,EEF,5,forms,object,81,List,"nan : 20, [{'form': 'pies', 'tags': ['plural']}] : 4, [{'form': 'marches', 'tags': ['plural']}] : 3, [{'form': 'cats', 'tags': ['plural']}] : 2, [{'form': 'pies', 'tags': ['present', 'singular', 'third-person']}, {'form': 'pieing', 'tags': ['participle', 'present']}, {'form': 'pied', 'tags': ['participle', 'past']}, {'form': 'pied', 'tags': ['past']}] : 2",101,27
6,EEF,6,derived,object,47,List,"nan : 54, [{'word': 'antidictionary'}, {'word': 'dicktionary'}, {'word': 'dictionarial'}, {'word': 'dictionarian'}, {'word': 'dictionaric'}, {'word': 'dictionarily'}, {'word': 'dictionarist'}, {'word': 'dictionarization'}, {'word': 'dictionary attack'}, {'word': 'dictionary attacker'}, {'word': 'dictionary definition'}, {'word': 'dictionaryese'}, {'word': 'dictionary form'}, {'word': 'dictionaryless'}, {'word': 'dictionarylike'}, {'word': 'dictionary-monger'}, {'word': 'fictionary'}, {'word': 'have swallowed a dictionary'}, {'word': 'hyperdictionary'}, {'word': 'interdictionary'}, {'word': 'long-haired dictionary'}, {'word': 'minidictionary'}, {'word': 'multidictionary'}, {'word': 'nondictionary'}, {'word': 'Pictionary'}, {'word': 'pillow dictionary'}, {'word': 'predictionary'}, {'word': 'sleeping dictionary'}, {'word': 'superdictionary'}, {'word': 'swallow the dictionary'}, {'word': 'walking dictionary'}] : 1, [{'word': 'undictionaried'}] : 1, [{'word': 'break free'}, {'word': 'feel free'}, {'word': 'first bite free'}, {'word': 'footloose and fancy free'}, {'word': 'free Abelian group'}, {'word': 'free abelian group'}, {'word': 'freeable'}, {'word': 'freeaboo'}, {'word': 'free agency'}, {'word': 'free agent'}, {'word': 'free algebra'}, {'word': 'free alongside'}, {'word': 'free alongside ship'}, {'word': 'free alongside ships'}, {'word': 'free as a bird'}, {'word': 'free as in beer'}, {'word': 'free as in freedom'}, {'word': 'free as in speech'}, {'word': 'free association'}, {'word': 'free as the wind'}, {'word': 'free ball'}, {'word': 'freeball'}, {'word': 'freebander'}, {'word': 'freebanding'}, {'word': 'free base'}, {'word': 'freebase'}, {'word': 'free bench'}, {'word': 'freebirth'}, {'word': 'free-bleeding'}, {'word': 'free-blown'}, {'word': 'freeblown'}, {'word': 'free board'}, {'word': 'free body'}, {'word': 'free-body diagram'}, {'word': 'free body diagram'}, {'word': 'free-boob'}, {'word': 'free Boolean algebra'}, {'word': 
'freebooter'}, {'word': 'freeborn'}, {'word': 'free box'}, {'word': 'FreeBSD'}, {'word': 'freeburn'}, {'word': 'free cash flow'}, {'word': 'free category'}, {'word': 'free cell formation'}, {'word': 'freechapel'}, {'word': 'Free China'}, {'word': 'free church'}, {'word': 'free city'}, {'word': 'Free City of Tri-Insula'}, {'word': 'freeclimb'}, {'word': 'free-climb'}, {'word': 'free climber'}, {'word': 'free climbing'}, {'word': 'free-climbing'}, {'word': 'free clinic'}, {'word': 'free clothes association'}, {'word': 'free coinage'}, {'word': 'free communism'}, {'word': 'free communist'}, {'word': 'free companion'}, {'word': 'freeconomics'}, {'word': 'free convection'}, {'word': 'free-cooling'}, {'word': 'free corps'}, {'word': 'free country'}, {'word': 'freecycle'}, {'word': 'Freecycle'}, {'word': 'free day'}, {'word': 'free diver'}, {'word': 'free-diver'}, {'word': 'free diving'}, {'word': 'free-diving'}, {'word': 'freedom'}, {'word': ""freedom ain't free""}, {'word': 'freedom is not free'}, {'word': ""freedom isn't free""}, {'word': ""freedom's not free""}, {'word': 'free edge'}, {'word': 'free electron'}, {'word': 'free energy'}, {'word': 'free enterprise'}, {'word': 'free-exercise clause'}, {'word': 'free expression'}, {'word': 'freefall'}, {'word': 'freefaller'}, {'word': 'free fall'}, {'word': 'free-fall'}, {'word': 'free-fall time'}, {'word': 'free fatty acid'}, {'word': 'free-feed'}, {'word': 'free-fire'}, {'word': 'free float'}, {'word': 'free-floating'}, {'word': 'free-floating planet'}, {'word': 'freeflow'}, {'word': 'free-flowing'}, {'word': 'free-fly'}, {'word': 'free-flying'}, {'word': 'free-for-all'}, {'word': 'free form'}, {'word': 'free-form'}, {'word': 'Free France'}, {'word': 'free-from'}, {'word': 'freegan'}, {'word': 'free grace'}, {'word': 'free group'}, {'word': 'freehand'}, {'word': 'freehanded'}, {'word': 'free hand'}, {'word': 'free-hand'}, {'word': 'free-handed'}, {'word': 'free-handedly'}, {'word': 'free-handedness'}, {'word': 
'free-hanging'}, {'word': 'freehearted'}, {'word': 'free-hearted'}, {'word': 'free-heartedly'}, {'word': 'free-heartedness'}, {'word': 'free-heel skiing'}, {'word': 'free helicopter ride'}, {'word': 'freeholding'}, {'word': 'freehood'}, {'word': 'free house'}, {'word': 'freehub'}, {'word': 'free imperial city'}, {'word': 'free indirect discourse'}, {'word': 'free indirect speech'}, {'word': 'free indirect style'}, {'word': 'freeish'}, {'word': 'free jazz'}, {'word': 'Free Kirk'}, {'word': 'free lance'}, {'word': 'freelance'}, {'word': 'Freelander'}, {'word': 'free leg'}, {'word': 'free library'}, {'word': 'freeline'}, {'word': 'free list'}, {'word': 'free liver'}, {'word': 'free-liver'}, {'word': 'free-living'}, {'word': 'free-loader'}, {'word': 'freeloader'}, {'word': 'freelook'}, {'word': 'free love'}, {'word': 'free lover'}, {'word': 'free lunch'}, {'word': 'freely'}, {'word': 'freemail'}, {'word': 'freeman'}, {'word': 'Freeman'}, {'word': 'free market'}, {'word': 'free-market'}, {'word': 'free marketeer'}, {'word': 'free-marketeer'}, {'word': 'free marketeering'}, {'word': 'free-market fundamentalism'}, {'word': 'free-marketism'}, {'word': 'Freemason'}, {'word': 'freemason'}, {'word': 'free-milling'}, {'word': 'freeminer'}, {'word': 'freemium'}, {'word': 'free-mix'}, {'word': 'freemix'}, {'word': 'free mixing'}, {'word': 'free module'}, {'word': 'free monoid'}, {'word': 'free morpheme'}, {'word': 'freeness'}, {'word': 'Freenet'}, {'word': 'free neutron'}, {'word': 'free object'}, {'word': 'free of charge'}, {'word': 'free of the city'}, {'word': 'free on board'}, {'word': 'free pass'}, {'word': 'free period'}, {'word': 'freephone'}, {'word': 'free port'}, {'word': 'freepost'}, {'word': 'free press'}, {'word': 'free product'}, {'word': 'free radical'}, {'word': 'free range'}, {'word': 'free-range'}, {'word': 'free-ranging'}, {'word': 'free reed'}, {'english': 'or free reign', 'translation': 'or free reign', 'word': 'free rein'}, {'word': 'free ride'}, {'word': 
'free-ride'}, {'word': 'free rider'}, {'word': 'free-rider problem'}, {'word': 'free roam'}, {'word': 'free-roam'}, {'word': 'free-roaming'}, {'word': 'freeroll'}, {'word': 'freerun'}, {'word': 'free run'}, {'word': 'free runner'}, {'word': 'free-running'}, {'word': 'free running'}, {'word': 'freerunning'}, {'word': 'free school movement'}, {'word': 'free semigroup'}, {'word': 'freesheet'}, {'word': 'free sheet'}, {'word': 'free-sheet'}, {'word': 'freeship'}, {'word': 'free shop'}, {'word': 'free silver'}, {'word': 'free silverite'}, {'word': 'freeskiing'}, {'word': 'Free-Soilism'}, {'word': 'free solo'}, {'word': 'free-solo'}, {'word': 'free soloist'}, {'word': 'freesome'}, {'word': 'Free Soviets'}, {'word': 'free space'}, {'word': 'free-speaking'}, {'word': 'free speech'}, {'word': 'free-speecher'}, {'word': 'free speech zone'}, {'word': 'free spin'}, {'word': 'free spirit'}, {'word': 'free-spirited'}, {'word': 'free-spoken'}, {'word': 'freespool'}, {'word': 'freestanding'}, {'word': 'free-standing'}, {'word': 'free state'}, {'word': 'freester'}, {'word': 'free-stone'}, {'word': 'freestone'}, {'word': 'free store'}, {'word': 'freestore'}, {'word': 'freestream'}, {'word': 'free-style'}, {'word': 'free substitution'}, {'word': 'free sugar'}, {'word': 'free surface'}, {'word': 'free surface effect'}, {'word': 'free-swimming'}, {'word': 'freetail'}, {'word': 'free-tailed bat'}, {'word': 'free tekno'}, {'word': 'free termineme'}, {'word': 'freethinker'}, {'word': 'free thinker'}, {'word': 'free-thinker'}, {'word': 'free-thinking'}, {'word': 'freethinking'}, {'word': 'free thought'}, {'word': 'free throw'}, {'word': 'free-throw lane'}, {'word': 'free-throw line'}, {'word': 'free throw percentage'}, {'word': 'free time'}, {'word': 'free to air'}, {'word': 'free-to-air'}, {'word': 'free-tongued'}, {'word': 'free to play'}, {'word': 'free-to-play'}, {'word': 'Freetown'}, {'word': 'free trade'}, {'word': 'free trade area'}, {'word': 'free-trade area'}, {'word': 'free 
trader'}, {'word': 'free trial'}, {'word': 'free-turbine engine'}, {'word': 'free ultrafilter'}, {'word': 'free university'}, {'word': 'free up'}, {'word': 'free use'}, {'word': 'free variable'}, {'word': 'free variation'}, {'word': 'free verse'}, {'word': 'free vote'}, {'word': 'freeware'}, {'word': 'free warren'}, {'word': 'free water'}, {'word': 'freeway'}, {'word': 'free weight'}, {'word': 'freewheel'}, {'word': 'free-wheeling'}, {'word': 'free-will'}, {'word': 'free will'}, {'word': 'free-willer'}, {'word': 'free will theorem'}, {'word': 'freewoman'}, {'word': 'free world'}, {'word': 'freewriting'}, {'word': 'free zone'}, {'word': 'get out of jail free card'}, {'word': 'get-out-of-jail-free card'}, {'word': 'Gibbs free energy'}, {'word': 'go free'}, {'word': 'half-free'}, {'word': 'home free'}, {'word': ""it's a free country""}, {'word': 'land of the free'}, {'word': 'leader of the free world'}, {'word': ""live rent-free in someone's head""}, {'word': ""live rent free in someone's head""}, {'word': 'make free of'}, {'word': 'make free with'}, {'word': 'mean free path'}, {'word': 'mean free time'}, {'word': 'no free lunch theorem'}, {'word': 'non-free software'}, {'word': 'olly olly oxen free'}, {'word': 'region free'}, {'word': 'set free'}, {'word': 'the best things in life are free'}, {'word': ""there ain't no such thing as a free lunch""}, {'word': ""there's no such thing as a free lunch""}, {'word': 'there is no free lunch'}, {'word': 'there is no such thing as a free lunch'}, {'word': 'uncoated free sheet'}, {'word': 'walk free'}, {'word': 'weapons free'}, {'word': 'why buy the cow when you can get the milk for free'}] : 1, [{'word': 'befree'}, {'word': 'free up'}] : 1",101,27
7,EEF,7,etymology_templates,object,87,List,"nan : 14, [{'name': 'af', 'args': {'1': 'en', '2': 'non-', 't1': 'no, none, lack of', '3': 'sense'}, 'expansion': 'non- (‚Äúno, none, lack of‚Äù) + sense'}, {'name': 'cog', 'args': {'1': 'nl', '2': 'onzin', 't': 'nonsense'}, 'expansion': 'Dutch onzin (‚Äúnonsense‚Äù)'}, {'name': 'cog', 'args': {'1': 'en', '2': 'unsense', 't': 'nonsense'}, 'expansion': 'English unsense (‚Äúnonsense‚Äù)'}] : 4, [{'name': 'root', 'args': {'1': 'en', '2': 'ine-pro', '3': '*werh‚ÇÅ-', '4': '*d ∞eh‚ÇÅ-'}, 'expansion': ''}, {'name': 'inh', 'args': {'1': 'en', '2': 'enm', '3': 'word'}, 'expansion': 'Middle English word'}, {'name': 'inh', 'args': {'1': 'en', '2': 'ang', '3': 'word'}, 'expansion': 'Old English word'}, {'name': 'inh', 'args': {'1': 'en', '2': 'gmw-pro', '3': '*word'}, 'expansion': 'Proto-West Germanic *word'}, {'name': 'inh', 'args': {'1': 'en', '2': 'gem-pro', '3': '*wurdƒÖ'}, 'expansion': 'Proto-Germanic *wurdƒÖ'}, {'name': 'inh', 'args': {'1': 'en', '2': 'ine-pro', '3': '*werd ∞h‚ÇÅom', '4': '*wrÃ•d ∞h‚ÇÅom'}, 'expansion': 'Proto-Indo-European *wrÃ•d ∞h‚ÇÅom'}, {'name': 'doublet', 'args': {'1': 'en', '2': 'verb', '3': 'verve'}, 'expansion': 'Doublet of verb and verve'}] : 3, [{'name': 'sense', 'args': {'1': 'highest rank', '2': 'grade', '3': 'music'}, 'expansion': '(highest rank, grade, music):'}, {'name': 'sense', 'args': {'1': 'blood type'}, 'expansion': '(blood type):'}] : 3, [{'name': 'etymid', 'args': {'1': 'en', '2': 'word'}, 'expansion': ''}] : 3",101,27
8,EEF,8,etymology_text,object,92,56,"nan : 9, From non- (‚Äúno, none, lack of‚Äù) + sense, from c. 1610. Compare the semantically similar West Frisian √ªnsin (‚Äúnonsense‚Äù), Dutch onzin (‚Äúnonsense‚Äù), German Unsinn (‚Äúnonsense‚Äù), English unsense (‚Äúnonsense‚Äù). : 4, First used by Lewis Carroll in Through the Looking-Glass to describe the words he coined in ‚ÄúJabberwocky‚Äù. : 3, * (highest rank, grade, music): From the initial position of the letter A in the English alphabet.\n* (blood type): From A antigen : 3, From Middle English word, from Old English word, from Proto-West Germanic *word, from Proto-Germanic *wurdƒÖ, from Proto-Indo-European *wrÃ•d ∞h‚ÇÅom. Doublet of verb and verve; further related to vrata. : 3",101,27
9,EEF,9,synonyms,object,18,List,"nan : 83, [{'word': 'baudrons', 'source': 'Thesaurus:cat'}, {'word': 'cat', 'source': 'Thesaurus:cat'}, {'word': 'domestic cat', 'source': 'Thesaurus:cat'}, {'word': 'grimalkin', 'source': 'Thesaurus:cat'}, {'word': 'housecat', 'source': 'Thesaurus:cat'}, {'word': 'kibty', 'source': 'Thesaurus:cat'}, {'word': 'kitten', 'source': 'Thesaurus:cat'}, {'word': 'kitter', 'source': 'Thesaurus:cat'}, {'word': 'kitty', 'source': 'Thesaurus:cat'}, {'word': 'kitty-cat', 'source': 'Thesaurus:cat'}, {'word': 'kitty witty', 'source': 'Thesaurus:cat'}, {'word': 'malkin', 'source': 'Thesaurus:cat'}, {'word': 'mog', 'source': 'Thesaurus:cat'}, {'word': 'moggy', 'source': 'Thesaurus:cat'}, {'word': 'mouser', 'source': 'Thesaurus:cat'}, {'word': 'puss', 'source': 'Thesaurus:cat'}, {'word': 'pussy', 'source': 'Thesaurus:cat'}, {'word': 'pussy-cat', 'source': 'Thesaurus:cat'}] : 3, [{'word': 'dict', 'source': 'Thesaurus:dictionary'}, {'word': 'dict.', 'source': 'Thesaurus:dictionary'}, {'word': 'dictionary', 'source': 'Thesaurus:dictionary'}, {'word': 'explanatory dictionary', 'source': 'Thesaurus:dictionary'}, {'word': 'interpreter', 'source': 'Thesaurus:dictionary', 'tags': ['obsolete']}, {'word': 'lexicon', 'source': 'Thesaurus:dictionary'}] : 1, [{'word': 'befree'}, {'word': 'emancipate'}, {'word': 'let loose'}, {'word': 'liberate'}, {'word': 'manumit'}, {'word': 'release'}, {'word': 'unchain'}, {'word': 'unfetter'}, {'word': 'unshackle'}] : 1, [{'word': 'complimentary', 'source': 'Thesaurus:gratis'}, {'word': 'costless', 'source': 'Thesaurus:gratis'}, {'word': 'chargeless', 'source': 'Thesaurus:gratis'}, {'word': 'free', 'source': 'Thesaurus:gratis'}, {'word': 'free of charge', 'source': 'Thesaurus:gratis'}, {'word': 'gratis', 'source': 'Thesaurus:gratis'}, {'word': 'gratuitous', 'source': 'Thesaurus:gratis'}] : 1",101,27


In [None]:
# Call the method: a bare `df.info` only displays the bound-method repr,
# whereas `df.info()` prints the per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101 entries, 0 to 100
Data columns (total 27 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   word                 101 non-null    object 
 1   pos                  101 non-null    object 
 2   lang_code            101 non-null    object 
 3   standard_lang        101 non-null    object 
 4   senses               101 non-null    object 
 5   forms                81 non-null     object 
 6   derived              47 non-null     object 
 7   etymology_templates  87 non-null     object 
 8   etymology_text       92 non-null     object 
 9   synonyms             18 non-null     object 
 10  antonyms             3 non-null      object 
 11  hypernyms            14 non-null     object 
 12  hyponyms             15 non-null     object 
 13  holonyms             4 non-null      object 
 14  meronyms             9 non-null      object 
 15  categories           101 non-null    obj

In [None]:
def drop_df_column_groups(df, groups=('forms', 'etymology', 'nyms', 'categories'), individual_cols=('wl_code', 'lang')):
    """Return a copy of ``df`` without the selected column groups.

    Args:
        df: DataFrame to remove columns from. It is not modified.
        groups: Names of the column groups to drop. Recognised names are
            ``'forms'``, ``'etymology'``, ``'nyms'`` and ``'categories'``;
            unrecognised names are ignored. A single string is treated as
            one group name. ``None`` or an empty iterable drops no group.
        individual_cols: Extra column names to drop in addition to the
            groups. A single string is treated as one column name.
            ``None`` or an empty iterable drops no extra column.

    Returns:
        A new DataFrame holding the remaining columns in their original
        order. Requested columns that are absent from ``df`` are skipped
        rather than raising ``KeyError``.
    """
    group_columns = {
        'forms': ['form_of', 'forms', 'alt_of', 'inflection_templates', 'derived'],
        'etymology': ['etymology_templates', 'etymology_text', 'etymology_tree'],
        'nyms': ['synonyms', 'antonyms', 'hypernyms', 'hyponyms', 'troponyms', 'holonyms', 'meronyms'],
        'categories': ['categories', 'links', 'related', 'topics'],
    }

    # A bare string would otherwise be iterated character by character.
    if isinstance(groups, str):
        groups = [groups]
    if isinstance(individual_cols, str):
        individual_cols = [individual_cols]

    candidates = list(individual_cols or ())
    for group in groups or ():
        candidates.extend(group_columns.get(group, ()))

    # De-duplicate while keeping order, and keep only columns that actually
    # exist so the drop never fails on a frame missing some of them.
    df_cols = df.columns
    cols_to_drop = [c for c in dict.fromkeys(candidates) if c in df_cols]

    return df.drop(columns=cols_to_drop)