In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

In [15]:
# Load the raw vocabulary list (columns: Word, Definition).
# The file name is relative to the notebook's working directory.
VOCAB_CSV = "mp_gre_1000_words.csv"
words = pd.read_csv(VOCAB_CSV, encoding="latin-1")
words.head(6)

Unnamed: 0,Word,Definition
0,advocate,Speak or argue in favor of (verb); a person wh...
1,aerie,Dwelling or fortress built on a high place; th...
2,aesthetic,Concerning the appreciation of beauty or good ...
3,affable,"Warm and friendly, pleasant, approachable"
4,affectation,Fake behavior (such as in speech or dress) ado...
5,aggrandize,Make greater; exaggerate
6,aggregate,"Gather together, amount to (verb); constitutin..."
7,alacrity,Cheerful or speedy willingness
8,albeit,"Although, even though"
9,alienate,"Cause to become unfriendly, hostile, or distant"


In [20]:
# Remove leading whitespace from every definition so that the column can be
# sorted alphabetically.
# The vectorised `.str` accessor is used instead of a per-row lambda so that a
# missing definition (NaN) is passed through unchanged rather than raising
# AttributeError on a float.
words["Definition"] = words["Definition"].str.lstrip()
words.head(6)

Unnamed: 0,Word,Definition
0,advocate,Speak or argue in favor of (verb); a person wh...
1,aerie,Dwelling or fortress built on a high place; th...
2,aesthetic,Concerning the appreciation of beauty or good ...
3,affable,"Warm and friendly, pleasant, approachable"
4,affectation,Fake behavior (such as in speech or dress) ado...
5,aggrandize,Make greater; exaggerate
6,aggregate,"Gather together, amount to (verb); constitutin..."
7,alacrity,Cheerful or speedy willingness
8,albeit,"Although, even though"
9,alienate,"Cause to become unfriendly, hostile, or distant"


In [4]:
# Upper-case the first character of each definition for consistency.
# `str.capitalize()` is deliberately NOT used: besides upper-casing the first
# character it lower-cases the rest of the string, which would mangle any
# acronym or proper noun that appears inside a definition.
# NOTE: the words themselves are intentionally left lowercase.
words["Definition"] = (
    words["Definition"].str[:1].str.upper() + words["Definition"].str[1:]
)
words.head(6)

Unnamed: 0,Word,Definition
0,advocate,Speak or argue in favor of (verb); a person wh...
1,aerie,Dwelling or fortress built on a high place; th...
2,aesthetic,Concerning the appreciation of beauty or good ...
3,affable,"Warm and friendly, pleasant, approachable"
4,affectation,Fake behavior (such as in speech or dress) ado...
5,aggrandize,Make greater; exaggerate
6,aggregate,"Gather together, amount to (verb); constitutin..."
7,alacrity,Cheerful or speedy willingness
8,albeit,"Although, even though"
9,alienate,"Cause to become unfriendly, hostile, or distant"


In [5]:
# Trim surrounding whitespace from every word so that the column sorts
# alphabetically and so that the duplicate detection on "Word" is not defeated
# by a stray trailing space ("abate " vs "abate").
# `.str.strip()` also leaves a missing value (NaN) untouched, where a per-row
# `x.lstrip()` would raise AttributeError.
words["Word"] = words["Word"].str.strip()
words.head(6)

Unnamed: 0,Word,Definition
0,advocate,Speak or argue in favor of (verb); a person wh...
1,aerie,Dwelling or fortress built on a high place; th...
2,aesthetic,Concerning the appreciation of beauty or good ...
3,affable,"Warm and friendly, pleasant, approachable"
4,affectation,Fake behavior (such as in speech or dress) ado...
5,aggrandize,Make greater; exaggerate
6,aggregate,"Gather together, amount to (verb); constitutin..."
7,alacrity,Cheerful or speedy willingness
8,albeit,"Although, even though"
9,alienate,"Cause to become unfriendly, hostile, or distant"


In [6]:
# Put the rows in alphabetical order of "Word".
# The result is a new frame; `words` keeps its original order.
sorted_words = words.sort_values(by="Word")
sorted_words.head(6)

Unnamed: 0,Word,Definition
995,abacus,Frame with balls for calculating
495,abase,"Degrade or humble; to lower in rank, status, o..."
996,abate,To lessen to subside
496,abate,"Reduce, diminish"
497,abdicate,Formally give up the throne (or some other pow...
997,abdication,Giving up control authority
498,aberrant,"Abnormal, deviant"
998,aberration,Straying away from what is normal
999,abet,Help/encourage smb (in doing wrong)
1000,abeyance,Suspended action


In [7]:
# Renumber the rows 0..n-1 now that they are in alphabetical order.
# drop=False (the default) keeps the pre-sort row labels as a regular
# "index" column.
sorted_words = sorted_words.reset_index(drop=False)
sorted_words.head(6)

Unnamed: 0,index,Word,Definition
0,995,abacus,Frame with balls for calculating
1,495,abase,"Degrade or humble; to lower in rank, status, o..."
2,996,abate,To lessen to subside
3,496,abate,"Reduce, diminish"
4,497,abdicate,Formally give up the throne (or some other pow...


In [8]:
# The "index" column holds the pre-sort row labels left behind by the
# reset_index call; it is not needed once the rows have been renumbered.
sorted_words = sorted_words.drop(columns="index")
sorted_words.head(6)

Unnamed: 0,Word,Definition
0,abacus,Frame with balls for calculating
1,abase,"Degrade or humble; to lower in rank, status, o..."
2,abate,To lessen to subside
3,abate,"Reduce, diminish"
4,abdicate,Formally give up the throne (or some other pow...


In [9]:
# Need to check for duplicates
def check_duplicates(list_of_words):
    """Print whether an iterable of words contains any repeated value.

    Parameters
    ----------
    list_of_words : iterable
        The values to inspect (for example a pandas Series of words).
        Elements must be hashable because they are stored in a set.

    Returns
    -------
    None
        The outcome is reported only through the printed message.
    """
    seen = set()
    has_duplicates = False
    for word in list_of_words:
        if word in seen:
            # One repeat is enough to decide the outcome; stop scanning.
            has_duplicates = True
            break
        seen.add(word)

    if has_duplicates:
        print("This list contains duplicated words.")
    else:
        print("There are no duplicate words.")

# Run the duplicate check on the "Word" column of the sorted frame.
check_duplicates(sorted_words["Word"])

This list contains duplicated words.


In [10]:
# Show which words are duplicated.
# keep=False flags every row whose "Word" value occurs more than once, so all
# copies of a repeated word are selected. Only the "Word" column is compared.
duplicate_df = sorted_words.loc[sorted_words["Word"].duplicated(keep=False)]
duplicate_df.head(6)

Unnamed: 0,Word,Definition
2,abate,To lessen to subside
3,abate,"Reduce, diminish"
9,abeyance,Suspended action
10,abeyance,"Temporary suspension, inactivity"
11,abhor,To hate to detest
12,abhor,"Detest, regard with disgust"
14,abjure,Promise or swear to give up
15,abjure,"Give up, renounce; repudiate, recant, or shun ..."
22,abscond,To go away suddenly (to avoid arrest)
23,abscond,Depart suddenly and secretively


In [11]:
### First of the two frames used to merge the definitions of duplicated words.
### keep="first" leaves the first occurrence of each word unflagged, so this
### frame holds every later occurrence. Only the "Word" column is compared.
duplicate_df_first = sorted_words.loc[sorted_words["Word"].duplicated(keep="first")]
duplicate_df_first.head(6)

Unnamed: 0,Word,Definition
3,abate,"Reduce, diminish"
10,abeyance,"Temporary suspension, inactivity"
12,abhor,"Detest, regard with disgust"
15,abjure,"Give up, renounce; repudiate, recant, or shun ..."
23,abscond,Depart suddenly and secretively
34,accretion,The growing of separate things into one
104,apostate,One who abandons long-held religious or politi...
111,apprise,"Inform, give notice to"
113,approbation,"Praise or approval, especially formal approval"
125,arduous,Steep difficult ascent laborious


In [24]:
### Second of the two frames used to merge the definitions of duplicated words.
### keep="last" leaves the last occurrence of each word unflagged, so this
### frame holds every earlier occurrence. Only the "Word" column is compared.
duplicate_df_last = sorted_words.loc[sorted_words["Word"].duplicated(keep="last")]
duplicate_df_last.head(6)

Unnamed: 0,Word,Definition
2,abate,To lessen to subside
9,abeyance,Suspended action
11,abhor,To hate to detest
14,abjure,Promise or swear to give up
22,abscond,To go away suddenly (to avoid arrest)
33,accretion,Gradual increase; an added part or addition
103,apostate,"Person who deserts a party, cause, religion, etc."
110,apprise,Give notice to inform
112,approbation,Approval
124,arduous,"Very difficult, strenuous; severe, hard to endure"


In [25]:
# All copies of the duplicated words, renumbered 0..n-1.
# drop=True discards the old row labels instead of adding an "index" column
# that would have to be removed afterwards.
duplicate_rows = duplicate_df.reset_index(drop=True)
duplicate_rows.head(6)

Unnamed: 0,Word,Definition
0,abate,To lessen to subside
1,abate,"Reduce, diminish"
2,abeyance,Suspended action
3,abeyance,"Temporary suspension, inactivity"
4,abhor,To hate to detest
5,abhor,"Detest, regard with disgust"
6,abjure,Promise or swear to give up
7,abjure,"Give up, renounce; repudiate, recant, or shun ..."
8,abscond,To go away suddenly (to avoid arrest)
9,abscond,Depart suddenly and secretively


In [26]:
# Put the two definitions of each duplicated word side by side:
# Definition_x comes from duplicate_df_first, Definition_y from duplicate_df_last.
# validate="one_to_one" makes pandas raise MergeError when a word appears more
# than once on either side (i.e. it occurred three or more times in the list).
# Without the check the merge would silently emit one row per combination and
# re-introduce duplicated words.
merge_words = pd.merge(
    duplicate_df_first,
    duplicate_df_last,
    on=["Word"],
    validate="one_to_one",
)
merge_words.head(6)

Unnamed: 0,Word,Definition_x,Definition_y
0,abate,"Reduce, diminish",To lessen to subside
1,abeyance,"Temporary suspension, inactivity",Suspended action
2,abhor,"Detest, regard with disgust",To hate to detest
3,abjure,"Give up, renounce; repudiate, recant, or shun ...",Promise or swear to give up
4,abscond,Depart suddenly and secretively,To go away suddenly (to avoid arrest)
5,accretion,The growing of separate things into one,Gradual increase; an added part or addition
6,apostate,One who abandons long-held religious or politi...,"Person who deserts a party, cause, religion, etc."
7,apprise,"Inform, give notice to",Give notice to inform
8,approbation,"Praise or approval, especially formal approval",Approval
9,arduous,Steep difficult ascent laborious,"Very difficult, strenuous; severe, hard to endure"


In [27]:
# Combine the two definitions of each duplicated word into a single string,
# separated by "; ". Series.str.cat does this column-wise in one vectorised
# call instead of invoking a Python lambda once per row, and yields NaN for a
# row with a missing definition rather than raising TypeError.
merge_words["Definition"] = merge_words["Definition_x"].str.cat(
    merge_words["Definition_y"], sep="; "
)
merge_words.head(6)

Unnamed: 0,Word,Definition_x,Definition_y,Definition
0,abate,"Reduce, diminish",To lessen to subside,"Reduce, diminish; To lessen to subside"
1,abeyance,"Temporary suspension, inactivity",Suspended action,"Temporary suspension, inactivity; Suspended ac..."
2,abhor,"Detest, regard with disgust",To hate to detest,"Detest, regard with disgust; To hate to detest..."
3,abjure,"Give up, renounce; repudiate, recant, or shun ...",Promise or swear to give up,"Give up, renounce; repudiate, recant, or shun ..."
4,abscond,Depart suddenly and secretively,To go away suddenly (to avoid arrest),Depart suddenly and secretively; To go away su...


In [28]:
# The two source columns are redundant once the combined "Definition"
# column exists, so only Word and Definition are kept.
merge_words = merge_words.drop(columns=["Definition_x", "Definition_y"])
merge_words.head(6)

Unnamed: 0,Word,Definition
0,abate,"Reduce, diminish; To lessen to subside"
1,abeyance,"Temporary suspension, inactivity; Suspended ac..."
2,abhor,"Detest, regard with disgust; To hate to detest..."
3,abjure,"Give up, renounce; repudiate, recant, or shun ..."
4,abscond,Depart suddenly and secretively; To go away su...


In [29]:
# Rows whose word occurs exactly once: keep=False discards every copy of a
# repeated word (compared on the "Word" column only).
duplicates_dropped = sorted_words.drop_duplicates(subset="Word", keep=False)
duplicates_dropped.head(6)

Unnamed: 0,Word,Definition
0,abacus,Frame with balls for calculating
1,abase,"Degrade or humble; to lower in rank, status, o..."
4,abdicate,Formally give up the throne (or some other pow...
5,abdication,Giving up control authority
6,aberrant,"Abnormal, deviant"
7,aberration,Straying away from what is normal
8,abet,Help/encourage smb (in doing wrong)
13,abide,Be faithful to endure
16,abraded,Rubbed off worn away by friction
17,abrasive,"Rough, suitable for grinding or polishing (suc..."


In [32]:
# Stack the unique-word rows on top of the merged duplicated-word rows.
# Each frame keeps its own row labels here (ignore_index=False), so the
# combined frame is not yet sorted or renumbered.
vocabulary_list = pd.concat(
    [duplicates_dropped, merge_words],
    axis=0,
    ignore_index=False,
)
vocabulary_list.head(6)

Unnamed: 0,Word,Definition
0,abacus,Frame with balls for calculating
1,abase,"Degrade or humble; to lower in rank, status, o..."
4,abdicate,Formally give up the throne (or some other pow...
5,abdication,Giving up control authority
6,aberrant,"Abnormal, deviant"
7,aberration,Straying away from what is normal
8,abet,Help/encourage smb (in doing wrong)
13,abide,Be faithful to endure
16,abraded,Rubbed off worn away by friction
17,abrasive,"Rough, suitable for grinding or polishing (suc..."


In [33]:
### Restore alphabetical order of "Word" after the two frames were stacked.
vocabulary_list = vocabulary_list.sort_values(by="Word")
vocabulary_list.head(6)

Unnamed: 0,Word,Definition
0,abacus,Frame with balls for calculating
1,abase,"Degrade or humble; to lower in rank, status, o..."
0,abate,"Reduce, diminish; To lessen to subside"
4,abdicate,Formally give up the throne (or some other pow...
5,abdication,Giving up control authority
6,aberrant,"Abnormal, deviant"
7,aberration,Straying away from what is normal
8,abet,Help/encourage smb (in doing wrong)
1,abeyance,"Temporary suspension, inactivity; Suspended ac..."
2,abhor,"Detest, regard with disgust; To hate to detest..."


In [34]:
# Renumber the rows 0..n-1 now that they are back in alphabetical order.
# drop=True discards the previous row labels directly, so no leftover
# "index" column has to be removed afterwards.
vocabulary_list = vocabulary_list.reset_index(drop=True)
vocabulary_list.head(6)

Unnamed: 0,Word,Definition
0,abacus,Frame with balls for calculating
1,abase,"Degrade or humble; to lower in rank, status, o..."
2,abate,"Reduce, diminish; To lessen to subside"
3,abdicate,Formally give up the throne (or some other pow...
4,abdication,Giving up control authority
5,aberrant,"Abnormal, deviant"
6,aberration,Straying away from what is normal
7,abet,Help/encourage smb (in doing wrong)
8,abeyance,"Temporary suspension, inactivity; Suspended ac..."
9,abhor,"Detest, regard with disgust; To hate to detest..."


In [35]:
# Double-check that no duplicated words remain after merging.
# check_duplicates is reused from the earlier duplicate-check cell instead of
# being defined a second time with an identical body.
check_duplicates(vocabulary_list["Word"])

There are no duplicate words.


In [42]:
# Use the word itself as the row label.
# verify_integrity=True makes pandas raise ValueError if any word is still
# duplicated, so a non-unique index can never reach the exported file.
vocabulary_list = vocabulary_list.set_index("Word", verify_integrity=True)

In [43]:
# Preview the first rows of the frame, now labelled by "Word".
vocabulary_list.head(6)

Unnamed: 0_level_0,Definition
Word,Unnamed: 1_level_1
abacus,Frame with balls for calculating
abase,"Degrade or humble; to lower in rank, status, o..."
abate,"Reduce, diminish; To lessen to subside"
abdicate,Formally give up the throne (or some other pow...
abdication,Giving up control authority
aberrant,"Abnormal, deviant"


In [44]:
# Save the sorted and cleaned vocabulary under its own file name so that the
# raw-data .csv file is left untouched.
# The path is relative to the notebook's working directory (the same place the
# raw CSV is read from) rather than a user-specific absolute path, so the
# notebook can be re-run on another machine.
vocabulary_list.to_csv("vfq vocabulary list.csv")