# Data Cleaning and Preparing

## Step 1: initialization 

In [1]:
import pandas as pd
import numpy as np

import nltk
import string
import re

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import word_tokenize, sent_tokenize

In [2]:
#initialize the lematizer and stemmer, which will be used later.
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

#to be used in the cleaning function
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

print(stopwords.words('english'))
#print(stopwords.words('chinese'))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

[nltk_data] Downloading package stopwords to /Users/bill/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/bill/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/bill/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /Users/bill/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


## Step 2: Clean the Data
The dataset, EMSCAD, is downloaded from Kaggle, the link is here:
https://www.kaggle.com/datasets/amruthjithrajvr/recruitment-scam

In [3]:
#load the EMSCAD dataset
df = pd.read_csv('./DataSet.csv')
print("The shape of the dataframe is",df.shape) 

The shape of the dataframe is (17880, 18)


### Get the dataframe for only the Job description column.

In [4]:
df_jd = pd.DataFrame(df['description'])
print("The shape of the dataframe is", df_jd.shape) 
print("Display one Job description sample:\n") 
print(df_jd['description'][0]) 

The shape of the dataframe is (17880, 1)
Display one Job description sample:

<p>Food52, a fast-growing, James Beard Award-winning online food community and crowd-sourced and curated recipe hub, is currently interviewing full- and part-time unpaid interns to work in a small team of editors, executives, and developers in its New York City headquarters.</p>
<ul>
<li>Reproducing and/or repackaging existing Food52 content for a number of partner sites, such as Huffington Post, Yahoo, Buzzfeed, and more in their various content management systems</li>
<li>Researching blogs and websites for the Provisions by Food52 Affiliate Program</li>
<li>Assisting in day-to-day affiliate program support, such as screening affiliates and assisting in any affiliate inquiries</li>
<li>Supporting with PR &amp; Events when needed</li>
<li>Helping with office administrative work, such as filing, mailing, and preparing for meetings</li>
<li>Working with developers to document bugs and suggest improvement

### To display the Job description sample in a more readable way.

In [5]:
from IPython.core.display import display, HTML
display(HTML(df_jd['description'][0]))

### Prepare the cleaning function

In [6]:
# remove the HTML tags
def striphtml(data):
    p = re.compile(r'<.*?>')
    return p.sub('', data)


def clean(text):
    
    # remove the HTML tags
    #text = striphtml(text)
    
    # Lowercase text
    text = text.lower()
    
    # Remove punctuation
    text = text.replace(':', ' ')
    text = text.replace('\'', ' ')
    translator = str.maketrans('', '', string.punctuation)
    text = text.translate(translator)
    
    # Remove extra spaces from text
    text = " ".join(text.split())
    
    # Remove stopwords function
    # Tokenize : get a list of tokens
    stop_words = set(stopwords.words("english")) # nltk.download('stopwords') - this is done at the begining
    word_tokens = word_tokenize(text)
    text = [word for word in word_tokens if word not in stop_words]
    
    # Lemmatize words
    text = [lemmatizer.lemmatize(word, pos ='v') for word in text]
    
    # Stem words
    text = [stemmer.stem(word) for word in text]
    
    return text

In [7]:
def simple_clean(text):
    # Lowercase text
    text = text.lower()
    
    # Remove numbers
    #text = re.sub(r'\d+', '', text)
    
    # Remove punctuation
    translator = str.maketrans('', '', string.punctuation)
    text = text.translate(translator)
    return text

In [8]:
# for now, I only used some of the examples (df_jd['description'][1:100]), due to the computing capability.

for index, sentence in df_jd['description'][1:2].iteritems(): 
    #df_jd_sentence.add(clean(sentence))
    print(clean(sentence))


['organis', 'focu', 'vibrant', 'awesomedo', 'passion', 'custom', 'servic', 'slick', 'type', 'skill', 'mayb', 'account', 'manag', 'think', 'administr', 'cooler', 'polar', 'bear', 'jetski', 'need', 'hear', 'cloud', 'video', 'product', 'servic', 'opper', 'glodal', 'level', 'yeah', 'pretti', 'cool', 'seriou', 'deliv', 'world', 'class', 'product', 'excel', 'custom', 'serviceour', 'rapidli', 'expand', 'busi', 'look', 'talent', 'project', 'manag', 'manag', 'success', 'deliveri', 'video', 'project', 'manag', 'client', 'commun', 'drive', 'product', 'process', 'work', 'coolest', 'brand', 'planet', 'learn', 'global', 'team', 'repres', 'nz', 'huge', 'way', 'enter', 'next', 'growth', 'stage', 'busi', 'grow', 'quickli', 'intern', 'therefor', 'posit', 'burst', 'opportun', 'right', 'person', 'enter', 'busi', 'right', 'time', '90', 'second', 'world', 'cloud', 'video', 'product', 'servic', 'http', '90urlfbe6559afac620a3cd2c22281f7b8d0eef56a73e3d9a311e2f1ca13d081dd630', '90', 'second', 'world', 'cloud', 

# Step 2: preparing the data

In [18]:
# get the list of biased words or phrases
df_biased_words = pd.read_excel("./bias_words.xlsx")
print(df_biased_words.shape)
print(df_biased_words.head())
print(df_biased_words['Masculine/Feminine Bias'].value_counts())

(159, 2)
  Biased Words or Phrases Masculine/Feminine Bias
0                  active          Masculine Bias
1             adventurous          Masculine Bias
2                 aggress          Masculine Bias
3                 ambitio          Masculine Bias
4                   analy          Masculine Bias
Masculine Bias    95
Feminine Bias     64
Name: Masculine/Feminine Bias, dtype: int64


In [19]:
# exclude generic he/she and dupulicated words in df_biased_words
words_to_exclude = ['she', 'her', 'hers', 'herself', 'he', 'himself', 'him', 'his']
df_biased_words = df_biased_words[~df_biased_words['Biased Words or Phrases'].isin(words_to_exclude)]
df_biased_words = df_biased_words.drop_duplicates()

print(df_biased_words.shape)
print(df_biased_words.isna().any())# check if there's any empty cells
print(df_biased_words.head())
print(df_biased_words['Masculine/Feminine Bias'].value_counts())

(137, 2)
Biased Words or Phrases    False
Masculine/Feminine Bias    False
dtype: bool
  Biased Words or Phrases Masculine/Feminine Bias
0                  active          Masculine Bias
1             adventurous          Masculine Bias
2                 aggress          Masculine Bias
3                 ambitio          Masculine Bias
4                   analy          Masculine Bias
Masculine Bias    81
Feminine Bias     56
Name: Masculine/Feminine Bias, dtype: int64


In [20]:
# get the list of male_words and female words
male_words = (df_biased_words.loc[df_biased_words['Masculine/Feminine Bias'] == 'Masculine Bias'])['Biased Words or Phrases'].values
female_words = (df_biased_words.loc[df_biased_words['Masculine/Feminine Bias'] == 'Feminine Bias'])['Biased Words or Phrases'].values

In [239]:
print(male_words)
print(len(male_words))

['active' 'adventurous' 'aggress' 'ambitio' 'analy' 'assert' 'athlet'
 'autonom' 'boast' 'challeng' 'compet' 'confident' 'courag' 'decide'
 'decisive' 'decision' 'determin' 'dominant' 'domina' 'force' 'greedy'
 'headstrong' 'hierarch' 'hostil' 'implusive' 'independen' 'individual'
 'intellect' 'lead' 'logic' 'masculine' 'objective' 'opinion' 'outspoken'
 'persist' 'principle' 'reckless' 'stubborn' 'superior' 'self-confiden'
 'self-sufficien' 'self-relian' 'manmade' 'chairman' 'son' 'fireman'
 'freshman' 'man' 'mankind' 'manpower' 'boyfriend' 'husband' 'policeman'
 'walter' 'brother' 'spokesman' 'upperclassman' 'gentleman' 'alumnus'
 'alumni' 'man up' 'Mr.' 'man-made' 'the common man' 'mailman' 'steward'
 'actor' 'congressman' 'acts as a leader' 'aggressive' 'ambitious'
 'analytical' 'assertive' 'athletic' 'competitive' 'defends own beliefs'
 'forceful' 'has leadership abilities' 'independent' 'individualistic'
 'makes decisions easily']
81


In [240]:
print(female_words)
print(len(female_words))

['affectionate' 'child' 'cheer' 'commit' 'communal' 'compassion' 'connect'
 'considerate' 'cooperat' 'depend' 'emotiona' 'empath' 'feminine'
 'flatterable' 'gentle' 'honest' 'interpersonal' 'interdependen'
 'interpersona' 'kind' 'kinship' 'loyal' 'modesty' 'nag' 'nurtur'
 'pleasant' 'polite' 'quiet' 'respon' 'sensitiv' 'submissive' 'support'
 'sympath' 'tender' 'together' 'trust' 'understand' 'warm' 'whin' 'yield'
 'daughter' 'wife' 'girlfriend' 'waitress' 'sister' 'ladies' 'alumna'
 'alumnae' 'hysterical' 'shrill' 'nagging' 'Mrs.' 'Miss.' 'Ms.'
 'stewardess' 'actress']
56


In [23]:
#start to test for checking male and female worsd

In [None]:
### using sample data containing only 3 sentences

In [157]:
data = {'sentences': ['Must be an extrovert with an innate quality of easily connecting with people.', 
                      'You are self-motivated and decisive, but willing to make changes with minimal grumbling when the client demands it.', 
                      'We are looking for a young and driven candidate who can bring innovation into the organization.']}

df_test_sentences = pd.DataFrame(data)

In [158]:
df_test_sentences

Unnamed: 0,sentences
0,Must be an extrovert with an innate quality of...
1,"You are self-motivated and decisive, but willi..."
2,We are looking for a young and driven candidat...


In [156]:
### using real data extracted from EMSCAD

In [265]:
df_jd.shape

(17880, 1)

In [266]:
df_jd_sentences = df_jd['description'][0:1000]

In [267]:
df_jd_sentences.head()

0    <p>Food52, a fast-growing, James Beard Award-w...
1    <p>Organised - Focused - Vibrant - Awesome!<br...
2    <p>Our client, located in Houston, is actively...
3    <p><b>THE COMPANY: ESRI – Environmental System...
4    <p><b>JOB TITLE:</b> Itemization Review Manage...
Name: description, dtype: object

In [268]:
#temp_sentences = nltk.sent_tokenize(striphtml(df_jd_sentences[3]))

In [269]:
list_sentences = []

for index, sent in df_jd_sentences.iteritems():
    list_sentences += nltk.sent_tokenize(striphtml(sent))
    #print(index)

In [270]:
len(list_sentences)

7303

In [271]:
df_real_sentences = pd.DataFrame()
df_real_sentences['sentence'] = list_sentences
df_real_sentences.head(20)

Unnamed: 0,sentence
0,"Food52, a fast-growing, James Beard Award-winn..."
1,Reproducing and/or repackaging existing Food52...
2,Organised - Focused - Vibrant - Awesome!Do you...
3,Slick typing skills?
4,Maybe Account Management?
5,...And think administration is cooler than a p...
6,Then we need to hear you!
7,We are the Cloud Video Production Service and ...
8,"Yeah, it's pretty cool."
9,Serious about delivering a world class product...


In [272]:
df_real_sentences

Unnamed: 0,sentence
0,"Food52, a fast-growing, James Beard Award-winn..."
1,Reproducing and/or repackaging existing Food52...
2,Organised - Focused - Vibrant - Awesome!Do you...
3,Slick typing skills?
4,Maybe Account Management?
...,...
7298,Supply planning experience a plus.
7299,Please only apply if you have heavy Quickbooks...
7300,Send resume with salary requirements.
7301,PINT is seeking a detail-oriented and impassio...


## The following block is to find out in each sentence if there is any male or female words

In [273]:
df_check_result = pd.DataFrame()

for index, sentence in df_real_sentences.iterrows():
    temp_sentence = sentence['sentence']
    tokenized_sentence = clean(temp_sentence)
    
    # there will be columns named 'category', 'word_in_sentences', and 'biased_term' in 'df_check_result' 
    category = 'neutral' 
    word_in_sentence = 'None' # 
    word = 'None' # 
    
    # check for male words, and them put the outcome to 
    for male_word in male_words:
        if re.search(r"\b{}\b".format(male_word), temp_sentence.lower().strip()): # search for 'male_word' in 'temp_sentence' using RE
            # set output if 'male_word' is found
            category = 'masculine'
            word_in_sentence = male_word
            word = male_word
            #when there is no male word in the temp_sentence 
        else:
            for token in tokenized_sentence:
                if len(male_word) > 3:
                    if simple_clean(male_word) == token[:len(male_word)]: # check if the male_word is found at the beginning of the token
                        category = 'masculine'
                        word_in_sentence = token
                        word = male_word
                    elif simple_clean(male_word) == token[-len(male_word):]: # check if the male_word is found at the end of the token
                        category = 'masculine'
                        word_in_sentence = token
                        word = male_word
            
    if category == 'masculine': # put the outcome in a dict, then append them to 'df_check_result'
        dict = {'sentence': temp_sentence,
                'word_in_Sentence': word_in_sentence,
                'biased_term': word,
                'category': category
               }
        #df_check_result = df_check_result.append(dict, ignore_index = True)
        df_check_result = pd.concat([df_check_result, pd.DataFrame([dict])], ignore_index=True)

        
        
    # the completely same process for checking for female words
    for female_word in female_words:
        if re.search(r"\b{}\b".format(female_word), temp_sentence.lower().strip()):
            category = 'feminine'
            word_in_sentence = female_word
            word = female_word
        else:
            for token in tokenized_sentence:
                if len(female_word) > 3:
                    if simple_clean(female_word) == token[:len(female_word)]:
                        category = 'feminine'
                        word_in_sentence = token
                        word = female_word
                    elif simple_clean(female_word) == token[-len(female_word):]:
                        category = 'feminine'
                        word_in_sentence = token
                        word = female_word
                    
    if category == 'feminine':
        dict = {'sentence': temp_sentence,
                'word_in_Sentence': word_in_sentence,
                'biased_term': word,
                'category': category
               }
        #df_check_result = df_check_result.append(dict, ignore_index = True)
        df_check_result = pd.concat([df_check_result, pd.DataFrame([dict])], ignore_index=True)

    
    
    # This is to roughly monitor how many lines/sentences have been proceeded when running this block of code
    if index%100 == 0:
        print(f'more than {index} sentences have been processed.' )
        
    #print(f'{index + 1} sentences have been processed.' )

more than 0 sentences have been processed.
more than 100 sentences have been processed.
more than 200 sentences have been processed.
more than 300 sentences have been processed.
more than 400 sentences have been processed.
more than 500 sentences have been processed.
more than 600 sentences have been processed.
more than 700 sentences have been processed.
more than 800 sentences have been processed.
more than 900 sentences have been processed.
more than 1000 sentences have been processed.
more than 1100 sentences have been processed.
more than 1200 sentences have been processed.
more than 1300 sentences have been processed.
more than 1400 sentences have been processed.
more than 1500 sentences have been processed.
more than 1600 sentences have been processed.
more than 1700 sentences have been processed.
more than 1800 sentences have been processed.
more than 1900 sentences have been processed.
more than 2000 sentences have been processed.
more than 2100 sentences have been processed.


In [300]:
# all the reult frome checking male/female words are now put into 'df_check_result'
df_check_result

Unnamed: 0,sentence,word_in_Sentence,biased_term,category
0,Reproducing and/or repackaging existing Food52...,support,support,feminine
1,Esri’s geographic information system (GIS) tec...,understand,understand,feminine
2,"Privately held, we offer exceptional benefits,...",competitive,competitive,masculine
3,"As part of an account team, you will be respon...",respons,respon,feminine
4,Specifically…\r\n\r\nProspect and develop oppo...,competitive,competitive,masculine
...,...,...,...,...
2775,"As Executive Sous Chef, you would be responsib...",respons,respon,feminine
2776,"Specifically, you would be responsible for per...",respons,respon,feminine
2777,"A/R, A/P, General Ledger, reconciliations / re...",respons,respon,feminine
2778,PINT is seeking a detail-oriented and impassio...,individual,individual,masculine


## This block is to find out male or female words in sentences as well, only to store outcomes in different form

In [301]:
# I'm using df_jd_sentences to store sentences extracted from EMSCAD dataset.

In [276]:
df_jd_result = pd.DataFrame()

for index, sentence in df_real_sentences.iterrows():
    temp_sentence = sentence['sentence']
    tokenized_sentence = clean(temp_sentence)
    words_in_sentence = []
    words = []
    
    if len(temp_sentence) < 180 and temp_sentence[0].isupper():
        # check for male words
        for male_word in male_words:
            if re.search(r"\b{}\b".format(male_word), temp_sentence.lower().strip()):
                #df_check_result = df_check_result.append(dict, ignore_index = True)
                #df_check_result = pd.concat([df_check_result, pd.DataFrame([dict])], ignore_index=True)
                
                words_in_sentence.append([male_word, 'M'])
                words.append([male_word, 'M'])
            else:
                for token in tokenized_sentence:
                    if len(male_word) > 3:
                        if simple_clean(male_word) == token[:len(male_word)]:
                            words_in_sentence.append([token, 'M'])
                            words.append([male_word, 'M'])
                        elif simple_clean(male_word) == token[-len(male_word):]:
                            if token[:len(male_word)] != token[-len(male_word):]:
                                words_in_sentence.append([token, 'M'])
                                words.append([male_word, 'M'])

        # check for female words
        for female_word in female_words:
            if re.search(r"\b{}\b".format(female_word), temp_sentence.lower().strip()):
                words_in_sentence.append([female_word, 'F'])
                words.append([male_word, 'F'])
            else:
                for token in tokenized_sentence:
                    if len(female_word) > 3:
                        if simple_clean(female_word) == token[:len(female_word)]:
                            words_in_sentence.append([token, 'F'])
                            words.append([female_word, 'F'])
                        elif simple_clean(female_word) == token[-len(female_word):]:
                            if token[:len(female_word)] != token[-len(female_word):]:
                                words_in_sentence.append([token, 'F'])
                                words.append([female_word, 'F'])

        if len(words) > 0:
            dict = {'sentence': temp_sentence,
                    'word_in_Sentence': words_in_sentence,
                    'biased_term': words}
            df_jd_result = df_jd_result.append(dict, ignore_index = True)

        if index%10000 == 0:
            print(index)

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dic

  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)
  df_jd_result = df_jd_result.append(dict, ignore_index = True)


In [299]:
df_jd_result

Unnamed: 0,sentence,word_in_Sentence,biased_term
0,"Privately held, we offer exceptional benefits,...","[[competit, M], [competitive, M]]","[[compet, M], [competitive, M]]"
1,"As part of an account team, you will be respon...","[[respons, F]]","[[respon, F]]"
2,Job Overview\r\nApex is an environmental consu...,"[[leadership, M]]","[[lead, M]]"
3,Driven by an entrepreneurial spirit and a dedi...,"[[respons, F]]","[[respon, F]]"
4,Working in partnership with our public and pri...,"[[support, F]]","[[makes decisions easily, F]]"
...,...,...,...
1372,We lead with numbers and metrics.,"[[lead, M]]","[[lead, M]]"
1373,"It provides a single, integrated platform for ...","[[support, F]]","[[makes decisions easily, F]]"
1374,"Founded in late 2011, Intercom has raised over...","[[lead, M]]","[[lead, M]]"
1375,"A/R, A/P, General Ledger, reconciliations / re...","[[respons, F]]","[[respon, F]]"


In [278]:
def clean_sentence(sentence):
    temp = " ".join(sentence.split())
    temp = temp.strip()

    char_to_remove = 0
    for x in temp.split()[0]:
        if not x.isalpha():
            char_to_remove += 1
    temp = temp[char_to_remove: len(temp) - char_to_remove]
    return sentence

'''
In Pandas, the map() method is used to apply a function to every element of a Series object
or a column of a DataFrame object. The map() method takes a function as an argument and applies
it to each element of the Series or column, returning a new Series or column with the results of
the function applied to each element.
'''

#cleaning the values in the 'sentence' column of 'df_new' using the 'clean_sentence' function
df_jd_result['sentence'] = df_jd_result.sentence.map(lambda x:clean_sentence(x))

In [288]:
df_jd_result

Unnamed: 0,sentence,word_in_Sentence,biased_term
0,"Privately held, we offer exceptional benefits,...","[[competit, M], [competitive, M]]","[[compet, M], [competitive, M]]"
1,"As part of an account team, you will be respon...","[[respons, F]]","[[respon, F]]"
2,Job Overview\r\nApex is an environmental consu...,"[[leadership, M]]","[[lead, M]]"
3,Driven by an entrepreneurial spirit and a dedi...,"[[respons, F]]","[[respon, F]]"
4,Working in partnership with our public and pri...,"[[support, F]]","[[makes decisions easily, F]]"
...,...,...,...
1372,We lead with numbers and metrics.,"[[lead, M]]","[[lead, M]]"
1373,"It provides a single, integrated platform for ...","[[support, F]]","[[makes decisions easily, F]]"
1374,"Founded in late 2011, Intercom has raised over...","[[lead, M]]","[[lead, M]]"
1375,"A/R, A/P, General Ledger, reconciliations / re...","[[respons, F]]","[[respon, F]]"


### explore filtered sentences

In [289]:
df_jd_result.biased_term.value_counts()

[[respon, F]]                              259
[[makes decisions easily, F]]              184
[[lead, M]]                                169
[[support, F]]                             126
[[analy, M]]                                84
                                          ... 
[[steward, M]]                               1
[[challeng, M], [nurtur, F]]                 1
[[decisive, M]]                              1
[[decision, M], [respon, F]]                 1
[[commit, F], [respon, F], [respon, F]]      1
Name: biased_term, Length: 144, dtype: int64

In [290]:
df_jd_result.biased_term.value_counts().values

array([259, 184, 169, 126,  84,  42,  38,  38,  35,  24,  24,  17,  15,
        14,  11,  11,  10,   9,   9,   9,   9,   9,   9,   9,   9,   8,
         7,   7,   6,   6,   5,   5,   5,   4,   4,   4,   3,   3,   3,
         3,   3,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,
         2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,
         2,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
         1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
         1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
         1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
         1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
         1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
         1])

In [292]:
found_words = df_jd_result.biased_term.value_counts().keys()
print(found_words)

Index([                                   [['respon', 'F']],
                          [['makes decisions easily', 'F']],
                                            [['lead', 'M']],
                                         [['support', 'F']],
                                           [['analy', 'M']],
                                        [['challeng', 'M']],
                                      [['individual', 'M']],
                                      [['understand', 'F']],
                                          [['commit', 'F']],
                                          [['depend', 'F']],
       ...
         [['athlet', 'M'], ['makes decisions easily', 'F']],
                         [['athlet', 'M'], ['compet', 'M']],
                       [['superior', 'M'], ['respon', 'F']],
       [['lead', 'M'], ['objective', 'M'], ['respon', 'F']],
                          [['depend', 'F'], ['trust', 'F']],
                                         [['steward', 'M']],
             

In [298]:
list_found_words = []
for item in found_words:
    list_found_words.append(item[0][0])
print(list_found_words)
print(len(found_words))

['connect', 'decisive']
2


In [294]:
not_found_words = []
found_words = df_new.biased_term.value_counts().keys()
for word in list(male_words) + list(female_words):  # frome the male/female words list
    if word not in list_found_words:
        not_found_words.append(word)

In [296]:
print(len(not_found_words))
print(not_found_words)

88
['assert', 'boast', 'decide', 'domina', 'greedy', 'headstrong', 'hierarch', 'hostil', 'implusive', 'independen', 'masculine', 'outspoken', 'reckless', 'stubborn', 'self-confiden', 'self-sufficien', 'self-relian', 'manmade', 'chairman', 'son', 'fireman', 'freshman', 'mankind', 'boyfriend', 'husband', 'policeman', 'walter', 'brother', 'spokesman', 'upperclassman', 'gentleman', 'alumnus', 'man up', 'Mr.', 'man-made', 'the common man', 'mailman', 'congressman', 'acts as a leader', 'aggressive', 'analytical', 'assertive', 'athletic', 'competitive', 'defends own beliefs', 'forceful', 'has leadership abilities', 'individualistic', 'affectionate', 'cheer', 'communal', 'considerate', 'cooperat', 'emotiona', 'feminine', 'flatterable', 'gentle', 'interpersonal', 'interdependen', 'interpersona', 'kinship', 'modesty', 'nag', 'polite', 'quiet', 'sensitiv', 'submissive', 'sympath', 'tender', 'together', 'warm', 'whin', 'yield', 'daughter', 'wife', 'girlfriend', 'waitress', 'sister', 'ladies', 'alu

### save data

In [286]:
df_jd_result.shape

(1377, 3)

In [287]:
# add a coloum named sor

annotation = []

for index, row in df_jd_result.iterrows():
    annotation.append(row[1][0][0])

In [259]:
annotation

['competit',
 'respons',
 'leadership',
 'respons',
 'support',
 'superior',
 'kind',
 'leader',
 'lead',
 'respons',
 'leadership',
 'support',
 'respons',
 'support',
 'respons',
 'support',
 'individualsif',
 'competit',
 'determin',
 'support',
 'support',
 'persist',
 'respons',
 'lead',
 'leader',
 'lead',
 'support',
 'respons',
 'lead',
 'support',
 'analys',
 'support',
 'support',
 'challeng',
 'contractor',
 'commit',
 'support',
 'depend',
 'commit',
 'analyt',
 'analyt',
 'leadership',
 'analyt',
 'confidenti',
 'analyst',
 'support',
 'respons',
 'support',
 'support',
 'encourag',
 'together',
 'leader',
 'leader',
 'analyz',
 'support',
 'nurtur',
 'empathi',
 'understand',
 'determin',
 'support',
 'respons',
 'support',
 'respons',
 'lead',
 'support',
 'respons',
 'support',
 'respons',
 'lead',
 'support',
 'respons',
 'support',
 'individual',
 'analysi',
 'leadership',
 'support',
 'empathi',
 'empathi',
 'independent',
 'together',
 'challeng',
 'respons',
 'supp

In [260]:
df_jd_result['annotation'] = annotation

In [261]:
df_jd_result

Unnamed: 0,sentence,word_in_Sentence,biased_term,annotation
0,"Privately held, we offer exceptional benefits,...","[[competit, M], [competitive, M]]","[[compet, M], [competitive, M]]",competit
1,"As part of an account team, you will be respon...","[[respons, F]]","[[respon, F]]",respons
2,Job Overview\r\nApex is an environmental consu...,"[[leadership, M]]","[[lead, M]]",leadership
3,Driven by an entrepreneurial spirit and a dedi...,"[[respons, F]]","[[respon, F]]",respons
4,Working in partnership with our public and pri...,"[[support, F]]","[[makes decisions easily, F]]",support
...,...,...,...,...
132,The right candidate will be an integral part o...,"[[support, F]]","[[support, F]]",support
133,Troubleshoots and performs maintenance on DC b...,"[[logic, M]]","[[logic, M]]",logic
134,"Schedule, coordinate, work with and monitor co...","[[contractor, M]]","[[actor, M]]",contractor
135,Assists other team members and works with all ...,"[[support, F]]","[[makes decisions easily, F]]",support


In [262]:
df1, df2 = np.array_split(df_jd_result, 2)

In [263]:
df1

Unnamed: 0,sentence,word_in_Sentence,biased_term,annotation
0,"Privately held, we offer exceptional benefits,...","[[competit, M], [competitive, M]]","[[compet, M], [competitive, M]]",competit
1,"As part of an account team, you will be respon...","[[respons, F]]","[[respon, F]]",respons
2,Job Overview\r\nApex is an environmental consu...,"[[leadership, M]]","[[lead, M]]",leadership
3,Driven by an entrepreneurial spirit and a dedi...,"[[respons, F]]","[[respon, F]]",respons
4,Working in partnership with our public and pri...,"[[support, F]]","[[makes decisions easily, F]]",support
...,...,...,...,...
64,The right candidate will be an integral part o...,"[[support, F]]","[[support, F]]",support
65,Responsible for all aspects of equipment.,"[[respons, F]]","[[respon, F]]",respons
66,The right candidate will be an integral part o...,"[[support, F]]","[[support, F]]",support
67,Key responsibilities:\r\n\r\nOperate Switchboa...,"[[respons, F]]","[[respon, F]]",respons


In [264]:
df2

Unnamed: 0,sentence,word_in_Sentence,biased_term,annotation
69,The right candidate will be an integral part o...,"[[support, F]]","[[support, F]]",support
70,"Responsibilities include, but are not limited ...","[[respons, F]]","[[respon, F]]",respons
71,The right candidate will be an integral part o...,"[[support, F]]","[[support, F]]",support
72,DESCRIPTION\r\n\r\n\r\nHighly motivated indivi...,"[[individual, M], [support, F], [understand, F]]","[[individual, M], [makes decisions easily, F],...",individual
73,"Experience with business analysis, requirement...","[[analysi, M], [support, F]]","[[analy, M], [makes decisions easily, F]]",analysi
...,...,...,...,...
132,The right candidate will be an integral part o...,"[[support, F]]","[[support, F]]",support
133,Troubleshoots and performs maintenance on DC b...,"[[logic, M]]","[[logic, M]]",logic
134,"Schedule, coordinate, work with and monitor co...","[[contractor, M]]","[[actor, M]]",contractor
135,Assists other team members and works with all ...,"[[support, F]]","[[makes decisions easily, F]]",support
