In [106]:
from bs4 import BeautifulSoup # For HTML parsing
import urllib.request # Website connections
import re # Regular expressions
from time import sleep # To prevent overwhelming the server between connections
from collections import Counter # Keep track of our term counts
from nltk.corpus import stopwords # Filter out stopwords, such as 'the', 'or', 'and'
import pandas as pd # For converting results to a dataframe and bar chart plots
%matplotlib inline

In [39]:
def text_cleaner(website):
    '''
    This function just cleans up the raw html so that I can look at it.
    Inputs: a URL to investigate
    Outputs: Cleaned text only
    '''
    try:
        site = urllib.request.urlopen(website).read() # Connect to the job posting
    except: 
        print('site not found')
        return   # Need this in case the website isn't there anymore or some other weird connection problem 
    
    soup_obj = BeautifulSoup(site, 'lxml') # Get the html from the site
    
    if len(soup_obj) == 0: # In case the default parser lxml doesn't work, try another one
        soup_obj = BeautifulSoup(site, 'html5lib')
    
    for script in soup_obj(["script", "style"]):
        script.extract() # Remove these two elements from the BS4 object
    
    text = soup_obj.get_text() # Get the text from this
    lines = (line.strip() for line in text.splitlines()) # break into lines
    chunks = (phrase.strip() for line in lines for phrase in line.split("  ")) # break multi-headlines into a line each
    text = ''.join(chunk for chunk in chunks if chunk).encode('utf-8') # Get rid of all blank lines and ends of line
    
    # Now clean out all of the unicode junk (this line works great!!!)
    try:
        text = text.decode('unicode_escape').encode('ascii', 'ignore') # Need this as some websites aren't formatted
    except:                                                            # in a way that this works, can occasionally throw
        print ('unable to decode')
        return                                                         # an exception
    
    # decode the bytes to string so that regex can be used
    text = text.decode('ISO-8859-1')
    text = re.sub("[^a-zA-Z+3]"," ", text)  # Now get rid of any terms that aren't words (include 3 for d3.js)
                                             # Also include + for C++
    text = re.sub(r"([a-z])([A-Z])", r"\1 \2", text) # Fix spacing issue from merged words
    text = text.lower().split()  # Go to lower case and split them apart
    stop_words = set(stopwords.words("english")) # Filter out any stop words
    text = [w for w in text if not w in stop_words]
    text = list(set(text)) # Last, just get the set of these. Ignore counts (we are just looking at whether a term existed
                           # or not on the website)
    return text

In [40]:
sample = text_cleaner('http://www.indeed.com/viewjob?jk=5505e59f8e5a32a4&q=%22data+scientist%22&tk=19ftfgsmj19ti0l3&from=web&advn=1855944161169178&sjdu=QwrRXKrqZ3CNX5W-O9jEvWC1RT2wMYkGnZrqGdrncbKqQ7uwTLXzT1_ME9WQ4M-7om7mrHAlvyJT8cA_14IV5w&pub=pub-indeed')
sample[:20] # Just show the first 20 words

['programs',
 'p',
 'econometrics',
 'searches',
 'trading',
 'bonus',
 'behavior',
 'learning',
 'si',
 'nlp',
 'engineer',
 'fitness',
 'catered',
 'waindeed',
 'meals',
 'per',
 'keywords',
 'vision',
 'waamazon',
 'commuter']

In [82]:
def skills_info(city = None, state = None):
    '''
    This function will take a desired city/state and look for all new job postings
    on Indeed.com. It will crawl all of the job postings and keep track of how many
    use a preset list of typical data science skills. The final percentage for each skill
    is then displayed at the end of the collation. 
    
    Inputs: The location's city and state. These are optional. If no city/state is input, 
    the function will assume a national search (this can take a while!!!).
    Input the city/state as strings, such as skills_info('Chicago', 'IL').
    Use a two letter abbreviation for the state.
    
    Output: A bar chart showing the most commonly desired skills in the job market for 
    a data scientist. 
    '''
    
    final_job = 'data+scientist' # searching for data scientist exact fit("data scientist" on Indeed search)
    
    # Make sure the city specified works properly if it has more than one word (such as San Francisco)
    if city is not None:
        final_city = city.split() 
        final_city = '+'.join(word for word in final_city)
        final_site_list = ['http://www.indeed.com/jobs?q=%22', final_job, '%22&l=', final_city,
                   '%2C+', state] # Join all of our strings together so that indeed will search correctly
    else:
        final_site_list = ['http://www.indeed.com/jobs?q="', final_job, '"']

    final_site = ''.join(final_site_list) # Merge the html address together into one string
    base_url = 'http://www.indeed.com'

    try:
        html = urllib.request.urlopen(final_site).read() # Open up the front page of our search first
    except:
        'That city/state combination did not have any jobs. Exiting . . .' # In case the city is invalid
        return
    soup = BeautifulSoup(html, 'lxml') # Get the html from the first page

    # Now find out how many jobs there were
    num_jobs_area = soup.find(id = 'searchCount').string.encode('utf-8') # Now extract the total number of jobs found
    
    num_jobs_area = num_jobs_area.decode('ISO-8859-1')# The 'searchCount' object has this
    
    job_numbers = re.findall('\d+', num_jobs_area) # Extract the total jobs found from the search result
    
    print('num_jobs_area'+ str(num_jobs_area))
    print('job_numbers' + str(job_numbers))
    
    if len(job_numbers) > 3: # Have a total number of jobs greater than 1000
        total_num_jobs = (int(job_numbers[1])*1000) + int(job_numbers[3])
    else:
        total_num_jobs = int(job_numbers[1]) 
    
    city_title = city
    if city is None:
        city_title = 'Nationwide'
    print ('There were {} jobs found, {}'.format(total_num_jobs,city_title)) # Display how many jobs were found
    num_pages = total_num_jobs/10 # This will be how we know the number of times we need to iterate over each new
                                  # search result page
    job_descriptions = [] # Store all our descriptions in this list
    
    for i in xrange(1,num_pages+1): # Loop through all of our search result pages
        print ('Getting page '+ i)
        start_num = str(i*10) # Assign the multiplier of 10 to view the pages we want
        current_page = ''.join([final_site, '&start=', start_num])
        
        # Now that we can view the correct 10 job returns, start collecting the text samples from each
        html_page = urllib.request.urlopen(current_page).read() # Get the page
        page_obj = BeautifulSoup(html_page) # Locate all of the job links
        job_link_area = page_obj.find(id = 'resultsCol') # The center column on the page where the job postings exist
        job_URLS = [base_url + link.get('href') for link in job_link_area.find_all('a')] # Get the URLS for the jobs
        job_URLS = filter(lambda x:'clk' in x, job_URLS) # Now get just the job related URLS
        
        for j in xrange(0,len(job_URLS)):
            final_description = text_cleaner(job_URLS[j])
            if final_description: # So that we only append when the website was accessed correctly
                job_descriptions.append(final_description)
            sleep(1) # So that we don't be jerks. If you have a very fast internet connection you could hit the server a lot! 
    
    print ('Done with collecting the job postings!')  
    print ('There were {} jobs successfully found.'.format(len(job_descriptions)))


    doc_frequency = Counter() # This will create a full counter of our terms. 
    [doc_frequency.update(item) for item in job_descriptions] # List comp

    # Now we can just look at our final dict list inside doc_frequency

    # Obtain our key terms and store them in a dict. These are the key data science skills we are looking for
    prog_lang_dict = Counter({'R':doc_frequency['r'], 'Python':doc_frequency['python'],
                    'Java':doc_frequency['java'], 'C++':doc_frequency['c++'],
                   'Ruby':doc_frequency['ruby'],
                  'Perl':doc_frequency['perl'], 'Matlab':doc_frequency['matlab'],
                  'JavaScript':doc_frequency['javascript'], 'Scala': doc_frequency['scala']})
                  
    analysis_tool_dict = Counter({'Excel':doc_frequency['excel'],  'Tableau':doc_frequency['tableau'],
                      'D3.js':doc_frequency['d3.js'], 'SAS':doc_frequency['sas'],
                      'SPSS':doc_frequency['spss'], 'D3':doc_frequency['d3']})  

    hadoop_dict = Counter({'Hadoop':doc_frequency['hadoop'], 'MapReduce':doc_frequency['mapreduce'],
               'Spark':doc_frequency['spark'], 'Pig':doc_frequency['pig'],
               'Hive':doc_frequency['hive'], 'Shark':doc_frequency['shark'],
               'Oozie':doc_frequency['oozie'], 'ZooKeeper':doc_frequency['zookeeper'],
               'Flume':doc_frequency['flume'], 'Mahout':doc_frequency['mahout']})
               
    database_dict = Counter({'SQL':doc_frequency['sql'], 'NoSQL':doc_frequency['nosql'],
                 'HBase':doc_frequency['hbase'], 'Cassandra':doc_frequency['cassandra'],
                 'MongoDB':doc_frequency['mongodb']})
                      
    overall_total_skills = prog_lang_dict + analysis_tool_dict + hadoop_dict + database_dict # Combine our Counter objects
    final_frame = pd.DataFrame(overall_total_skills.items(), columns = ['Term', 'NumPostings']) # Convert these terms to a 
                                                                                                # dataframe 
    # Change the values to reflect a percentage of the postings 
    final_frame.NumPostings = (final_frame.NumPostings)*100/len(job_descriptions) # Gives percentage of job postings 
                                                                                  #  having that term 
    # Sort the data for plotting purposes
    final_frame.sort(columns = 'NumPostings', ascending = False, inplace = True)

    # Get it ready for a bar plot
    final_plot = final_frame.plot(x = 'Term', kind = 'bar', legend = None, 
                              title = 'Percentage of Data Scientist Job Ads with a Key Skill, ' + city_title)
    
    final_plot.set_ylabel('Percentage Appearing in Job Ads')
    fig = final_plot.get_figure() # Have to convert the pandas plot object to a matplotlib object
    
    
    return fig, final_frame # End of the function

In [83]:
seattle_info = skills_info(city = 'Seattle', state = 'WA') 

num_jobs_area
        Page 1 of 353 jobs
job_numbers['1', '353']
There were 353 jobs found, Seattle


NameError: name 'xrange' is not defined

In [98]:
city = 'Seattle'
state = 'WA'


final_job = 'data+scientist' # searching for data scientist exact fit("data scientist" on Indeed search)

# Make sure the city specified works properly if it has more than one word (such as San Francisco)
if city is not None:
    final_city = city.split() 
    final_city = '+'.join(word for word in final_city)
    final_site_list = ['http://www.indeed.com/jobs?q=%22', final_job, '%22&l=', final_city,
               '%2C+', state] # Join all of our strings together so that indeed will search correctly
else:
    final_site_list = ['http://www.indeed.com/jobs?q="', final_job, '"']

final_site = ''.join(final_site_list) # Merge the html address together into one string
base_url = 'http://www.indeed.com'

try:
    html = urllib.request.urlopen(final_site).read() # Open up the front page of our search first
except:
    'That city/state combination did not have any jobs. Exiting . . .' # In case the city is invalid
    
soup = BeautifulSoup(html, 'lxml') # Get the html from the first page

# Now find out how many jobs there were
num_jobs_area = soup.find(id = 'searchCount').string.encode('utf-8') # Now extract the total number of jobs found

num_jobs_area = num_jobs_area.decode('ISO-8859-1')# The 'searchCount' object has this

job_numbers = re.findall('\d+', num_jobs_area) # Extract the total jobs found from the search result

if len(job_numbers) > 3: # Have a total number of jobs greater than 1000
    total_num_jobs = (int(job_numbers[1])*1000) + int(job_numbers[3])
else:
    total_num_jobs = int(job_numbers[1]) 

city_title = city
if city is None:
    city_title = 'Nationwide'
print ('There were {} jobs found, {}'.format(total_num_jobs,city_title)) # Display how many jobs were found
num_pages = int(total_num_jobs/10) # This will be how we know the number of times we need to iterate over each new
                              # search result page
job_descriptions = [] # Store all our descriptions in this list

for i in range(1,num_pages+1): # Loop through all of our search result pages
    print ('Getting page {}'.format(i))
    start_num = str(i*10) # Assign the multiplier of 10 to view the pages we want
    current_page = ''.join([final_site, '&start=', start_num])

    # Now that we can view the correct 10 job returns, start collecting the text samples from each
    html_page = urllib.request.urlopen(current_page).read() # Get the page
    page_obj = BeautifulSoup(html_page, 'lxml') # Locate all of the job links
    job_link_area = page_obj.find(id = 'resultsCol') # The center column on the page where the job postings exist
    

    # Get the URLS for the jobs
    job_URLS=[]
    for link in job_link_area.find_all('a'):
        if link.get('href') == None:
            print('no href attribute')
            continue
        job_URLS.append(base_url + link.get('href'))
    job_URLS = list(filter(lambda x:'clk' in x, job_URLS)) # Now get just the job related URLS
 
    for j in range(0,len(job_URLS)):
        final_description = text_cleaner(job_URLS[j])
        if final_description: # So that we only append when the website was accessed correctly
            job_descriptions.append(final_description)
        sleep(1) # So that we don't be jerks. If you have a very fast internet connection you could hit the server a lot! 

print ('Done with collecting the job postings!')  
print ('There were {} jobs successfully found.'.format(len(job_descriptions)))


doc_frequency = Counter() # This will create a full counter of our terms. 
[doc_frequency.update(item) for item in job_descriptions] # List comp

# Now we can just look at our final dict list inside doc_frequency

# Obtain our key terms and store them in a dict. These are the key data science skills we are looking for
prog_lang_dict = Counter({'R':doc_frequency['r'], 'Python':doc_frequency['python'],
                'Java':doc_frequency['java'], 'C++':doc_frequency['c++'],
               'Ruby':doc_frequency['ruby'],
              'Perl':doc_frequency['perl'], 'Matlab':doc_frequency['matlab'],
              'JavaScript':doc_frequency['javascript'], 'Scala': doc_frequency['scala']})

analysis_tool_dict = Counter({'Excel':doc_frequency['excel'],  'Tableau':doc_frequency['tableau'],
                  'D3.js':doc_frequency['d3.js'], 'SAS':doc_frequency['sas'],
                  'SPSS':doc_frequency['spss'], 'D3':doc_frequency['d3']})  

hadoop_dict = Counter({'Hadoop':doc_frequency['hadoop'], 'MapReduce':doc_frequency['mapreduce'],
           'Spark':doc_frequency['spark'], 'Pig':doc_frequency['pig'],
           'Hive':doc_frequency['hive'], 'Shark':doc_frequency['shark'],
           'Oozie':doc_frequency['oozie'], 'ZooKeeper':doc_frequency['zookeeper'],
           'Flume':doc_frequency['flume'], 'Mahout':doc_frequency['mahout']})

database_dict = Counter({'SQL':doc_frequency['sql'], 'NoSQL':doc_frequency['nosql'],
             'HBase':doc_frequency['hbase'], 'Cassandra':doc_frequency['cassandra'],
             'MongoDB':doc_frequency['mongodb']})

overall_total_skills = prog_lang_dict + analysis_tool_dict + hadoop_dict + database_dict # Combine our Counter objects
final_frame = pd.DataFrame(overall_total_skills.items(), columns = ['Term', 'NumPostings']) # Convert these terms to a 
                                                                                            # dataframe 
# Change the values to reflect a percentage of the postings 
final_frame.NumPostings = (final_frame.NumPostings)*100/len(job_descriptions) # Gives percentage of job postings 
                                                                              #  having that term 
# Sort the data for plotting purposes
final_frame.sort(columns = 'NumPostings', ascending = False, inplace = True)

# Get it ready for a bar plot
final_plot = final_frame.plot(x = 'Term', kind = 'bar', legend = None, 
                          title = 'Percentage of Data Scientist Job Ads with a Key Skill, ' + city_title)

final_plot.set_ylabel('Percentage Appearing in Job Ads')
fig = final_plot.get_figure() # Have to convert the pandas plot object to a matplotlib object

doc_frequency = Counter() # This will create a full counter of our terms. 
[doc_frequency.update(item) for item in job_descriptions] # List comp

# Now we can just look at our final dict list inside doc_frequency

# Obtain our key terms and store them in a dict. These are the key data science skills we are looking for
prog_lang_dict = Counter({'R':doc_frequency['r'], 'Python':doc_frequency['python'],
                'Java':doc_frequency['java'], 'C++':doc_frequency['c++'],
               'Ruby':doc_frequency['ruby'],
              'Perl':doc_frequency['perl'], 'Matlab':doc_frequency['matlab'],
              'JavaScript':doc_frequency['javascript'], 'Scala': doc_frequency['scala']})

analysis_tool_dict = Counter({'Excel':doc_frequency['excel'],  'Tableau':doc_frequency['tableau'],
                  'D3.js':doc_frequency['d3.js'], 'SAS':doc_frequency['sas'],
                  'SPSS':doc_frequency['spss'], 'D3':doc_frequency['d3']})  

hadoop_dict = Counter({'Hadoop':doc_frequency['hadoop'], 'MapReduce':doc_frequency['mapreduce'],
           'Spark':doc_frequency['spark'], 'Pig':doc_frequency['pig'],
           'Hive':doc_frequency['hive'], 'Shark':doc_frequency['shark'],
           'Oozie':doc_frequency['oozie'], 'ZooKeeper':doc_frequency['zookeeper'],
           'Flume':doc_frequency['flume'], 'Mahout':doc_frequency['mahout']})

database_dict = Counter({'SQL':doc_frequency['sql'], 'NoSQL':doc_frequency['nosql'],
             'HBase':doc_frequency['hbase'], 'Cassandra':doc_frequency['cassandra'],
             'MongoDB':doc_frequency['mongodb']})



There were 351 jobs found, Seattle
Getting page 1
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=528205c6b4db1ecf&fccid=21ba96632d29e921&vjs=3', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Leafly-Holdings-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Leafly-Holdings', 'http://www.indeed.com/forum/loc/Seattle-Washington.html', 'http://www.indeed.com/forum/cmp/Leafly-Holdings.html', 'http://www.indeed.com/rc/clk?jk=3decb38d87a325d9&fccid=160efb82f2462f14&vjs=3', 'http://www.indeed.com/cmp/Expedia-Group', 'http://www.indeed.com/cmp/Expedia-Group/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Expedia-l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/sal

Getting page 3
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=8524622e3739b931&fccid=f7b190e23d6955d7&vjs=3', 'http://www.indeed.com/cmp/Eagleview-Technologies', 'http://www.indeed.com/cmp/Eagleview-Technologies/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Eagleview-Technologies-l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Bellevue-WA', 'http://www.indeed.com/cmp/Eagleview-Technologies', 'http://www.indeed.com/cmp/Eagleview-Technologies/faq', 'http://www.indeed.com/cmp/Eagleview-Technologies/faq/how-did-you-feel-about-telling-people-you-worked-at-eagleview-technologies?quid=1bmn6nci6brcvf0f', 'http://www.indeed.com/cmp/Eagleview-Technologies/faq/what-is-the-most-stressful-part-about-working-at-eagleview-technologies?quid=1cl0qqe67b88ie3k', 'http://www.indeed.com/forum/cmp/Eagleview-Technolo

Getting page 5
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=4dc6be702673bc7f&fccid=913e1b259c8d65e2&vjs=3', 'http://www.indeed.com/cmp/Zillow', 'http://www.indeed.com/cmp/Zillow/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Zillow-Group-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Zillow', 'http://www.indeed.com/cmp/Zillow/faq', 'http://www.indeed.com/cmp/Zillow/faq/how-did-you-feel-about-telling-people-you-worked-at-zillow?quid=1bspm0n6n1ahj65j', 'http://www.indeed.com/cmp/Zillow/faq/what-is-the-vacation-policy-like-how-many-vacation-days-do-you-get-per-year?quid=1c4i9orobak5t9ou', 'http://www.indeed.com/forum/loc/Seattle-Washington.html', 'http://www.indeed.com/forum/cmp/Zillow.html', 'http://www.indeed.com/rc/clk?jk=8f1885cb99406b91&fccid=28be5add96

Getting page 6
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=5aab9e4f27dbbfc7&fccid=7b7d74f58f990cb9&vjs=3', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/jobs?q=Mercedes-benz+Research+%26+Development+North+America,+Inc&l=Seattle,+WA&nc=jasx', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Mercedes--benz-Research-&-Development-North-America,-Inc.', 'http://www.indeed.com/forum/loc/Seattle-Washington.html', 'http://www.indeed.com/company/ifusioncore/jobs/Senior-Data-Scientist-b725b782aea56162?fccid=cd437007d27dd2e9&vjs=3', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Ifusioncore-l-Redmond,-WA-jobs.html', 'http://www.indeed.com/l-Redmond,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Redmond-WA', 'http://www.ind

Getting page 8
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=1ad7029ac623a233&fccid=c2c6a7536e4d9df3&vjs=3', 'http://www.indeed.com/cmp/Paccar-Inc.', 'http://www.indeed.com/cmp/Paccar-Inc./reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Paccar-l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Bellevue-WA', 'http://www.indeed.com/cmp/Paccar-Inc.', 'http://www.indeed.com/cmp/Paccar-Inc./faq', 'http://www.indeed.com/cmp/Paccar-Inc./faq/what-is-the-sick-leave-policy-like-how-many-sick-days-do-you-get-per-year?quid=1b0a15fbvaqisbnt', 'http://www.indeed.com/cmp/Paccar-Inc./faq/what-is-the-vacation-policy-like-how-many-vacation-days-do-you-get-per-year?quid=1bdkga494b86cfer', 'http://www.indeed.com/forum/loc/Bellevue-Washington.html', 'http://www.indeed.com/rc/clk?jk=32280e769e0684d1&fccid=4514e98bb41669

Getting page 9
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=6abd17f1f1fd05b8&fccid=160efb82f2462f14&vjs=3', 'http://www.indeed.com/cmp/Expedia-Group', 'http://www.indeed.com/cmp/Expedia-Group/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Expedia-l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Bellevue-WA', 'http://www.indeed.com/cmp/Expedia-Group', 'http://www.indeed.com/cmp/Expedia-Group/faq', 'http://www.indeed.com/cmp/Expedia-Group/faq/is-this-job-must-be-performed-from-expedia-call-center-in-las-vegas-nv-or-it-can-be-done-at-person-s-home?quid=1aph9km111d1f3nn', 'http://www.indeed.com/cmp/Expedia-Group/faq/how-are-the-working-hours?quid=1ao76rnphak9gc68', 'http://www.indeed.com/forum/cmp/Expedia-Group.html', 'http://www.indeed.com/forum/loc/Bellevue-Washington.html', 'http://www.indeed.co

Getting page 10
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=545db25e0a246c52&fccid=a3bcc3a125bee804&vjs=3', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Neal-Analytics-l-Kirkland,-WA-jobs.html', 'http://www.indeed.com/l-Kirkland,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Kirkland-WA', 'http://www.indeed.com/forum/loc/Kirkland-Washington.html', 'http://www.indeed.com/forum/cmp/Neal-Analytics.html', 'http://www.indeed.com/rc/clk?jk=c261efe94f3c8250&fccid=d4b58fa60b80adb8&vjs=3', 'http://www.indeed.com/cmp/Rover.com', 'http://www.indeed.com/cmp/Rover.com/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Rover-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Rover.com', 'http:

Getting page 12
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=63ec715d26fd3302&fccid=bc410a4106954d97&vjs=3', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/jobs?q=Nauwork,+LLC&l=Seattle,+WA&nc=jasx', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Seattle-WA', 'http://www.indeed.com/forum/loc/Seattle-Washington.html', 'http://www.indeed.com/forum/cmp/Nauwork%2C-LLC.html', 'http://www.indeed.com/rc/clk?jk=392afc193e082af0&fccid=fe2d21eef233e94a&vjs=3', 'http://www.indeed.com/cmp/Amazon.com', 'http://www.indeed.com/cmp/Amazon.com/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Amazon-Com-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Technical-Recruiter-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Amazon.co

Getting page 13
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=6e74525b6b338dbd&fccid=1639254ea84748b5&vjs=3', 'http://www.indeed.com/cmp/Facebook', 'http://www.indeed.com/cmp/Facebook/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Facebook-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Facebook', 'http://www.indeed.com/cmp/Facebook/faq', 'http://www.indeed.com/cmp/Facebook/faq/can-you-work-from-home?quid=1bltlotduaqgjcq8', 'http://www.indeed.com/cmp/Facebook/faq/how-did-you-feel-about-telling-people-you-worked-at-facebook?quid=1b3c2eqhhakab996', 'http://www.indeed.com/forum/loc/Seattle-Washington.html', 'http://www.indeed.com/forum/cmp/Facebook.html', 'http://www.indeed.com/rc/clk?jk=04fdcf7d20ea995e&fccid=a4cd0efa7775ecd2&vjs=3', 'http://www.indeed.com/cm

Getting page 14
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=a8a273e27d5579fe&fccid=2dd390c3a48a7ed0&vjs=3', 'http://www.indeed.com/cmp/Kpmg', 'http://www.indeed.com/cmp/Kpmg/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Kpmg-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Kpmg', 'http://www.indeed.com/cmp/Kpmg/faq', 'http://www.indeed.com/cmp/Kpmg/faq/why-did-you-leave-your-job-at-kpmg?quid=1bc4d8gkt5j52cqu', 'http://www.indeed.com/cmp/Kpmg/faq/what-is-the-work-environment-and-culture-like-at-kpmg-auditores-independentes?quid=1b6c94u5k5ncua0p', 'http://www.indeed.com/forum/loc/Seattle-Washington.html', 'http://www.indeed.com/forum/cmp/Kpmg.html', 'http://www.indeed.com/rc/clk?jk=710e476626284372&fccid=3b4e0f2c2deb87d6&vjs=3', 'http://www.indeed.com/cmp/A

Getting page 16
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=1c8852aace809aac&fccid=fe2d21eef233e94a&vjs=3', 'http://www.indeed.com/cmp/Amazon.com', 'http://www.indeed.com/cmp/Amazon.com/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Amazon-Com-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Business-Intelligence-Manager-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Amazon.com', 'http://www.indeed.com/cmp/Amazon.com/faq', 'http://www.indeed.com/cmp/Amazon.com/faq/what-would-you-suggest-amazon-com-management-do-to-prevent-others-from-leaving-for-this-reason?quid=1bb857n98brch8lj', 'http://www.indeed.com/cmp/Amazon.com/faq/if-you-were-to-leave-amazon-com-what-would-be-the-reason?quid=1bb9r3ilcakgj8dv', 'http://www.indeed.com/forum/loc/Seattle-Washington.html', 'http://www.indeed.com/rc/clk?jk=6d621d4e3b145e85&fccid=34

Getting page 17
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=1ad95333e8dc69f8&fccid=734cb5a01ee60f80&vjs=3', 'http://www.indeed.com/cmp/Microsoft', 'http://www.indeed.com/cmp/Microsoft/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmto4qsoag9ibe0&jk=1ad95333e8dc69f8&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSoftware%2BEngineer%26rbc%3DMicrosoft%26jtid%3D229c2d04b44eddae%26jcid%3D734cb5a01ee60f80%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Microsoft-l-Redmond,-WA-jobs.html', 'http://www.indeed.com/l-Redmond,-WA-jobs.html', 'http://www.indeed.com/salaries/Software-Engineer-Salaries,-Redmond-WA', 'http://www.indeed.com/cmp/Microsoft', 'http://www.indeed.com/cmp/Microsoft/faq', 'http://www.indeed.com/cmp/Microsoft/faq/how-are-the-working-hours?quid=1an36f47hb822853', 'http://www.indeed.com/cmp/Microsoft/faq/how-should-dress-for-an-

Getting page 18
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=b82295a5453461a8&fccid=fe2d21eef233e94a&vjs=3', 'http://www.indeed.com/cmp/Amazon.com', 'http://www.indeed.com/cmp/Amazon.com/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Amazon-Com-l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/salaries/Business-Intelligence-Developer-Salaries,-Bellevue-WA', 'http://www.indeed.com/cmp/Amazon.com', 'http://www.indeed.com/cmp/Amazon.com/faq', 'http://www.indeed.com/cmp/Amazon.com/faq/what-would-you-suggest-amazon-com-management-do-to-prevent-others-from-leaving-for-this-reason?quid=1bb857n98brch8lj', 'http://www.indeed.com/cmp/Amazon.com/faq/if-you-were-to-leave-amazon-com-what-would-be-the-reason?quid=1bb9r3ilcakgj8dv', 'http://www.indeed.com/forum/loc/Bellevue-Washington.html', 'http://www.indeed.com/company/Clearwing-Consulting-

Getting page 19
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dcbe920f0b6b2b81&fccid=2dd390c3a48a7ed0&vjs=3', 'http://www.indeed.com/cmp/Kpmg', 'http://www.indeed.com/cmp/Kpmg/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Kpmg-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Data-Scientist-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Kpmg', 'http://www.indeed.com/cmp/Kpmg/faq', 'http://www.indeed.com/cmp/Kpmg/faq/why-did-you-leave-your-job-at-kpmg?quid=1bc4d8gkt5j52cqu', 'http://www.indeed.com/cmp/Kpmg/faq/what-is-the-work-environment-and-culture-like-at-kpmg-auditores-independentes?quid=1b6c94u5k5ncua0p', 'http://www.indeed.com/forum/loc/Seattle-Washington.html', 'http://www.indeed.com/forum/cmp/Kpmg.html', 'http://www.indeed.com/rc/clk?jk=df5eff56e9f3361b&fccid=fe2d21eef233e94a&vjs=3', 'http://www.indeed.com/cmp/A

Getting page 20
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=e3c7e3b9122ee1e3&fccid=880e4714f2ad94a8&vjs=3', 'http://www.indeed.com/cmp/Unity-Technologies', 'http://www.indeed.com/cmp/Unity-Technologies/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Unity-Technologies-l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/salaries/Machine-Learning-Engineer-Salaries,-Bellevue-WA', 'http://www.indeed.com/cmp/Unity-Technologies', 'http://www.indeed.com/forum/loc/Bellevue-Washington.html', 'http://www.indeed.com/forum/cmp/Unity-Technologies.html', 'http://www.indeed.com/rc/clk?jk=7ac112d43498defe&fccid=734cb5a01ee60f80&vjs=3', 'http://www.indeed.com/cmp/Microsoft', 'http://www.indeed.com/cmp/Microsoft/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Microsoft-l-Bellevue,-WA-

Getting page 21
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=0ce46398a4c0b220&fccid=46f8fb05c236d8ef&vjs=3', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Maana-l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/l-Bellevue,-WA-jobs.html', 'http://www.indeed.com/salaries/Solutions-Engineer-Salaries,-Bellevue-WA', 'http://www.indeed.com/cmp/Maana,-Inc.', 'http://www.indeed.com/forum/loc/Bellevue-Washington.html', 'http://www.indeed.com/rc/clk?jk=51a1770d972f710d&fccid=734cb5a01ee60f80&vjs=3', 'http://www.indeed.com/cmp/Microsoft', 'http://www.indeed.com/cmp/Microsoft/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Microsoft-l-Redmond,-WA-jobs.html', 'http://www.indeed.com/l-Redmond,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Software-Engineer-Salaries,-Redmond-WA', 'http://www.indeed.com/cmp/Microsoft', 'http://www

Getting page 22
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=db1dd32a0b20f287&fccid=2dd390c3a48a7ed0&vjs=3', 'http://www.indeed.com/cmp/Kpmg', 'http://www.indeed.com/cmp/Kpmg/reviews', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Kpmg-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Engineer-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Kpmg', 'http://www.indeed.com/cmp/Kpmg/faq', 'http://www.indeed.com/cmp/Kpmg/faq/why-did-you-leave-your-job-at-kpmg?quid=1bc4d8gkt5j52cqu', 'http://www.indeed.com/cmp/Kpmg/faq/what-is-the-work-environment-and-culture-like-at-kpmg-auditores-independentes?quid=1b6c94u5k5ncua0p', 'http://www.indeed.com/forum/loc/Seattle-Washington.html', 'http://www.indeed.com/forum/cmp/Kpmg.html', 'http://www.indeed.com/rc/clk?jk=2d1b560d5d58fab7&fccid=0f5e6de249dd372f&vjs=3', 'http://www.indeed.com/cmp/

Getting page 24
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dfc516d70d38e593&fccid=b983e0b562a36362&vjs=3', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmto8lpu18jj22q&jk=dfc516d70d38e593&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSenior%2BManager%252C%2BProduct%2BManagement%2BPSJH%26rbc%3DProvidence%2BHealth%2B%2526%2BServices%26jtid%3D54af0e4177a1a1d6%26jcid%3Db983e0b562a36362%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Providence-Health-&-Services-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Director-of-Product-Management-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Heal

Getting page 25
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dfc516d70d38e593&fccid=b983e0b562a36362&vjs=3', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmto99omagll8fa&jk=dfc516d70d38e593&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSenior%2BManager%252C%2BProduct%2BManagement%2BPSJH%26rbc%3DProvidence%2BHealth%2B%2526%2BServices%26jtid%3D54af0e4177a1a1d6%26jcid%3Db983e0b562a36362%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Providence-Health-&-Services-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/faq', 'http://www.indeed.com/cmp/Providence-Health-&-Services/faq/why-did-you-l

Getting page 26
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dfc516d70d38e593&fccid=b983e0b562a36362&vjs=3', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmto9t96a2lab9m&jk=dfc516d70d38e593&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSenior%2BManager%252C%2BProduct%2BManagement%2BPSJH%26rbc%3DProvidence%2BHealth%2B%2526%2BServices%26jtid%3D54af0e4177a1a1d6%26jcid%3Db983e0b562a36362%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Providence-Health-&-Services-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Director-of-Product-Management-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Heal

Getting page 27
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dfc516d70d38e593&fccid=b983e0b562a36362&vjs=3', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmtoagrrae4hara&jk=dfc516d70d38e593&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSenior%2BManager%252C%2BProduct%2BManagement%2BPSJH%26rbc%3DProvidence%2BHealth%2B%2526%2BServices%26jtid%3D54af0e4177a1a1d6%26jcid%3Db983e0b562a36362%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Providence-Health-&-Services-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Director-of-Product-Management-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Heal

Getting page 28
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dfc516d70d38e593&fccid=b983e0b562a36362&vjs=3', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmtob64uav0ocqi&jk=dfc516d70d38e593&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSenior%2BManager%252C%2BProduct%2BManagement%2BPSJH%26rbc%3DProvidence%2BHealth%2B%2526%2BServices%26jtid%3D54af0e4177a1a1d6%26jcid%3Db983e0b562a36362%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Providence-Health-&-Services-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Director-of-Product-Management-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Heal

Getting page 29
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dfc516d70d38e593&fccid=b983e0b562a36362&vjs=3', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmtobpop18k377r&jk=dfc516d70d38e593&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSenior%2BManager%252C%2BProduct%2BManagement%2BPSJH%26rbc%3DProvidence%2BHealth%2B%2526%2BServices%26jtid%3D54af0e4177a1a1d6%26jcid%3Db983e0b562a36362%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Providence-Health-&-Services-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Director-of-Product-Management-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Heal

Getting page 30
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dfc516d70d38e593&fccid=b983e0b562a36362&vjs=3', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmtocd3318k23m4&jk=dfc516d70d38e593&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSenior%2BManager%252C%2BProduct%2BManagement%2BPSJH%26rbc%3DProvidence%2BHealth%2B%2526%2BServices%26jtid%3D54af0e4177a1a1d6%26jcid%3Db983e0b562a36362%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Providence-Health-&-Services-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Director-of-Product-Management-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Heal

Getting page 31
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dfc516d70d38e593&fccid=b983e0b562a36362&vjs=3', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmtod0pv1bt418s&jk=dfc516d70d38e593&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSenior%2BManager%252C%2BProduct%2BManagement%2BPSJH%26rbc%3DProvidence%2BHealth%2B%2526%2BServices%26jtid%3D54af0e4177a1a1d6%26jcid%3Db983e0b562a36362%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Providence-Health-&-Services-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Director-of-Product-Management-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Heal

Getting page 32
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dfc516d70d38e593&fccid=b983e0b562a36362&vjs=3', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmtodkblafd5det&jk=dfc516d70d38e593&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSenior%2BManager%252C%2BProduct%2BManagement%2BPSJH%26rbc%3DProvidence%2BHealth%2B%2526%2BServices%26jtid%3D54af0e4177a1a1d6%26jcid%3Db983e0b562a36362%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Providence-Health-&-Services-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Director-of-Product-Management-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Heal

Getting page 33
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dfc516d70d38e593&fccid=b983e0b562a36362&vjs=3', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmtoe99t187i5s4&jk=dfc516d70d38e593&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSenior%2BManager%252C%2BProduct%2BManagement%2BPSJH%26rbc%3DProvidence%2BHealth%2B%2526%2BServices%26jtid%3D54af0e4177a1a1d6%26jcid%3Db983e0b562a36362%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Providence-Health-&-Services-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Director-of-Product-Management-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Heal

Getting page 34
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dfc516d70d38e593&fccid=b983e0b562a36362&vjs=3', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmtoet9g1bt76th&jk=dfc516d70d38e593&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSenior%2BManager%252C%2BProduct%2BManagement%2BPSJH%26rbc%3DProvidence%2BHealth%2B%2526%2BServices%26jtid%3D54af0e4177a1a1d6%26jcid%3Db983e0b562a36362%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Providence-Health-&-Services-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Director-of-Product-Management-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Heal

Getting page 35
no href attribute
['http://www.indeed.com/promo/prime', 'http://www.indeed.com/rc/clk?jk=dfc516d70d38e593&fccid=b983e0b562a36362&vjs=3', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Health-&-Services/reviews', 'http://www.indeed.com/addlLoc/redirect?tk=1cmtofio718jj66t&jk=dfc516d70d38e593&dest=%2Fjobs%3Fq%3D%2522data%2Bscientist%2522%26l%3DSeattle%252C%2BWA%26rbt%3DSenior%2BManager%252C%2BProduct%2BManagement%2BPSJH%26rbc%3DProvidence%2BHealth%2B%2526%2BServices%26jtid%3D54af0e4177a1a1d6%26jcid%3Db983e0b562a36362%26grp%3Dtcl', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com#', 'http://www.indeed.com/q-Providence-Health-&-Services-l-Seattle,-WA-jobs.html', 'http://www.indeed.com/l-Seattle,-WA-jobs.html', 'http://www.indeed.com/salaries/Senior-Director-of-Product-Management-Salaries,-Seattle-WA', 'http://www.indeed.com/cmp/Providence-Health-&-Services', 'http://www.indeed.com/cmp/Providence-Heal

Done with collecting the job postings!
There were 335 jobs successfully found.


ValueError: DataFrame constructor not properly called!

In [100]:
overall_total_skills.items()

dict_items([('R', 149), ('Python', 226), ('Java', 147), ('C++', 55), ('Ruby', 42), ('Perl', 31), ('Matlab', 27), ('JavaScript', 1), ('Scala', 66), ('Excel', 39), ('Tableau', 35), ('SAS', 50), ('SPSS', 23), ('D3', 12), ('Hadoop', 120), ('Spark', 121), ('Pig', 28), ('Hive', 86), ('Oozie', 1), ('Mahout', 15), ('SQL', 128), ('HBase', 10), ('Cassandra', 3)])

In [99]:

overall_total_skills = prog_lang_dict + analysis_tool_dict + hadoop_dict + database_dict # Combine our Counter objects
final_frame = pd.DataFrame(overall_total_skills.items(), columns = ['Term', 'NumPostings']) # Convert these terms to a 
                                                                                            # dataframe 
# Change the values to reflect a percentage of the postings 
final_frame.NumPostings = (final_frame.NumPostings)*100/len(job_descriptions) # Gives percentage of job postings 
                                                                              #  having that term 
# Sort the data for plotting purposes
final_frame.sort(columns = 'NumPostings', ascending = False, inplace = True)

# Get it ready for a bar plot
final_plot = final_frame.plot(x = 'Term', kind = 'bar', legend = None, 
                          title = 'Percentage of Data Scientist Job Ads with a Key Skill, ' + city_title)

final_plot.set_ylabel('Percentage Appearing in Job Ads')
fig = final_plot.get_figure() # Have to convert the pandas plot object to a matplotlib object


ValueError: DataFrame constructor not properly called!

In [103]:
#overall_total_skills = prog_lang_dict + analysis_tool_dict + hadoop_dict + database_dict # Combine our Counter objects
pd.DataFrame.from_dict(overall_total_skills,orient='index', columns=['Term', 'NumPostings']) # Convert these terms to a 
 

TypeError: from_dict() got an unexpected keyword argument 'columns'

In [107]:
import pandas as pd
pd.__version__

'0.20.3'