In [1]:
#packages
import csv
import re

import nltk
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from nltk.corpus import wordnet
from selenium import webdriver

In [2]:
def raw_text(url, timeout=30):
    """Download a job ad (in this case a Linkedin ad) and return its body text.

    Parameters
    ----------
    url : str
        Address of the job posting.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests.get`` may block indefinitely on a
        stalled connection.

    Returns
    -------
    str
        Text of the first ``<div>`` carrying the job-description class.

    Raises
    ------
    IndexError
        If the downloaded page contains no such ``<div>``.
    requests.exceptions.RequestException
        If the request itself fails (including a timeout).
    """
    ad = requests.get(url, timeout=timeout)  # Retrieve webpage
    html = BeautifulSoup(ad.text, 'html.parser')  # Parse the html into a searchable tree
    matches = html.find_all('div',
                            {'class': "show-more-less-html__markup show-more-less-html__markup--clamp-after-5"})
    if not matches:
        # Same exception type as indexing the empty result would raise, but with
        # enough context to see which ad failed and what the server answered.
        raise IndexError(
            "no job-description <div> found at {} (HTTP status {})".format(url, ad.status_code)
        )
    return matches[0].text
def clean_text(doc):
    """Tokenize an unstructured document into standardized words.

    Steps: extract word tokens, split words that are glued together
    ('ThisExample' -> 'This', 'Example'), lowercase everything, drop English
    stopwords, then lemmatize each word using its part-of-speech tag.

    Parameters
    ----------
    doc : str
        Raw text of the document.

    Returns
    -------
    list of str
        Lowercased, lemmatized words. Words that were a single case in the
        original text come first, followed by the pieces of split words.
    """
    # Runs of letters, '#' and '+', so "C++" and "C#" survive as one token.
    # The '+' must be escaped: an unescaped "+" after a quote character is a
    # quantifier and would match double quotes instead of plus signs.
    tokens = re.findall(r'(?:[a-zA-Z]|#|\+)+', doc)
    # Tokens written in one case are taken as-is.
    single_case = [t for t in tokens if t.isupper() or t.islower()]
    # Mixed-case tokens are treated as words stuck together and split before
    # each capital letter. Tokens without any letter (e.g. a lone "+") yield
    # no pieces and are dropped here.
    mixed_case = [t for t in tokens if not t.islower() and not t.isupper()]
    split_parts = [part
                   for t in mixed_case
                   for part in re.findall('[a-zA-Z][^A-Z]*', t)]
    # A set makes each stopword lookup constant-time.
    stopwords = set(nltk.corpus.stopwords.words("english"))
    words = [w.lower() for w in single_case + split_parts]
    words = [w for w in words if w not in stopwords]
    # Tag each word, convert the tag into one the lemmatizer understands,
    # then lemmatize.
    tagged = nltk.pos_tag(words)
    lemmatizer = nltk.WordNetLemmatizer()
    return [lemmatizer.lemmatize(word, wordnet_pos(tag)) for word, tag in tagged]
def wordnet_pos(tag):
    """Translate a POS tag into the matching WordNet part-of-speech constant.

    Only the first character of ``tag`` is inspected: "N" -> noun,
    "V" -> verb, "R" -> adverb, "J" -> adjective. Any other tag falls back
    to noun.
    """
    # Function created by Bo Ning in Week 6-2
    initial = tag[0]
    if initial == "V":
        return wordnet.VERB
    if initial == "R":
        return wordnet.ADV
    if initial == "J":
        return wordnet.ADJ
    # "N" and every unrecognised tag map to noun.
    return wordnet.NOUN
def lang_count(TXT):
    """Count how many of the tracked programming languages/tools an ad mentions.

    Parameters
    ----------
    TXT : list of str
        Cleaned words of the ad (as produced by ``clean_text``). Pass a list
        of tokens: with a plain string, ``in`` would do substring matching and
        short names such as 'r' or 'c' would match almost any text.

    Returns
    -------
    int
        Number of distinct entries of the language list found in ``TXT``;
        repeated mentions of the same language count once.
    """
    # Every entry needs its own trailing comma: adjacent string literals are
    # silently concatenated ('tableau' 'microsoft' -> 'tableaumicrosoft').
    languages = ['python', 'r', 'sql', 'sa', 'c',
                 'c++', 'c#', 'java', 'javascript',
                 'julia', 'matlab', 'swift', 'tableau',
                 'microsoft', 'github']  # SAS turns into sa after lemmatization
    # ADD MORE LANGUAGES IF NECESSARY
    # Each membership test is a bool; summing them counts the languages present.
    return sum(lang in TXT for lang in languages)
def get_salary(TXT):
    """Retrieve a salary figure from a body of raw text.

    Looks for dollar amounts written with thousands separators. Amounts that
    include cents (``$85,000.00``) take precedence over amounts without
    (``$85,000``). Within the chosen form the LAST occurrence in the text is
    returned, which is the upper bound when a range is written "low - high".

    Parameters
    ----------
    TXT : str
        Raw text of the ad.

    Returns
    -------
    str
        The matched amount including the dollar sign, or the string "NaN"
        when no amount is found.
    """
    # "$" + digits + one or more ",digits" groups. Allowing several groups
    # keeps "$1,250,000" whole instead of cutting it down to "$1,250".
    amount = r"\$\d+(?:,\d+)+"
    for pattern in (amount + r"\.\d{1,2}", amount):  # with cents first, then without
        salaries = re.findall(pattern, TXT)
        if salaries:
            return salaries[-1]
    return "NaN"
def ML_skill(TXT):
    """Flag whether a body of clean text mentions machine learning.

    Returns "1" when both 'machine' and 'learning' occur in TXT (anywhere,
    not necessarily next to each other), otherwise "0".
    """
    required = ('machine', 'learning')
    mentioned = all(term in TXT for term in required)
    return "1" if mentioned else "0"
def get_edu(TXT):
    """Classify the education level mentioned in a body of raw text.

    Returns one of the string codes:
      "4" - both "Master" and "Bachelor" appear (Bachelor's is the minimum
            but a higher level is preferred); checked before anything else
      "3" - "PhD" appears
      "2" - "Master" appears
      "1" - "Bachelor" appears
      "0" - no education specified
    """
    has_master = "Master" in TXT
    has_bachelor = "Bachelor" in TXT

    if has_master and has_bachelor:
        return "4"
    if "PhD" in TXT:
        return "3"
    if has_master:
        return "2"
    if has_bachelor:
        return "1"
    return "0"
def ad_type(TXT):
    """Tell data scientist ads from data analyst ads using a body of clean text.

    Returns "1" when "science" occurs in TXT, otherwise "0".
    """
    mentions_science = "science" in TXT
    return "1" if mentions_science else "0"
def benefits(TXT):
    """Check a body of raw text for a mention of benefits.

    Returns "1" when 'Benefits' or 'benefits' occurs in TXT, otherwise "0".
    Other capitalisations (e.g. 'BENEFITS') are not recognised.
    """
    spellings = ('Benefits', 'benefits')
    return "1" if any(word in TXT for word in spellings) else "0"
def exp(TXT):
    """Extract the years of experience mentioned in a body of raw text.

    The text is split into sentences. In the first sentence containing a
    number that is followed (on the same line) by "year", the leading number
    of that match is returned as a string. Returns "NaN" when no sentence
    contains such a match.
    """
    for sentence in nltk.sent_tokenize(TXT):  # Split text into sentences
        spans = re.findall(r"\d+.*year", sentence)
        if spans:
            # Every match starts with digits, so this lookup cannot come up empty.
            return re.findall(r'\d+', spans[0])[0]
    return "NaN"

In [3]:
def collect_data(url):
    """Scrape one Linkedin ad and gather every feature of interest.

    The raw ad text feeds the salary, education, benefits and experience
    extractors; the cleaned word list feeds the language count, the
    machine-learning flag and the ad type. The URL itself is stored too.

    Returns a dict with the keys 'Languages', 'Salary', 'Machine Learning',
    'Education', 'Type', 'Benefits', 'Experience' and 'url', in that order.
    """
    description = raw_text(url)
    tokens = clean_text(description)

    record = {}
    record['Languages'] = lang_count(tokens)
    record['Salary'] = get_salary(description)
    record['Machine Learning'] = ML_skill(tokens)
    record['Education'] = get_edu(description)
    record['Type'] = ad_type(tokens)
    record['Benefits'] = benefits(description)
    record['Experience'] = exp(description)
    record['url'] = url
    return record

In [4]:
url = [
    'https://www.linkedin.com/jobs/view/3249211510/?alternateChannel=search&refId=VD198ZoVfaKwgiB8zkGpbw%3D%3D&trackingId=BqYouS74cmZGHpxL9C2ZGA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3487242175/?alternateChannel=search&refId=JXABqO3QZgv%2BLHOKFDE7fw%3D%3D&trackingId=N9I6sffo3LQS3fZ%2BA%2BKsEg%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3505418682/?alternateChannel=search&refId=%2BpaSeyQbeKD%2BxYKKAizIPQ%3D%3D&trackingId=0Lxb1PxTW7n%2FDV7rrhIG8Q%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3498974602/?alternateChannel=search&refId=UgS%2BLnzOxzd0o5lfBwrIUg%3D%3D&trackingId=gzR56kYgzIfDemBhp74f6g%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3508607089/?alternateChannel=search&refId=mjF%2Bzo2ld8vXVKBKARXTYA%3D%3D&trackingId=loqcIsi7A8CL8Vmop62OSg%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3456862041/?alternateChannel=search&refId=mjF%2Bzo2ld8vXVKBKARXTYA%3D%3D&trackingId=juDPb5S4%2ByysHmbv9JDl5g%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3507843178/?alternateChannel=search&refId=mjF%2Bzo2ld8vXVKBKARXTYA%3D%3D&trackingId=XHD%2B27dW8g8P%2BKaCAw4emQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3507141700/?alternateChannel=search&refId=mjF%2Bzo2ld8vXVKBKARXTYA%3D%3D&trackingId=P54YAehQYvhHb1JvgxN8ag%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3492916139/?alternateChannel=search&refId=NWABB47NCvXLKrd7Ykt7TA%3D%3D&trackingId=a3nC%2BSZ6wfMUPl3mewKU1g%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3499269839/?alternateChannel=search&refId=NWABB47NCvXLKrd7Ykt7TA%3D%3D&trackingId=V6OqJudkv7yK35YLX9NdbA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3499255853/?alternateChannel=search&refId=NWABB47NCvXLKrd7Ykt7TA%3D%3D&trackingId=mJh5WrzLB4mTkKzDIU3zXw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3511632332/?alternateChannel=search&refId=mjF%2Bzo2ld8vXVKBKARXTYA%3D%3D&trackingId=lEarBtUbrbcWqTvWU6XeOQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3505403106/?alternateChannel=search&refId=mjF%2Bzo2ld8vXVKBKARXTYA%3D%3D&trackingId=ZwAL4nT5X%2F%2BZqwyio47svA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3512272186/?alternateChannel=search&refId=mjF%2Bzo2ld8vXVKBKARXTYA%3D%3D&trackingId=DuDxT6L0wnOVfyTzr789Xg%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3499126864/?alternateChannel=search&refId=NWABB47NCvXLKrd7Ykt7TA%3D%3D&trackingId=StIEWFb2FP%2FWkYGpYw24Nw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3505977879/?alternateChannel=search&refId=mjF%2Bzo2ld8vXVKBKARXTYA%3D%3D&trackingId=WkyM1jycSWWDghtmqQcMlw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3509655528/?alternateChannel=search&refId=gPWJCLIvYpWYFGgTxJqtJw%3D%3D&trackingId=Vt2F8v9iONwtg%2BQnfuGIow%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3505977879/?alternateChannel=search&refId=gPWJCLIvYpWYFGgTxJqtJw%3D%3D&trackingId=Dlqta%2BGB01lG1%2FZ3C0q7AQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3487142091/?alternateChannel=search&refId=eUf97BLKMcJCe7F8oZDXrQ%3D%3D&trackingId=Cqns7vrChI9d3IyXc4sWhA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3488691439/?alternateChannel=search&refId=NWABB47NCvXLKrd7Ykt7TA%3D%3D&trackingId=7QSNY3MZRgXIP%2F7L9Hb6TA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3491778866/?alternateChannel=search&eBP=CwEAAAGGrnZxmz-4qudDf0AdmTbtkBai6eckvna0nULvtPTyWR3KcRAlWohbkoheImFe6yrm0PJGCqms3ANrie3eQ-zOxZRQbiemcoTXSvd0m2aUuSP3_shuvJGjJAFGTLmtgwN8SjwrMcqjk57QmvxZRVbL4iCeeS8GhQGSatY6sMgtNtBK1wWP4hgWr5bULp_WIt9nkD_D-V4Q-Ef0ncD9AzGOw4o3s6vEaWVcp-b_CThnm8pycRz5T6xECAOSoKAn_8Et9HMrH6T-isrNFi7vAE37x5hbjg1EXnXR1q45NOObNqm8v_D169Q-ZXScDreN2dYIuPSehS4TtesZZioygUYMZV0nVrjeu25ZQN8onUq1eIxTWgflHdowbddk3ti6BI8kOQ&refId=NWABB47NCvXLKrd7Ykt7TA%3D%3D&trackingId=QYFx7G1E1JVHSs1rAAH6%2Bw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3496731411/?alternateChannel=search&refId=eUf97BLKMcJCe7F8oZDXrQ%3D%3D&trackingId=WHPyV1Ya1NlCbfjMXJjV%2BQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3488630612/?alternateChannel=search&refId=NWABB47NCvXLKrd7Ykt7TA%3D%3D&trackingId=0HkaQFncmzSAAm1VAFz%2F6g%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3496102239/?alternateChannel=search&refId=NWABB47NCvXLKrd7Ykt7TA%3D%3D&trackingId=JiYcbVpm%2BKz%2Fl9HpYKM2ow%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3495674661/?alternateChannel=search&refId=NWABB47NCvXLKrd7Ykt7TA%3D%3D&trackingId=B1rPbwrTjIzCrTSyGxGDdw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3491959780/?alternateChannel=search&refId=UgS%2BLnzOxzd0o5lfBwrIUg%3D%3D&trackingId=yCZVySVeiQvNcAs8tV7O5g%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3482557663/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=eFtodEnJq8%2FvDhGKTFy9vA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3497655155/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=yaesC0%2FtNjjOxUrebgAuow%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3496092247/?alternateChannel=search&refId=tD86EsvkK2i9il%2BaH3%2FXhQ%3D%3D&trackingId=MwHDvID6jzlUD%2BPT1jU%2FdA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3493897268/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=GBF0o%2BqVmbDn5c36zI4qcQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3496092247/?alternateChannel=search&refId=VHK7B18qKFlkneKMGosKdg%3D%3D&trackingId=1rwG%2BDddA1JXaBxCVyhSyg%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3495686468/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=SnethqT%2B4%2Fa89lESpJllDw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3485531526/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=eRDs7hsAjfDBS8Tt8iIThQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3503792094/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=P%2FRB55O%2B%2BGYAP8gTL9ugzQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3495654543/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=lc3G0c8WgTlWBNO0saNtbg%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3495654842/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=zn%2FE4wTVmE%2F1Z1f4C2ZVKA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3491485328/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=fzlqSgFB2yLvKnF%2BhSCwNA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3491485328/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=fzlqSgFB2yLvKnF%2BhSCwNA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3480285201/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=oKy93LI7m48hkD5o%2FU0Gaw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3488092382/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=uMo5pboZR48Oo%2B0SoQup6g%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3500262772/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=73%2FfPVUyE3OOrGXUb%2FmCWA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3489487289/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=lx8CV2%2Fymqo3tMC%2BI4hsYQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3495634532/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=MFGxvS1mP%2BSqvl3o%2Fu1Sxg%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3494529354/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=KmkV0t2YATwAzRNVksZZKA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3494564440/?alternateChannel=search&refId=bUuSO67FACleck0qLIsOEw%3D%3D&trackingId=V0rJXgQpqFgEFsEjKH9nIA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3495693106/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=CX0IhX5V77rvigNHMkIrrQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3487777637/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=BgtW%2FNgcmVtsXQiKzYtNoA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3487709897/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=PShEgSQ3izBxrBULeHxCkA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3512466597/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=%2FyLV2WJVdHyZomxOtwp4jA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3495654381/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=5FlcyfVV9LL3fKelk0mxKw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3497733873/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=lnnR0DzdirfW1u1zNtOlPw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3499554593/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=B3KwFaQdacHuzEgbG%2FG8bQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3494451117/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=N4k%2FFHwodKXVcTP%2FmHLYqw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3512451344/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=5XaohZQ6qB63Z7tIIjjhQA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3494238784/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=igZN8F%2BOFt6%2Byp2snzd8YQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3500266283/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=kclMX7LrdETh0v4POX0vLw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3495578342/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=0Ob422e1FO0DIIXMvSm5EQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3490804946/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=cuqQcFpGS9y5yq7gb3UV7Q%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3512520788/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=mjG2eaO8VZpMtzSy4P3Owg%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3490303130/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=4zpQrFg96sHFDEjZ3zdqBg%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3493909748/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=1HilajB3LHEsvebT4PNQTg%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3485549081/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=tBtPBbnxI%2BnQb68734FC1Q%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3496797402/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=KcmjmUD7%2F7bG%2FZlZlX0qtA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3507194381/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=qyDp2TbPLRTBq2maWevV6g%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3488658887/?alternateChannel=search&refId=dkG0OgTVkJlbNe5Acdpd%2BA%3D%3D&trackingId=SRHp0scNNRfnjwftbEAPoQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3505492538/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=pQS1%2BXa7jJ5c5ORNPG2oDw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3505496763/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=Ky1AMuFgVY0eg5xWXAV6pA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3496431666/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=BIxqiZZTckSPgzv7Eo0JYw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3511827145/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=XHmIhAk%2BQvSuRU%2BaeYe88A%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3512429439/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=n8SEoeoz1dfLqu47MuFjIA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3512515787/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=rsvwuW%2BCsyvBzd3tFuKprg%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3505448647/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=V6v0QByb4WC%2BUTOlI4Vv9A%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3507896566/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=C%2Fb04eEQ2XPmAhNMYmxTrw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3505424217/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=eWfeN2eGi2trEqZzQHTpwA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3505905528/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=zsbcfe9%2BZyo2KpmJUx2LLw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3500269812/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=XtAo8iXdP8AvjWWwD8nD6Q%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3511369169/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=MMecUSa%2BH63rhaZ%2FFRZR6w%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3495610932/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=J3JlNhwuDXXFk%2BzWOwTiMg%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3507844077/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=eg3UbzwOtVX1ODBaF0EmKQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3507860531/?alternateChannel=search&refId=tjE8t1GFVv2h1AdFOsRIzA%3D%3D&trackingId=Snpgxpd6WRLVZpJbHpBDzw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3490606955/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=DCxbydnFgo3SRPa%2FHTYJUA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3493466568/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=sTkNRmT6MNXgFu82uvfz6A%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3495647309/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=RkJ%2FKZoNLXDamjy887uYYg%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3494073924/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=gWsH6wbX8xqRYkZq7r4AFQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3485575875/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=NsRV%2FetbSd8wHxul61SKLw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3498811906/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=rDebeUGzBQw19sg98lGXzw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3494592372/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=jtLvE1kETibm%2BJVF0Xak%2BA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3486412422/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=LFl2SSkSQUGLNKZAOjBwZA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3498130371/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=S5%2F3%2FvefEvnT%2FGDg6OSkPw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3501081381/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=BdLYQyqU14CzyrYTmJYlmQ%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3495815900/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=9Vm07nw6qvAl1PI7N2og%2BA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3489481743/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=P3kK82gRd%2BMURBwBr21qLw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3495476456/?alternateChannel=search&refId=PoisozcNOKyt2tr%2Fz76%2BRA%3D%3D&trackingId=Dt%2F%2Fz%2B4jn3qiAjzOkKCaiA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3489526424/?alternateChannel=search&refId=UgS%2BLnzOxzd0o5lfBwrIUg%3D%3D&trackingId=%2FRUhZbHsdO6F6vJ%2BYNSkdw%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3488623693/?alternateChannel=search&refId=UgS%2BLnzOxzd0o5lfBwrIUg%3D%3D&trackingId=PSYr740wMPbe%2FZZx%2BPCQxA%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3492602300/?alternateChannel=search&refId=UgS%2BLnzOxzd0o5lfBwrIUg%3D%3D&trackingId=wSyeZXXhgtMUxl2lkOdp5w%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3503741339/?alternateChannel=search&refId=UgS%2BLnzOxzd0o5lfBwrIUg%3D%3D&trackingId=zIhbNtFJmy5ZXEu1GBnZ8A%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3497727530/?alternateChannel=search&refId=UgS%2BLnzOxzd0o5lfBwrIUg%3D%3D&trackingId=02ldEIy%2BIL%2BUrVUe5BOc0w%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3497727530/?alternateChannel=search&refId=UgS%2BLnzOxzd0o5lfBwrIUg%3D%3D&trackingId=02ldEIy%2BIL%2BUrVUe5BOc0w%3D%3D&trk=d_flagship3_search_srp_jobs',
    'https://www.linkedin.com/jobs/view/3488239050/?alternateChannel=search&refId=UgS%2BLnzOxzd0o5lfBwrIUg%3D%3D&trackingId=xFWvNOU50J5jZ9TTsXwDRA%3D%3D&trk=d_flagship3_search_srp_jobs'
]

In [5]:
# Sanity check: the batching further down assumes 100 URLs.
len(url)

100

In [6]:
BATCH_SIZE = 10  # ads scraped per batch


def collect_batch(batch_number, batch_size=BATCH_SIZE):
    """Scrape one batch of ads from the ``url`` list.

    Parameters
    ----------
    batch_number : int
        1-based batch index: batch 1 covers url[0:10], batch 2 covers
        url[10:20], and so on (for the default batch size).
    batch_size : int, optional
        Number of URLs per batch.

    Returns
    -------
    list of dict
        One ``collect_data`` record per URL of the batch, in list order.
    """
    start = (batch_number - 1) * batch_size
    return [collect_data(link) for link in url[start:start + batch_size]]


# Scrape all ten batches in order. The individual names are kept because the
# cells below refer to them. If a single batch fails it can be redone on its
# own, e.g. data_analyst7 = collect_batch(7).
(data_analyst1, data_analyst2, data_analyst3, data_analyst4, data_analyst5,
 data_analyst6, data_analyst7, data_analyst8, data_analyst9,
 data_analyst10) = [collect_batch(k) for k in range(1, 11)]

In [21]:
#create one list
data_analyst_final = [
    record
    for fold in (data_analyst1, data_analyst2, data_analyst3, data_analyst4,
                 data_analyst5, data_analyst6, data_analyst7, data_analyst8,
                 data_analyst9, data_analyst10)
    for record in fold
]  # merge the data_analyst folds, keeping their order
len(data_analyst_final)


100

In [22]:
# Size of every fold, one per line
print(*(len(fold) for fold in (data_analyst1, data_analyst2, data_analyst3,
                               data_analyst4, data_analyst5, data_analyst6,
                               data_analyst7, data_analyst8, data_analyst9,
                               data_analyst10)),
      sep="\n")

10
10
10
10
10
10
10
10
10
10


In [25]:
# Create a csv file with the 100 data analyst postings.
# (csv is imported with the other packages in the first cell.)

# Column order follows the key order of the first record; DictWriter expects
# the field names as a sequence, hence the list().
keys = list(data_analyst_final[0].keys())

# newline='' is what the csv module asks for when writing; an explicit encoding
# keeps the file identical regardless of the platform default.
with open('data_analyst.csv', 'w', newline='', encoding='utf-8') as output_file:
    dict_writer = csv.DictWriter(output_file, fieldnames=keys)
    dict_writer.writeheader()
    dict_writer.writerows(data_analyst_final)

In [None]:
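# Disabled debugging loop (presumably used to spot which URL makes
# collect_data fail): scrape the ads one by one, printing index and URL first.
# number = 0
# for i in url:
#     number += 1
#     print(number)
#     print(i)
#     collect_data(i)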