# Linkedin

**Trabajos.**

https://www.linkedin.com/jobs/search/?geoId=105646813&keywords=data%20science&location=Espa%C3%B1a

In [1]:
from bs4 import BeautifulSoup as bs
import requests as req

import pandas as pd
import pymongo
import json
import time
import datetime
import matplotlib.pyplot as plt

In [24]:
class Linkedin:
    
    def __init__(self):
        
        self.codes={'mexico':106262181, 'españa':105646813, 'usa':103644278,
                   'brasil':106057199, 'portugal':100364837, 'alemania': 101282230,
                   'francia':105015875, 'holanda':102890719}
        
    def search(self, keywords, num_pages, country, n_secs, exp):
        
        URL='https://www.linkedin.com/jobs/search/'
        
        data=[]
        
        for i in range(num_pages):
            
            scrape_url=''.join([URL, '?keywords=', keywords,    # palabras clave de busqueda
                                     f'&location={country}',    # pais (lugar)
                                     f'&f_TPR={n_secs}',        # segundos atras
                                     f'&f_E={exp}',             # experiencia
                                     f'&start={i*25}',          # numero de paginas (i), 25 anuncios por pag
                                     f'&geoID={self.codes[country]}'])     
            
            page=req.get(scrape_url)
            
            soup=bs(page.text, 'html.parser')
            
            for card in soup.select('div.result-card__contents'):
                
                try:
                    comp_link=card.select('a', class_='result-card__subtitle-link job-result-card__subtitle-link')[0].attrs['href']
                except:
                    comp_link=''
                    
                title=card.findChild('h3', recursive=False)
                company=card.findChild('h4', recursive=False)
                location=card.findChild('span', attrs={'class':"job-result-card__location"},recursive=True)
                datetime=card.findChild('time', recursive=True).attrs['datetime']
                try:
                    desc=card.select('p')[0].text
                except:
                    desc=None
                
                
                data.append({'title':title.string,
                             'country': country,
                             'location':location.string,
                             'company':company.string,
                             'date': datetime, 
                             'description': desc,
                             'company_link': comp_link,
                             'experience': exp,
                             'keywords': keywords})
                
        return data

In [25]:
# keywords, num_pages, pais, segundos atras, exp

Linkedin().search('data', 2, 'españa', 30000, 3)[10]

{'title': 'Aviation Data Analyst - ALG Barcelona/Madrid',
 'country': 'españa',
 'location': 'Barcelona, Catalonia, Spain',
 'company': 'ALG',
 'date': '2020-12-18',
 'description': 'Aviation consultancy experience is desirable but not required: Airline Business & operations, Traffic forecast, route development & ...',
 'company_link': 'https://es.linkedin.com/company/alg?trk=public_jobs_job-result-card_result-card_subtitle-click',
 'experience': 3,
 'keywords': 'data'}

In [None]:
%%time

countries=['mexico', 'españa', 'usa', 'brasil', 'portugal', 'alemania', 'francia', 'holanda']
           

keywords=['web developer', 'javascript  developer', 'back-end  developer', 'front-end  developer',
         'software engineer', 'frontend developer', 'backend developer',
         'web designer', 'ux designer', 'web graphic designer',
         'ux/ui designer', 'product designer',
         'data engineer', 'data analyst','data scientist', 'database administrator']     


results=[]

for c in countries:
    for k in keywords:
        for ex in range(1, 4):
            try:
                results+=Linkedin().search(k, 5, c, 1e6, ex)
            except:
                continue

In [27]:
%%time

countries=['españa', 'portugal']
           

keywords=['data analyst','data scientist']     


results=[]

for c in countries:
    for k in keywords:
        for ex in range(1, 2):
            try:
                results+=Linkedin().search(k, 5, c, 1e2, ex)
            except:
                continue

CPU times: user 1.45 s, sys: 59.8 ms, total: 1.51 s
Wall time: 17.9 s


In [29]:
cliente=pymongo.MongoClient()
db_mongo=cliente.linkedin

for e in results:
    db_mongo.jobs.update(e, e, upsert=True)

  """


In [32]:
jobs_data=list(db_mongo.jobs.find({}, {'_id':0}))

df_jobs=pd.DataFrame(jobs_data)
df_jobs.to_csv('../data/jobs.csv')

df_jobs.head()

Unnamed: 0,Unnamed: 1,title,country,location,company,date,description,company_link,experience,keyword,keywords
0,0,Frontend Developer,mexico,"Hacienda San Pablo, México, Mexico",Vox Feed,2019-08-18,Passion for building interfaces that bring the...,,2,web developer,
1,1,Desarrollador Web Javascript Jr Zona Interlomas,mexico,"Atizapán de Zaragoza, México, Mexico",ALIA,2019-10-22,"Alia, Empresa Mexicana Enfocada En Brindar Ser...",https://mx.linkedin.com/company/alia?trk=publi...,2,web developer,
2,2,Desarrollador web,mexico,"Tlalnepantla, México, Mexico",Empresa: ACCOFF MANUFACTURERA S.C.,2020-05-11,ODesarrollo de componentes nativos con Javascr...,,2,web developer,
3,3,Desarrollador Web Javascript Jr Bilingüe - Zon...,mexico,"Atizapán de Zaragoza, México, Mexico",ALIA,2019-10-09,"Alia, Empresa Mexicana Enfocada En Brindar Ser...",https://mx.linkedin.com/company/alia?trk=publi...,2,web developer,
4,4,Desarrollador web,mexico,"Tlalnepantla, México, Mexico",Empresa: ACCOFF MANUFACTURERA S.C.,2020-05-14,ODesarrollo de componentes nativos con Javascr...,,2,web developer,


**Salarios.**

https://www.linkedin.com/salary/explorer?countryCode=es&geoId=105646813&titleId=340

In [51]:
class Salary:
    
    def __init__(self):
        
        self.codes={'mx':103323778, 'es':105646813, 'us':103644278,
                    'br':106057199, 'de': 101282230,
                    'fr':105015875, 'nl':102890719}
    
    def get_country(self, country):
        
        if country=='pt':
            country='es'
        
        data=[]
        
        # desarrollador web, desarrollador javascript, desarrollador back-end, desarrollador front-end,
        # diseñador web, diseñador ux, diseñador grafico web,
        # ingeniero datos, analista datos, cientifico datos, administrador base de datos
        JOBS=[100, 25170, 25194, 3172, 160, 3114, 1148, 2732, 340, 25190, 132]
        
        URL='https://www.linkedin.com/salary/explorer?'

        low=[]
        median=[]
        high=[]
        name=[]
        
        for job in JOBS:
            
            time.sleep(1)
            
            scrape_url=''.join([URL, 'countryCode={}'.format(country),
                                     '&titleId={}'.format(job),
                                     '&geoId={}'.format(self.codes[country])])

            page = req.get(scrape_url)

            soup = bs(page.text, 'html.parser')
            print(soup.select('code'))
            dictio=json.loads(soup.select('code')[8].text.strip())
            try:
                low=' '.join(list(dictio['elements'][0]['baseCompensation']['lowEnd'].values())[::-1])
                median=' '.join(list(dictio['elements'][0]['baseCompensation']['median'].values())[::-1])
                high=' '.join(list(dictio['elements'][0]['baseCompensation']['highEnd'].values())[::-1])
                name=dictio['metadata']['cohortRequested']['title']['localizedName']
            except:
                pass
            
        
            data.append({'Job_position'  : name, 
                        'LowEnd_salary'  : low, 
                        'Median_salary'  : median,
                        'HighEnd_salary' : high,
                        'Country': country,
                        'datetime': datetime.datetime.today()})
            
        
        return data

In [52]:
Salary().get_country('mx')

[<code id="bpr-guid-530853" style="display: none">
  {"mediaConfig":{"mprConfig":{"sizes":[{"width":100,"height":100},{"width":200,"height":200},{"width":400,"height":400}],"filters":{"cover":"https://media.licdn.com/mpr/mpr/shrink_{width}_{height}{+id}","contain":"https://media.licdn.com/mpr/mpr/shrinknp_{width}_{height}{+id}","original":"https://media.licdn.com/media{+id}","fill":"https://media.licdn.com/mpr/mpr/shrink_{width}_{height}{+id}"}}}}
</code>, <code id="datalet-bpr-guid-530853" style="display: none">
  {"request":"/salary-explorer/api/salaryExplorerConfiguration","status":200,"body":"bpr-guid-530853","method":"GET"}
</code>, <code id="bpr-guid-530854" style="display: none">
  {"status":404}
</code>, <code id="datalet-bpr-guid-530854" style="display: none">
  {"request":"/salary-explorer/api/salaryExplorerMe","status":404,"body":"bpr-guid-530854","method":"GET"}
</code>, <code id="bpr-guid-530855" style="display: none">
  {"status":404}
</code>, <code id="datalet-bpr-guid-5

[<code id="bpr-guid-521758" style="display: none">
  {"mediaConfig":{"mprConfig":{"sizes":[{"width":100,"height":100},{"width":200,"height":200},{"width":400,"height":400}],"filters":{"cover":"https://media.licdn.com/mpr/mpr/shrink_{width}_{height}{+id}","contain":"https://media.licdn.com/mpr/mpr/shrinknp_{width}_{height}{+id}","original":"https://media.licdn.com/media{+id}","fill":"https://media.licdn.com/mpr/mpr/shrink_{width}_{height}{+id}"}}}}
</code>, <code id="datalet-bpr-guid-521758" style="display: none">
  {"request":"/salary-explorer/api/salaryExplorerConfiguration","status":200,"body":"bpr-guid-521758","method":"GET"}
</code>, <code id="bpr-guid-521759" style="display: none">
  {"status":404}
</code>, <code id="datalet-bpr-guid-521759" style="display: none">
  {"request":"/salary-explorer/api/salaryExplorerMe","status":404,"body":"bpr-guid-521759","method":"GET"}
</code>, <code id="bpr-guid-521760" style="display: none">
  {"status":404}
</code>, <code id="datalet-bpr-guid-5

[<code id="bpr-guid-550352" style="display: none">
  {"mediaConfig":{"mprConfig":{"sizes":[{"width":100,"height":100},{"width":200,"height":200},{"width":400,"height":400}],"filters":{"cover":"https://media.licdn.com/mpr/mpr/shrink_{width}_{height}{+id}","contain":"https://media.licdn.com/mpr/mpr/shrinknp_{width}_{height}{+id}","original":"https://media.licdn.com/media{+id}","fill":"https://media.licdn.com/mpr/mpr/shrink_{width}_{height}{+id}"}}}}
</code>, <code id="datalet-bpr-guid-550352" style="display: none">
  {"request":"/salary-explorer/api/salaryExplorerConfiguration","status":200,"body":"bpr-guid-550352","method":"GET"}
</code>, <code id="bpr-guid-550353" style="display: none">
  {"status":404}
</code>, <code id="datalet-bpr-guid-550353" style="display: none">
  {"request":"/salary-explorer/api/salaryExplorerMe","status":404,"body":"bpr-guid-550353","method":"GET"}
</code>, <code id="bpr-guid-550354" style="display: none">
  {"status":404}
</code>, <code id="datalet-bpr-guid-5

[<code id="bpr-guid-531312" style="display: none">
  {"mediaConfig":{"mprConfig":{"sizes":[{"width":100,"height":100},{"width":200,"height":200},{"width":400,"height":400}],"filters":{"cover":"https://media.licdn.com/mpr/mpr/shrink_{width}_{height}{+id}","contain":"https://media.licdn.com/mpr/mpr/shrinknp_{width}_{height}{+id}","original":"https://media.licdn.com/media{+id}","fill":"https://media.licdn.com/mpr/mpr/shrink_{width}_{height}{+id}"}}}}
</code>, <code id="datalet-bpr-guid-531312" style="display: none">
  {"request":"/salary-explorer/api/salaryExplorerConfiguration","status":200,"body":"bpr-guid-531312","method":"GET"}
</code>, <code id="bpr-guid-531313" style="display: none">
  {"status":404}
</code>, <code id="datalet-bpr-guid-531313" style="display: none">
  {"request":"/salary-explorer/api/salaryExplorerMe","status":404,"body":"bpr-guid-531313","method":"GET"}
</code>, <code id="bpr-guid-531314" style="display: none">
  {"status":404}
</code>, <code id="datalet-bpr-guid-5

[<code id="bpr-guid-515965" style="display: none">
  {"mediaConfig":{"mprConfig":{"sizes":[{"width":100,"height":100},{"width":200,"height":200},{"width":400,"height":400}],"filters":{"cover":"https://media.licdn.com/mpr/mpr/shrink_{width}_{height}{+id}","contain":"https://media.licdn.com/mpr/mpr/shrinknp_{width}_{height}{+id}","original":"https://media.licdn.com/media{+id}","fill":"https://media.licdn.com/mpr/mpr/shrink_{width}_{height}{+id}"}}}}
</code>, <code id="datalet-bpr-guid-515965" style="display: none">
  {"request":"/salary-explorer/api/salaryExplorerConfiguration","status":200,"body":"bpr-guid-515965","method":"GET"}
</code>, <code id="bpr-guid-515966" style="display: none">
  {"status":404}
</code>, <code id="datalet-bpr-guid-515966" style="display: none">
  {"request":"/salary-explorer/api/salaryExplorerMe","status":404,"body":"bpr-guid-515966","method":"GET"}
</code>, <code id="bpr-guid-515967" style="display: none">
  {"status":404}
</code>, <code id="datalet-bpr-guid-5

[<code id="bpr-guid-534262" style="display: none">
  {"mediaConfig":{"mprConfig":{"sizes":[{"width":100,"height":100},{"width":200,"height":200},{"width":400,"height":400}],"filters":{"cover":"https://media.licdn.com/mpr/mpr/shrink_{width}_{height}{+id}","contain":"https://media.licdn.com/mpr/mpr/shrinknp_{width}_{height}{+id}","original":"https://media.licdn.com/media{+id}","fill":"https://media.licdn.com/mpr/mpr/shrink_{width}_{height}{+id}"}}}}
</code>, <code id="datalet-bpr-guid-534262" style="display: none">
  {"request":"/salary-explorer/api/salaryExplorerConfiguration","status":200,"body":"bpr-guid-534262","method":"GET"}
</code>, <code id="bpr-guid-534263" style="display: none">
  {"status":404}
</code>, <code id="datalet-bpr-guid-534263" style="display: none">
  {"request":"/salary-explorer/api/salaryExplorerMe","status":404,"body":"bpr-guid-534263","method":"GET"}
</code>, <code id="bpr-guid-534264" style="display: none">
  {"status":404}
</code>, <code id="datalet-bpr-guid-5

[{'Job_position': [],
  'LowEnd_salary': [],
  'Median_salary': [],
  'HighEnd_salary': [],
  'Country': 'mx',
  'datetime': datetime.datetime(2020, 12, 18, 20, 7, 9, 583521)},
 {'Job_position': [],
  'LowEnd_salary': [],
  'Median_salary': [],
  'HighEnd_salary': [],
  'Country': 'mx',
  'datetime': datetime.datetime(2020, 12, 18, 20, 7, 10, 895763)},
 {'Job_position': [],
  'LowEnd_salary': [],
  'Median_salary': [],
  'HighEnd_salary': [],
  'Country': 'mx',
  'datetime': datetime.datetime(2020, 12, 18, 20, 7, 12, 220539)},
 {'Job_position': [],
  'LowEnd_salary': [],
  'Median_salary': [],
  'HighEnd_salary': [],
  'Country': 'mx',
  'datetime': datetime.datetime(2020, 12, 18, 20, 7, 13, 537188)},
 {'Job_position': [],
  'LowEnd_salary': [],
  'Median_salary': [],
  'HighEnd_salary': [],
  'Country': 'mx',
  'datetime': datetime.datetime(2020, 12, 18, 20, 7, 14, 792415)},
 {'Job_position': [],
  'LowEnd_salary': [],
  'Median_salary': [],
  'HighEnd_salary': [],
  'Country': 'mx',


## Plots