# Collect Jobs Data using GitHub Jobs API

In [41]:
import pandas as pd
import requests
import json

## Objective: Determine the number of jobs currently open for various technologies
Collect the number of job postings for the following languages using the API:

* C
* C#
* C++
* Java
* JavaScript
* Python
* Scala
* Oracle
* SQL Server
* MySQL Server
* PostgreSQL
* MongoDB

In [33]:
# Write a function to get the number of jobs for the given technology.
#Note: The API gives a maximum of 50 jobs per page.
#If you get 50 jobs per page, it means there could be some more job listings available.
#So if you get 50 jobs per page you should make another API call for next page to check for more jobs.
#If you get less than 50 jobs per page, you can take it as the final count.
baseurl = "https://jobs.github.com/positions.json"

def get_number_of_jobs(technology):
    
    number_of_jobs = 0
    page = 0
    job_count = 0
    search_full = False

    while search_full == False:
        url = baseurl +'?description={}'.format(technology) + '&page={}'.format(page)
        response = requests.get(url)
        if response.ok:
            data = response.json()
            number_of_jobs = number_of_jobs + len(data)
            job_count = len(data)
            if job_count == 50:
                page = page + 1
                search_full = False
            else:
                search_full = True
    return technology,number_of_jobs

In [35]:
#test the function get_number_of_jobs()
technology = 'Python'
get_number_of_jobs(technology)

In [39]:
# list of technologies
technologies = ['C','C%23', 'C++','Java','JavaScript','Python','Scala','Oracle','SQL Server','MySQL Server','PostgreSQL','MongoDB']
#an empty list, will contain Name of tech and number of jobs for this tech
num_jobs_tech = []
# Use C%23 inplace of C# in the coding and you will be surprised the output will come as C#  job numbers
for tech in technologies:
    # initial dict named cell
    cell = {}
    # get data    
    result=get_number_of_jobs(tech)
    # put data into dict
    cell['Technology'] = result[0]
    cell['Numbers_Job_Ad'] = result[1]
    # add cell to list
    num_jobs_tech.append(cell)

In [42]:
df_num_jobs_tech = pd.DataFrame(num_jobs_tech) 
df_num_jobs_tech

Unnamed: 0,Technology,Numbers_Job_Ad
0,C,268
1,C%23,29
2,C++,268
3,Java,149
4,JavaScript,124
5,Python,109
6,Scala,106
7,Oracle,9
8,SQL Server,22
9,MySQL Server,7


In [44]:
# change C%23 to C#
df_num_jobs_tech.loc[1,'Technology'] = 'C#'

In [45]:
df_num_jobs_tech

Unnamed: 0,Technology,Numbers_Job_Ad
0,C,268
1,C#,29
2,C++,268
3,Java,149
4,JavaScript,124
5,Python,109
6,Scala,106
7,Oracle,9
8,SQL Server,22
9,MySQL Server,7


In [47]:
# save to excel file
# if you have install openpyxl module for handle xlsx file, pleas do it pip install openpyxl
df_num_jobs_tech.to_excel('github-job-postings.xlsx')