### 1. Getting the data

In [46]:
import pandas as pd
import numpy as np
import requests

In [47]:
# Get the data from the justjoin.it API.
# The timeout keeps this cell from hanging forever on a stalled connection,
# and raise_for_status() turns an HTTP error response into an immediate
# exception instead of letting an error payload reach the DataFrame steps.
response = requests.get('https://justjoin.it/api/offers', timeout=30)
response.raise_for_status()
# NOTE: this name shadows the stdlib `json` module; it is kept because the
# following cells read the decoded payload from `json`.
json = response.json()

In [50]:
# Flatten the decoded payload into a DataFrame with pd.json_normalize
# and preview the first two rows.
json_normalize = pd.json_normalize(data=json)
json_normalize.head(n=2)

Unnamed: 0,title,street,city,country_code,address_text,marker_icon,workplace_type,company_name,company_url,company_size,...,remote_interview,open_to_hire_ukrainians,id,display_offer,employment_types,company_logo_url,skills,remote,multilocation,way_of_apply
0,Automation Tester - Car Sharing Platform,Złota 59,Warszawa,PL,"Złota 59, Warszawa",testing,partly_remote,ITDS,http://www.itds.pl,200+,...,True,True,itds-automation-tester-car-sharing-platform,True,"[{'type': 'b2b', 'salary': {'from': 10000, 'to...",https://bucket.justjoin.it/offers/company_logo...,"[{'name': 'Python', 'level': 3}, {'name': 'Jav...",False,"[{'city': 'Warszawa', 'street': 'Złota 59', 's...",form
1,Remote Mid Android Developer,Skierniewicka 10,Warszawa,PL,"Skierniewicka 10, Warszawa",mobile,remote,Mobica,http://mobica.com,100+,...,True,False,mobica-remote-mid-android-developer,True,"[{'type': 'b2b', 'salary': {'from': 11760, 'to...",https://bucket.justjoin.it/offers/company_logo...,"[{'name': 'RxJava', 'level': 3}, {'name': 'Kot...",True,"[{'city': 'Warszawa', 'slug': 'mobica-remote-m...",form


Some columns still contain nested records (lists of dicts). We can call `json_normalize` again, this time with two extra parameters:
- `record_path` - the path to the nested list of records that should be unpacked into rows
- `meta` - the top-level (non-nested) fields to attach as extra columns to every unpacked row

In [54]:
# Unpack the nested `employment_types` records into rows and carry the
# listed top-level fields along as extra columns.
df = pd.json_normalize(
    data=json,
    record_path='employment_types',
    meta=['id', 'title', 'marker_icon', 'workplace_type', 'remote', 'published_at'],
)
df.head(n=5)

Unnamed: 0,type,salary.from,salary.to,salary.currency,salary,id,title,marker_icon,workplace_type,remote,published_at
0,b2b,10000.0,15000.0,pln,,itds-automation-tester-car-sharing-platform,Automation Tester - Car Sharing Platform,testing,partly_remote,False,2022-06-18T08:00:26.475Z
1,permanent,8000.0,11500.0,pln,,itds-automation-tester-car-sharing-platform,Automation Tester - Car Sharing Platform,testing,partly_remote,False,2022-06-18T08:00:26.475Z
2,b2b,11760.0,22680.0,pln,,mobica-remote-mid-android-developer,Remote Mid Android Developer,mobile,remote,True,2022-06-18T08:00:26.475Z
3,b2b,6000.0,10000.0,pln,,codibly-it-administrator,IT Administrator,admin,office,False,2022-06-18T08:00:26.475Z
4,permanent,16000.0,22000.0,pln,,globallogic-autosar-architect,AUTOSAR Architect,architecture,remote,True,2022-06-18T08:00:26.475Z


In [58]:
# Keep only the listed columns, in this order; "salary" is not listed,
# so it is dropped from the frame.
df = df.reindex(
    [
        'id', 'title', 'marker_icon', 'workplace_type', 'remote', 'published_at',
        'type', 'salary.from', 'salary.to', 'salary.currency',
    ],
    axis='columns',
)
# Inspect the dtype of every remaining column
df.dtypes

id                  object
title               object
marker_icon         object
workplace_type      object
remote              object
published_at        object
type                object
salary.from        float64
salary.to          float64
salary.currency     object
dtype: object

In [71]:
# Cast `remote` to bool and parse the `published_at` strings as datetimes
df = df.assign(
    remote=df['remote'].astype('bool'),
    published_at=pd.to_datetime(df['published_at'], utc=False),
)

In [72]:
# Preview the first five rows after the dtype conversion
df.head(n=5)

Unnamed: 0,id,title,marker_icon,workplace_type,remote,published_at,type,salary.from,salary.to,salary.currency
0,itds-automation-tester-car-sharing-platform,Automation Tester - Car Sharing Platform,testing,partly_remote,False,2022-06-18,b2b,10000.0,15000.0,pln
1,itds-automation-tester-car-sharing-platform,Automation Tester - Car Sharing Platform,testing,partly_remote,False,2022-06-18,permanent,8000.0,11500.0,pln
2,mobica-remote-mid-android-developer,Remote Mid Android Developer,mobile,remote,True,2022-06-18,b2b,11760.0,22680.0,pln
3,codibly-it-administrator,IT Administrator,admin,office,False,2022-06-18,b2b,6000.0,10000.0,pln
4,globallogic-autosar-architect,AUTOSAR Architect,architecture,remote,True,2022-06-18,permanent,16000.0,22000.0,pln
