In [1]:
import ast

import pandas as pd
import requests as rq
from pyscbwrapper import SCB

-----------------
### Request the Data From SCB

In [2]:
# Table path for the 1994-2008 series
old_table_path = ('NV', 'NV1401', 'NV1401X', 'KonkurserAnst')
scb_old = SCB('sv', *old_table_path)
fetch_data_old = scb_old.get_data()

# Table path for the 2009-2022 series
new_table_path = ('NV', 'NV1401', 'NV1401A', 'KonkurserAnst07')
scb_new = SCB('sv', *new_table_path)
fetch_data_new = scb_new.get_data()

------------------
### Save the Raw Data

In [3]:
# Dump both API responses to disk, old series first, then the new one.
# NOTE: the f-string writes each object's formatted string form, which is not
# necessarily valid JSON despite the .json extension; later cells parse these
# files back as Python expressions.
raw_dumps = (
    ('source/source_konkurser_1994-2008_from_api.json', fetch_data_old),
    ('source/source_konkurser_2009-2022_from_api.json', fetch_data_new),
)

for dump_path, payload in raw_dumps:
    with open(dump_path, 'w') as outfile:
        outfile.write(f'{payload}')

------------
### Remove Irrelevant Data

In [4]:
# The source files hold the saved API responses as Python-literal text.
# They are parsed with ast.literal_eval rather than eval(): literal_eval only
# accepts literal syntax (dicts, lists, strings, numbers, ...), so nothing
# stored in a file can be executed as code.
#
# Each input is fully read and parsed BEFORE its output file is opened, so a
# parse failure no longer leaves a truncated output file behind.

# First Old Data
old_source_data = 'source/source_konkurser_1994-2008_from_api.json'
old_only_data   = 'raw/raw_konkurser_1994-2008_from_api.json'

with open(old_source_data, 'r') as old_in:
    # Read the whole file instead of only its first line, so parsing does not
    # depend on the payload being stored on a single line.
    old_text = ast.literal_eval(old_in.read())

# Keep only the 'data' entry, written in the same text form the next step reads.
with open(old_only_data, 'w') as old_out:
    old_out.write(f"{old_text['data']}")


# Then New Data
new_raw_data  = 'source/source_konkurser_2009-2022_from_api.json'
new_only_data = 'raw/raw_konkurser_2009-2022_from_api.json'

with open(new_raw_data, 'r') as new_in:
    new_text = ast.literal_eval(new_in.read())

with open(new_only_data, 'w') as new_out:
    new_out.write(f"{new_text['data']}")

------------
### Clean the Data

In [5]:
from collections import defaultdict

# Define a dict to store values in memory (period -> value)
dict_merge = defaultdict(str)

# Load both record lists. ast.literal_eval is used instead of eval() so the
# file contents are parsed strictly as Python literals and can never run code.
with open(old_only_data, 'r') as old_in, open(new_only_data, 'r') as new_in:
    list_old = ast.literal_eval(old_in.read())
    list_new = ast.literal_eval(new_in.read())

# Old items are added first, then new ones, so a period that appears in both
# series ends up with the value from the new series.
for series in (list_old, list_new):
    for item in series:
        # Only the first key component and the first value are used.
        # The 'M' -> '-' replacement presumably turns periods such as
        # '1994M01' into '1994-01' -- TODO confirm against the raw files.
        dict_merge[item['key'][0].replace('M', '-')] = item['values'][0]


# Make a df; materialise the items view as a list of (date, value) rows
df = pd.DataFrame(list(dict_merge.items()), columns = ['date', 'layoffs'])

In [6]:
# Count missing values per column and show the totals
null_counts = df.isna().sum()
print(null_counts)

date       0
layoffs    0
dtype: int64


------------
### Save as New Merged and Cleaned File

In [7]:
# Write the merged frame to CSV, leaving out the index column
clean_csv_path = 'merge/clean_konkurser_1994-2022_from_api.csv'
df.to_csv(clean_csv_path, index=False)