## Script for processing and transforming pickle files to dataframes

In [51]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd

##### Importing pickle file

In [52]:
# Location of the raw pickle dump, relative to the working directory.
# The path is assembled from components so it resolves on every operating
# system; a backslash-separated literal is only understood on Windows.
raw_data_path = Path('..', '..', 'Data', 'Raw', 'mattilsynet_rawdata.obj')

# SECURITY: pickle.load can execute arbitrary code stored in the file, so this
# must only ever read a dump that comes from a trusted source.
with open(raw_data_path, 'rb') as file:
    mattilsynet_data = pickle.load(file)

##### Converting the list to a pandas DataFrame

In [53]:
# Build a DataFrame from the unpickled object.
# NOTE(review): presumably a list of records, one per row — confirm against
# the code that produced the dump.
df = pd.DataFrame(mattilsynet_data)

##### Removing duplicate columns 
##### (dataset has both bokmål and nynorsk description of grading categories)

In [54]:
# The nynorsk ("_nn") description columns tema1_nn .. tema4_nn are dropped;
# only the bokmål ("_no") descriptions are kept.
duplicate_columns = [f'tema{n}_nn' for n in range(1, 5)]
df = df.drop(columns=duplicate_columns)

##### Changing order of columns

In [55]:
# Desired left-to-right column layout of the output table.
column_order = [
    'tilsynid', 'navn', 'orgnummer',
    'adrlinje1', 'postnr', 'poststed',
    'tema1_no', 'tema2_no', 'tema3_no', 'tema4_no',
    'karakter1', 'karakter2', 'karakter3', 'karakter4',
    'total_karakter', 'dato', 'tilsynsbesoektype',
    'tilsynsobjektid', 'sakref', 'status',
]
# reindex keeps exactly the listed columns in this order: columns that are not
# listed are dropped, and listed names that do not exist are created as NaN.
df = df.reindex(column_order, axis='columns')

##### Renaming columns

In [56]:
# Old column name -> new column name.
column_renames = {'navn': 'org_navn', 'adrlinje1': 'adresse'}
# tema1_no .. tema4_no become kategori1 .. kategori4.
column_renames.update({f'tema{n}_no': f'kategori{n}' for n in range(1, 5)})
df = df.rename(columns=column_renames)

##### Filling empty grade (karakter) cells with 0

In [59]:
# Cells that are empty or hold only whitespace are converted to NaN so that
# they are recognised as missing values.
blank_cell_pattern = r'^\s*$'
df = df.replace(to_replace=blank_cell_pattern, value=np.nan, regex=True)


In [62]:
# Missing grades in karakter1, karakter3 and karakter4 are recorded as 0.
# NOTE(review): karakter2 is not filled here — confirm that this is intended.
#
# Each column is updated with a single boolean-mask assignment rather than by
# writing to the DataFrame while looping over it with iterrows(): modifying a
# frame during iteration is not guaranteed to work, and the row-by-row loop is
# far slower than the vectorised form.
for grade_column in ('karakter1', 'karakter3', 'karakter4'):
    df.loc[df[grade_column].isnull(), grade_column] = 0

##### Exporting dataframe to csv

In [63]:
# Write the DataFrame as CSV into the Extracted data folder. The path is
# assembled from components so it resolves on every operating system; a
# backslash-separated literal is only understood on Windows.
# index=False keeps the DataFrame's row index out of the file.
csv_output_path = Path('..', '..', 'Data', 'Extracted', 'mattilsynet_csv.csv')
df.to_csv(csv_output_path, index=False)