In [1]:
import dataverk
import dataparcel
import urllib.request
import urllib.parse
import pandas as pd
from os import environ
from xml.etree import ElementTree
from googleplaces import GooglePlaces

In [2]:
# Must be set before Dataverk is constructed. Presumably this tells dataverk
# not to resolve secrets in its settings -- TODO confirm against the dataverk docs.
environ["DATAVERK_NO_SETTINGS_SECRETS"] = "True"
# Dataverk client; later cells read the Google API key from its settings and
# publish the finished data package through it.
dv = dataverk.Dataverk()

In [3]:
# Builds the list of all NAV offices (fetched from nav.no).

xmltag = 'office'
url = 'https://www.nav.no/no/NAV+og+samfunn/Kontakt+NAV/Relatert+informasjon/_attachment/805355552?_ts=16639744488'

# Open the response in a `with` block so the HTTP connection is released as
# soon as the XML has been parsed, even if parsing raises.
with urllib.request.urlopen(url) as response:
    xml = ElementTree.parse(response)

# Kept in its original shape (a list holding one list of office names) in case
# later cells still refer to it.
pandas_array = [[elem.text for elem in xml.iter(tag=xmltag)]]

# One row per <office> element, de-duplicated while keeping the first
# occurrence. No in-place mutation, so re-running the cell is safe.
df_kontorer = pd.DataFrame({xmltag: pandas_array[0]}).drop_duplicates(keep='first')

# Drop one specific entry. NOTE(review): the literal starts with a space,
# presumably to match the text exactly as it appears in the XML -- confirm.
df_kontorer = df_kontorer[df_kontorer[xmltag] != ' NAV Inn-Trøndelag']

In [4]:
# Sanity check: the office table must not contain duplicate rows. As a bare
# expression in the middle of a cell the result was neither displayed nor
# checked, so it is asserted instead.
assert not df_kontorer.duplicated().any(), "df_kontorer contains duplicate offices"

# Plain Python list of office names, used as search queries further down.
office_list = df_kontorer["office"].tolist()

In [5]:
# Number of office names left after de-duplication and filtering.
len(office_list)

412

In [6]:
# Test run only: keep just the first two offices so the Google Places lookups
# below stay cheap.
# NOTE(review): this overwrites the full list, so everything downstream
# (including what gets published) only covers these two offices.
office_list = office_list[:2]

In [7]:
# Google Places client. The API key is read from the Dataverk settings rather
# than being hard-coded in the notebook.
google_places = GooglePlaces(api_key=dv.context.settings["google_api_key"])

In [8]:
# Empty result tables with fixed column layouts: one for office locations and
# one for office ratings.
df_loc, df_rating = (
    pd.DataFrame(columns=column_names)
    for column_names in (
        ['name', 'address', 'latitude', 'longitude'],
        ['name', 'address', 'rating'],
    )
)

In [9]:
# Look up every office name in Google Places and collect location and rating
# rows for the hits whose name contains "NAV" or "Nav".
#
# Rows are gathered in plain lists and turned into DataFrames once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration. Building the frames from scratch also means
# that re-running this cell does not duplicate rows.
loc_rows = []
rating_rows = []

for office in office_list:
    query_result = google_places.text_search(query=office)
    for place in query_result.places:
        # Fetch the full place record before reading its fields (kept ahead of
        # the name filter, exactly as in the original flow).
        place.get_details()

        if not ("NAV" in place.name or "Nav" in place.name):
            continue

        # All values are stored as strings, matching the published CSV layout.
        loc_rows.append({
            'name': place.name,
            'address': place.formatted_address,
            'latitude': str(place.geo_location["lat"]),
            'longitude': str(place.geo_location["lng"]),
        })
        rating_rows.append({
            'name': place.name,
            'address': place.formatted_address,
            'rating': str(place.rating),
        })

# Passing `columns` fixes the column order and keeps the frames well-formed
# (empty, but with the right columns) when nothing matched.
df_loc = pd.DataFrame(loc_rows, columns=['name', 'address', 'latitude', 'longitude'])
df_rating = pd.DataFrame(rating_rows, columns=['name', 'address', 'rating'])

In [10]:
# Preview the first rows of the location table.
df_loc.head()

Unnamed: 0,name,address,latitude,longitude
0,NAV St. Hanshaugen,"Pilestredet 56, 0167 Oslo, Norway",59.9241143,10.7327211
1,NAV Nordstrand,"Cecilie Thoresens vei 1, 1153 Oslo, Norway",59.8792689,10.8071679


In [11]:
# Preview the first rows of the rating table.
df_rating.head()

Unnamed: 0,name,address,rating
0,NAV St. Hanshaugen,"Pilestredet 56, 0167 Oslo, Norway",4.0
1,NAV Nordstrand,"Cecilie Thoresens vei 1, 1153 Oslo, Norway",3.3


In [12]:
# Distribution of ratings over six one-point-wide buckets: bucket i counts the
# offices whose rating truncates to i (so 3.3 lands in '3-4').
#
# Ratings are stored as strings. Anything that is not a number (for example an
# empty string or 'None' for a place without a rating) becomes NaN and is left
# out, instead of aborting the whole cell with a ValueError.
numeric_ratings = pd.to_numeric(df_rating["rating"], errors="coerce").dropna()

# astype(int) truncates towards zero, the same as int(float(...)). reindex
# pins the result to exactly the buckets 0..5, filling missing ones with 0 and
# ignoring values outside that range rather than indexing out of bounds.
rating_spread = (
    numeric_ratings.astype(int)
    .value_counts()
    .reindex(range(6), fill_value=0)
    .tolist()
)

df_rating_spread = pd.DataFrame({'Antall kontorer': rating_spread, 'rating': ['0-1', '1-2', '2-3', '3-4', '4-5', '5-6']})

In [22]:
# Metadata describing the published data package (identity, storage location,
# licensing and ownership). Passed unchanged to dataparcel.Datapackage.
METADATA = {
    'id': 'nav-kontorer-id',
    'path': 'https://www.googleapis.com/storage/v1/opendata-nav/NAV-kontorer',
    'title': 'NAV kontorer',
    'readme': '',
    'license':'MIT',
    'accessRights':'Open',
    'auth': 'NAV',
    'description':'NAV kontorer',
    'name':'NAV-kontorer',
    'source':'NAV',
    # Two separate keywords; previously this was the single string
    # 'NAV, kontorer', which made the list hold one comma-joined entry.
    'keywords':['NAV', 'kontorer'],
    'provenance':'NAV',
    'publisher': 'NAV',
    'project': 'odata', 
    'user':'nav-examples',
    'bucket_name': 'opendata-nav',
    'bucket': 'opendata-nav',
    'dataset':'NAV-kontorer'
}

In [23]:
# Creates a data package and adds the datasets to it.
dp = dataparcel.Datapackage(METADATA)

# Resources: one per DataFrame produced above.
dp.add_resource(
    df=df_loc,
    dataset_name="nav-kontorer-lokasjon",
    dataset_description="Nav kontorer med adresse og geolokasjon",
)
dp.add_resource(
    df=df_rating,
    dataset_name="nav-kontorer-rating",
    dataset_description="Nav kontorer med adresse og bruker-rating",
)
dp.add_resource(
    df=df_rating_spread,
    dataset_name="spredning-av-rating",
    dataset_description="Spredning av ratings for NAV kontorer",
)

# Bar-chart spec for the rating distribution: grouped by the bucket label,
# with the office count as the only series.
spec = dict(
    type="bar",
    group="rating",
    series=["Antall kontorer"],
)

# Views: one chart for the distribution, then a table for each of the two
# per-office resources.
dp.add_view(
    name="Spredning av rating",
    title="Rating av NAV kontorer",
    resources=["spredning-av-rating"],
    spec_type="simple",
    spec=spec,
)
dp.add_view(
    name="Rating av NAV kontorer",
    title="Rating av NAV kontorer",
    resources=["nav-kontorer-rating"],
    row_limit=500,
    spec_type="table",
)
dp.add_view(
    name="Geolokasjon for NAV kontorer",
    title="Geolokasjon for NAV kontorer",
    resources=["nav-kontorer-lokasjon"],
    row_limit=500,
    spec_type="table",
)

In [24]:
# Write out the data package, then publish it through the Dataverk client.
# Both calls have external side effects (see the log output below this cell).
dp.write_datapackage()
dv.publish(dp)

2019-05-19T21:45:37.761999: V153753 <class 'dataverk.connectors.google_storage.GoogleStorageConnector'>: String (format: json) written to https://storage.googleapis.com/opendata-nav/NAV-kontorer/datapackage.json
2019-05-19T21:45:38.100069: V153753 <class 'dataverk.connectors.google_storage.GoogleStorageConnector'>: String (format: csv) written to https://storage.googleapis.com/opendata-nav/NAV-kontorer/resources/nav-kontorer-lokasjon.csv
2019-05-19T21:45:38.785417: V153753 <class 'dataverk.connectors.google_storage.GoogleStorageConnector'>: String (format: csv) written to https://storage.googleapis.com/opendata-nav/NAV-kontorer/resources/nav-kontorer-rating.csv
2019-05-19T21:45:39.116850: V153753 <class 'dataverk.connectors.google_storage.GoogleStorageConnector'>: String (format: csv) written to https://storage.googleapis.com/opendata-nav/NAV-kontorer/resources/spredning-av-rating.csv
2019-05-19T21:45:39.411994: V153753 <class 'dataverk.connectors.elasticsearch.ElasticsearchConnector'>