In [1]:
import config
import pymongo
import numpy as np
import pandas as pd
import json
import re
import copy
import ast

from ipywidgets import interact, Dropdown

pd.set_option('display.max_rows', 200)

In [2]:
myclient = pymongo.MongoClient(str(config.MONGO_URI))
stagingDb = myclient[str(config.DB_STAGING)]
analyseDb = myclient[str(config.DB_ANALYSE)]
stagingCol = stagingDb[config.COLL_PLAATJES]
stagingOud = stagingDb[config.COLL_STAGING_OUD]
stagingNieuw = stagingDb[config.COLL_STAGING_NIEUW]
analyseCol = analyseDb[config.COLL_ANALYSE]
analyseColClean = analyseDb[config.COLL_ANALYSE_CLEAN]
metaCollection = stagingDb['Kolominformatie']

AIRFLOW_WASSTRAAT_CONFIG = "./wasstraat_config/Wasstraat_Config_Harmonize.xlsx"

In [3]:
xl = pd.read_excel(AIRFLOW_WASSTRAAT_CONFIG, None);
df_table = xl['Objecten']
df_table.head(30)

Unnamed: 0,Object,Tabellen,Overerven,Samenvoegen
0,Ignore,"["".*backup.*"", "".*kopie.*""]",,
1,Tekening,"[""TEKENING.*""]",,
2,Vondst,"[""VONDSTENLIJST"", ""VONDST"", ""VONDSTINHD""]",,
3,Spoor,"[""SPOREN"", ""SPOOR""]",,
4,Vulling,"[""VULLING.*""]",,
5,Dia,"[""DIA.*""]",,
6,Foto,"[""FOTO.*""]",,
7,Put,"[""PUT"", ""PUTTEN""]",,
8,Artefact,"[""ARTEFACT.*"", "".*AARDEWERK.*"", "".*steen.*"", ""...",,
9,Hout,"[""HOUT""]",Artefact,


In [4]:
objecten = list(xl.keys())
objecten.pop(0)

df_attr = pd.DataFrame()
for obj in objecten:
    df_tmp = xl[obj]
    df_tmp['Object'] = obj
    df_attr = pd.concat([df_attr, df_tmp])
     
df_attr['Kolommen'] = df_attr.apply(lambda x: ast.literal_eval(x['Kolommen']), axis=1)
df_attr = df_attr.explode('Kolommen').reset_index().drop(['index'], axis=1)

# Add Inherited attributes
df_overerven = pd.DataFrame()
for index, row in df_table[df_table.Overerven.notnull()].iterrows():
    df_tmp = df_attr[df_attr.Object == row['Overerven']].copy()
    df_tmp['Object'] = row['Object']
    df_overerven = pd.concat([df_overerven, df_tmp])
df_attr = pd.concat([df_attr, df_overerven])
    
df_attr.head(5)

Unnamed: 0,Attribute,Kolommen,Object
0,putnr,PUT,Put
1,putnr,PUTNO,Put
2,putnr,PUT,Spoor
3,putnr,PUTNO,Spoor
4,vlaknr,VLAK,Spoor


In [5]:
def getObject(table):
    for index, row in xl['Objecten'].iterrows():
        kolommen = ast.literal_eval(row['Tabellen'])
        for kolom in kolommen:
            if re.match(kolom, str(table)):
                return row['Object']
    
    return 'Geen' #Nothing found

def flatten(lst):
    flat_list = [item for sublist in lst for item in sublist]
    return list(set(flat_list))


grp_aggr = [{"$match" : {'project': {'$nin': ['MAGAZIJN', 'DELF-IT', 'Digifotos']}, 'table': { '$not': {'$regex':"^SYS.*"}}}},
            {"$group": { "_id": {'table': "$table", 'name': '$name'}, "count": {"$sum": 1},  "omschrijvingen": { "$push": "$Description" }}},
            {'$replaceRoot': {'newRoot': {'table': "$_id.table", 'name': "$_id.name", 'count': '$count', 'omschrijvingen': "$omschrijvingen"}}}]

# Get All Attributes and set Object
df = pd.DataFrame(list(metaCollection.aggregate(grp_aggr)))
df['Object'] = df.apply(lambda x: getObject(x['table']), axis=1)

# Now get all unique attributes 
df = df.groupby(['Object', 'name']).agg({'omschrijvingen':lambda x: list(x), 'count':lambda x: sum(x)}).reset_index()
df['omschrijvingen'] = df.apply(lambda x: flatten(x['omschrijvingen']), axis=1)
df = df.rename(columns={"name": "Kolommen"})

#Merge it with the attrubutes used in the Excel to 
df = pd.merge(df, df_attr, on=['Object', 'Kolommen'], how='left')
df['Attribute'] = df['Attribute'].fillna(value="")
df.head(5)

Unnamed: 0,Object,Kolommen,omschrijvingen,count,Attribute
0,Aardewerk,(none),[],1,
1,Aardewerk,10a,[doosnummer],1,doosnr
2,Aardewerk,10b,[tekeningnummer],1,tekeningnr
3,Aardewerk,10c,[dianummer],1,dianr
4,Aardewerk,10d,[digifotonummer],1,fotonr


In [6]:
lst = df['Object'].unique()
Objecten_lst = Dropdown(options = sorted(lst))

@interact(obj = Objecten_lst)
def print_city(obj):
    display(df[df.Object == obj])


interactive(children=(Dropdown(description='obj', options=('Aardewerk', 'Artefact', 'Bot', 'Dia', 'Geen', 'Gla…

In [7]:
widgets.Dropdown(
    options=['1', '2', '3'],
    value='2',
    description='Number:',
    disabled=False,
)

NameError: name 'widgets' is not defined

In [None]:
from ipywidgets import interact, Dropdown

geo = {'USA':['CHI','NYC'],'Russia':['MOW','LED']}
countryW = Dropdown(options = geo.keys())
cityW = Dropdown()

@interact(country = countryW)
def print_city(country):
    print(country)