In [27]:
import pandas as pd
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, DateTime, Float, select, Date, JSON
import logging
import os
import sys
from math import ceil
from tqdm import tqdm
from pandas.util.testing import assert_frame_equal

from wikibaseintegrator import wbi_core, wbi_login, wbi_login
from wikibaseintegrator.wbi_config import config as wbi_config

  


In [28]:
logging.getLogger().setLevel(logging.INFO)

In [29]:
BRICKIT_BD_LOGIN = 'postgres'
BRICKIT_BD_PASSWORD = 'pass'
BRICKIT_BD_HOST = 'host'
WIKIBASE_HOST = '84.201.142.182'

WIKIBASE_LOGIN = 'WikibaseAdmin'
WIKIBASE_PASSWORD = 'WikibaseDockerAdminPass'

connection_string = f'postgresql://{BRICKIT_BD_LOGIN}:{BRICKIT_BD_PASSWORD}@{BRICKIT_BD_HOST}:5432/holybricks'
engine_pg = create_engine(connection_string)

wbi_config['MEDIAWIKI_API_URL'] = f'http://{WIKIBASE_HOST}:8181/api.php'
wbi_config['SPARQL_ENDPOINT_URL'] = f'http://{WIKIBASE_HOST}:8989/bigdata/sparql'
wbi_config['WIKIBASE_URL'] = 'http://wikibase.svc'

In [30]:
# Легаси, уже умеем нормально
ITEMS_DICT = {
    'I': {
        'Brickit Company': 'Q1',
        'Brickit Image': 'Q2',
        'Brickit Part': 'Q3'

    },
    'P': {
        'instance of': 'P1',
        'Image URL': 'P2',
        'Image ID': 'P3',
        'Part Name': 'P4',
        'Part Tag': 'P5',
        'Part Image': 'P6',
        'Part Num': 'P7',
        'Part Child': 'P8'
    }    
}

# Забрать какие-то данные из Brickit

In [None]:
df_images = pd.read_sql(f"""
SELECT id, public_url
FROM staging.manual_images
WHERE initial_entity_type = 'part'
    """, con = engine_pg)


df_parts = pd.read_sql(f"""
SELECT part_num, "name", tag, part_cat_id, child_part_nums, image_id
FROM staging.synthetic_parts
    """, con = engine_pg)

# Залить сущности

In [None]:
# Костыль для связки наша сущность - ID в Wikibase/
# Предполагаем, что это можно взять при помощи SPARQL
# А пока что по результатам циклов ниже в эти датафреймы дописывается ID Wikibase и сохраняются в csv
# df_images = pd.read_csv('./df_images.csv')
# df_parts = pd.read_csv('./df_parts.csv')

In [None]:
login_instance = wbi_login.Login(user=WIKIBASE_LOGIN, pwd=WIKIBASE_PASSWORD)   

In [None]:
# Загрузить 100 изображений из всех
for _, img_i in df_images[:100].iterrows():
    data = [
        wbi_core.Url(str(img_i['public_url']), prop_nr=ITEMS_DICT['P']['Image URL']),
        wbi_core.String(str(img_i['id']), prop_nr=ITEMS_DICT['P']['Image ID']),
        wbi_core.ItemID(ITEMS_DICT['I']['Brickit Image'], prop_nr=ITEMS_DICT['P']['instance of'])
    ]
    item = wbi_core.ItemEngine(new_item=True, data=data,core_props=set())
    
    # Этот метод в библиотеке из коробки не работает. 
    # Надо либо закомментить в библиотеке в wbi_core.ItemEngine.set_label() условие после "Skip set_label if the item already have one and if_exists is at 'KEEP'"
    # Либо просто не проставлять лейблы
    item.set_label('img_' + str(img_i['id']), if_exists='REPLACE')
    
    r = item.write(login_instance)    
    
    df_images.loc[df_images.id == img_i['id'], 'entity_id'] = r

In [None]:
df_images.to_csv('./df_images.csv', index = False) #Обновляем "базу знаний" про связки с id Wikibase

In [None]:
# Заливка деталей
df_parts_img = df_parts[df_parts.image_id.isin(df_images[~df_images.entity_id.isnull()].id)] #только тех, для которых мы уже залили картинки
for _, part_i in df_parts_img.iterrows():
    part_img = part_i['image_id']
    img_Q = df_images[df_images.id == part_img].reset_index().at[0, 'entity_id']
    
    data = [
        wbi_core.String(str(part_i['name']), prop_nr=ITEMS_DICT['P']['Part Name']),
        wbi_core.String(str(part_i['tag']), prop_nr=ITEMS_DICT['P']['Part Tag']),
        wbi_core.String(str(part_i['part_num']), prop_nr=ITEMS_DICT['P']['Part Num']),
        wbi_core.ItemID(ITEMS_DICT['I']['Brickit Part'], prop_nr=ITEMS_DICT['P']['instance of']),
        wbi_core.ItemID(img_Q, prop_nr=ITEMS_DICT['P']['Part Image'])
    ]
    item = wbi_core.ItemEngine(new_item=True, data=data,core_props=set())
    
    # Этот метод в библиотеке из коробки не работает. 
    item.set_label('part_num_' + str(part_i['part_num']), if_exists='REPLACE')
    
    r = item.write(login_instance)    
    
    df_parts.loc[df_parts.part_num == part_i['part_num'], 'entity_id'] = r

In [None]:
df_parts.to_csv('./df_parts.csv', index = False) #Обновляем "базу знаний" про связки с id Wikibase

# Эвотор

In [155]:
# Выгрузка схемы базы из Эвотора
'''
select col.column_id, 
       col.owner as schema_name,
       col.table_name, 
       col.column_name, 
       col.data_type, 
       col.data_length, 
       col.data_precision, 
       col.data_scale, 
       col.nullable
from sys.all_tab_columns col
inner join sys.all_tables t on col.owner = t.owner 
                              and col.table_name = t.table_name
-- excluding some Oracle maintained schemas
where col.owner not in ('ANONYMOUS','CTXSYS','DBSNMP','EXFSYS', 'LBACSYS', 
   'MDSYS', 'MGMT_VIEW','OLAPSYS','OWBSYS','ORDPLUGINS', 'ORDSYS','OUTLN', 
   'SI_INFORMTN_SCHEMA','SYS','SYSMAN','SYSTEM','TSMSYS','WK_TEST','WKSYS', 
   'WKPROXY','WMSYS','XDB','APEX_040000', 'APEX_PUBLIC_USER','DIP', 
   'FLOWS_30000','FLOWS_FILES','MDDATA', 'ORACLE_OCM', 'XS$NULL',
   'SPATIAL_CSW_ADMIN_USR', 'SPATIAL_WFS_ADMIN_USR', 'PUBLIC')  
order by col.owner, col.table_name, col.column_id;
'''

df_e = pd.read_csv('./evotor_schemas.csv')

In [None]:
# Возьмём самые ходовые схемы для тестов
schema_list = [
    'AIRFLOW', 
    'BIGDATA_LOADER', 
    'EVOTOR_ANALYTICS', 
    'EVOTOR_BIGDATA', 
    'EVOTOR_MARKET_REPL',
    'EVOTOR_REPORTS',
    'EVOTOR_CRM'
]
df_e = df_e[df_e.SCHEMA_NAME.isin(schema_list)]

In [None]:
# Какие properties будут нужны 
prop_list = ['Field', 'Description', 'Schema','Table','located in','Data Type','Data Length']
prop_df = get_items_by_label(prop_list, item_type = 'P')
prop_df = {i['label']: i['item'] for _, i in prop_df.iterrows()}

## Создание схем

In [None]:
Q_database = get_items_by_label(['dwh'], item_type = 'Q').at[0, 'item']
for schema in schema_list:
    data = [
        wbi_core.ItemID(Q_database, prop_nr=prop_df['located in'])
    ]
    item = wbi_core.ItemEngine(new_item=True, data=data,core_props=set())

    item.set_label(schema, if_exists='REPLACE')

    r = item.write(login_instance)

## Создание таблиц

In [None]:
for schema in tqdm(schema_list):
    print(schema)
    Q_schema = get_items_by_label([schema], item_type = 'Q').at[0, 'item'] # ID объекта DWH
    df_schema = df_e[df_e.SCHEMA_NAME == schema]
    
    # Таблицы
    for table in tqdm(df_schema.TABLE_NAME.unique()):
        if '#' in table:
            continue
        
        df_table = df_schema[df_schema.TABLE_NAME == table]
        
        fields = []
        # Квалифаеры
        for _, field in df_table.iterrows(): 
            qualifiers = [
                wbi_core.String(field['DATA_TYPE'], prop_nr=prop_df['Data Type'], is_qualifier = True),
                wbi_core.String(str(field['DATA_LENGTH']), prop_nr=prop_df['Data Length'], is_qualifier = True),
            ]
            # Поля
            fields.append(wbi_core.String(field['COLUMN_NAME'], prop_nr=prop_df['Field'], qualifiers=qualifiers))
                
        data = [wbi_core.ItemID(Q_schema, prop_nr=prop_df['located in'])]
        data.extend(fields)
        
        item = wbi_core.ItemEngine(new_item=True, data=data,core_props=set())

        item.set_label(schema + '.' + table, if_exists='REPLACE')

        r = item.write(login_instance)

## Перечислить таблицы в схемах

In [None]:
for schema in schema_list:
    print(schema)
    Q_schema = get_items_by_label([schema], item_type = 'Q').at[0, 'item'] # ID объекта DWH

    df_schema = df_e[(df_e.SCHEMA_NAME == schema) & (~df_e.TABLE_NAME.str.contains('#'))]
    df_schema['lables'] = df_schema.SCHEMA_NAME + '.' + df_schema.TABLE_NAME
    
    lables_list = list(set(df_schema.lables))
    
    batch = 25
    Q_tables = []
    for i in tqdm(range(ceil(len(lables_list) / batch)), desc = 'batches'):
        lables_list_i = lables_list[batch*i : batch*(i+1)]
        Q_tables_i = get_items_by_label(lables_list_i, item_type = 'Q').item.to_list()
        Q_tables.extend(Q_tables_i)
        
    data = [wbi_core.ItemID(Q_i, prop_nr=prop_df['Table']) for Q_i in Q_tables]

    item = wbi_core.ItemEngine(new_item=False, item_id = Q_schema, data=data,core_props=set())

    item.write(login_instance)

---
# Классы

In [304]:
def get_items_by_label(label_list:list, item_type:str, is_unique:bool = True, is_notnull:bool = True):
    '''
    По переданному списку лейблов находит entity_id в базе Wikibase. 
    
    label_list: список искомых лейблов
    item_type: тип искомого объекта. Если не указано, то любой объект. Если указано:
        "P" - Property
        "Q" - Item
    is_unique: если True, то вернёт ошибку, если найдено больше одного значения
    is_notnull: если True, то вернёт ошибку, если не найдено ни одного значения
    '''
    
    query = """
        SELECT DISTINCT ?item ?itemLabel
        WHERE {{
          ?item rdfs:label ?itemLabel. 

          VALUES ?itemLabel {{ {label_filter} }}
        }}""".format(label_filter = ' '.join([f'\"{i}\"@en' for i in label_list]))
    
    result = wbi_core.ItemEngine.execute_sparql_query(query)
    result_list = [[i['itemLabel']['value'], i['item']['value'].replace('http://wikibase.svc/entity/', '')] 
                   for i in result['results']['bindings']]
    
    df = pd.DataFrame(result_list, columns = ['label', 'item'])
        
    if item_type in ('P', 'Q'):
        df = df[df.item.str.contains(item_type)] 

    df_check = df.groupby('label').count()
    if is_unique and df_check.item.max() > 1:
        r = df[df.label.isin(df_check[df_check.item > 1].index.to_list())].sort_values(by = 'label')
        logging.info(f"entity_id определён неоднозначно: \n{r}")
        return None
    elif is_notnull and len(set(label_list) - set(df.label)) > 0:
        r = set(label_list) - set(df.label)
        logging.info(f"entity_id не найден: \n{r}!")
        return None
    else:
        return df
    
    
def get_wb_parent(Q:str, P:str, login_instance:wbi_login.Login) -> str:
    '''
    Свойством P какого объекта-родителя является объект Q? Возвращает ошибку, если родителей ноль или несколько.
        Q - целевой объект
        P - каким параметром он должен быть
    '''

    query = f'''
        SELECT ?entity_id ?entity_name WHERE {{
            ?entity_id wdt:{P} wd:{Q} .
            ?entity_id rdfs:label ?entity_name .
        }}'''
    print(query)
    result = wbi_core.ItemEngine.execute_sparql_query(query)

    result_list = [[i['entity_name']['value'], i['entity_id']['value'].replace('http://wikibase.svc/entity/', '')]
                   for i in result['results']['bindings']]

    result_df = pd.DataFrame(result_list, columns = ['entity_name', 'entity_id'])

    if result_df.shape[0] > 1:
        raise Exception(f'Object with entity_id {Q} have several parents: \n{result_df.entity_id.to_list()}')
    elif result_df.shape[0] == 0:
        raise Exception(f'Object with entity_id {Q} not finded in !')
    else:
        return (result_df.at[0, 'entity_name'], result_df.at[0, 'entity_id'])

    
def get_wb_statement(Q:str, property_dict:dict, login_instance:wbi_login.Login) -> pd.DataFrame:
    '''
    Для объекта Q для стейтмента P вывести все его значения с квалифаерами
        Q - для какого объекта ищем
        P - какой стейтмент (например, P11 - Field)
        property_dict - словарь вида:
            {
                statement: 'P111'
                core_qualifiers: {
                   'any_title': 'P123'
                },
                custom_qualifiers: {
                    'any_title': 'P456'
                }                
            }
            , где core_fields - обязательные поля, custom_fields - опциональные, могут быть пустыми        
    '''
    P = property_dict['statement']

    q_column_names = ['?{} '.format(i.replace(' ', '_')) for i in property_dict['core_qualifiers'].keys()]
    q_column_names_custom = ['?{} '.format(i.replace(' ', '_')) for i in property_dict['custom_qualifiers'].keys()]
    q_column_names.extend(q_column_names_custom)
    q_column_names = ' '.join(q_column_names)
    
    q_where_core_fields = '\n'.join([
        '?statement pq:{} ?{}.'.format(P, title.replace(' ', '_')) \
            for title, P in property_dict['core_qualifiers'].items()
    ])
    
    q_where_custom_fields = '\n'.join([
        'OPTIONAL {{ ?statement pq:{} ?{}. }}'.format(P, title.replace(' ', '_')) \
            for title, P in property_dict['custom_qualifiers'].items()
    ])
    
    
    query = f'''
        SELECT ?STATEMENT ?STATEMENT_LABEL {q_column_names}
        WHERE
        {{
             wd:{Q} p:{P} ?statement.
             ?statement ps:{P} ?STATEMENT.

             {q_where_core_fields}
             
             OPTIONAL {{ ?STATEMENT rdfs:label ?STATEMENT_LABEL }}

             {q_where_custom_fields}                 
        }}        
    '''

    result = wbi_core.ItemEngine.execute_sparql_query(query)

    wb_fields_df = []
    for bind in result['results']['bindings']:
        wb_fields_df.append({k: v['value'] for k, v in bind.items()})
    wb_fields_df = pd.DataFrame(wb_fields_df)        

    wb_fields_df['STATEMENT'] = wb_fields_df['STATEMENT'].str.replace('http://wikibase.svc/entity/', '')
    # Тут хитрость. API SPARQL не вернёт колонку STATEMENT, если ни для одного значения не будет label
    # А это тот случай, когда STATEMENT - не айтемы
    if 'STATEMENT_LABEL' in wb_fields_df.columns: 
        wb_fields_df.rename(columns = {
            'STATEMENT': 'ITEM_ID',
            'STATEMENT_LABEL': 'STATEMENT'
        }, inplace = True)
        
    return wb_fields_df

In [34]:
df_input = pd.read_csv('./evotor_schemas.csv')

login_instance = wbi_login.Login(user=WIKIBASE_LOGIN, pwd=WIKIBASE_PASSWORD)   

In [208]:
GLOBAL_PROPERTIES = {
    i['label']: i['item'] \
        for _, i in get_items_by_label(['located_in'], item_type = 'P').iterrows()
}
P_TABLE = get_items_by_label(['TABLE'], item_type = 'P').at[0, 'item']
P_SCHEMA = get_items_by_label(['SCHEMA'], item_type = 'P').at[0, 'item']
P_DATABASE =  get_items_by_label(['DATABASE'], item_type = 'P').at[0, 'item']
P_COLUMN =  get_items_by_label(['COLUMN'], item_type = 'P').at[0, 'item']



PROPERTY_DICT = {
    'COMPANY': {
        'repeated_statement': {   
            'label': 'DATABASE',   
            'statement': P_DATABASE,     
            'core_qualifiers': {},
            'custom_qualifiers': {
                i['label']: i['item'] \
                    for _, i in get_items_by_label(['DESCRIPTION'], item_type = 'P').iterrows()
            }        
        },
        'global': GLOBAL_PROPERTIES
    },
    'DATABASE': {
        'in_property': P_DATABASE,
        'repeated_statement': {     
            'label': 'SCHEMA',    
            'statement': P_SCHEMA,        
            'core_qualifiers': {},
            'custom_qualifiers': {
                i['label']: i['item'] \
                    for _, i in get_items_by_label(['DESCRIPTION'], item_type = 'P').iterrows()
            }        
        },
        'global': GLOBAL_PROPERTIES       
    },
    'SCHEMA': {
        'in_property': P_SCHEMA,
        'repeated_statement': {   
            'label': 'TABLE',     
            'statement': P_TABLE,        
            'core_qualifiers': {},
            'custom_qualifiers': {
                i['label']: i['item'] \
                    for _, i in get_items_by_label(['DESCRIPTION'], item_type = 'P').iterrows()
            }        
        },
        'global': GLOBAL_PROPERTIES   
    },
    'TABLE': {
        'in_property': P_TABLE,
        'repeated_statement': {
            'label': 'COLUMN',
            'statement': P_COLUMN,        
            'core_qualifiers': {
                i['label']: i['item'] \
                    for _, i in get_items_by_label(['DATA_TYPE','DATA_LENGTH'], item_type = 'P').iterrows()
            },
            'custom_qualifiers': {
                i['label']: i['item'] \
                    for _, i in get_items_by_label(['DESCRIPTION'], item_type = 'P').iterrows()
            }        
        },
        'global': GLOBAL_PROPERTIES
    },
}

In [305]:
class WikiObject():
    def _fetch_qualifiers(self):
        if self.new_item:
            self.resolved_fields = df_input.copy()
        elif self.df_input.shape[0] == 0:
            logging.info('No input dataframe, can`t fetch!')
        else:
            merge_column = self.properties_dict['repeated_statement']['label']
            
            logging.info("""
                {columns} statements in new table
                {added} statements are added
                {deleted} statements are deleted              
            """.format(
                columns = self.df_input.shape[0],
                added = self.df_input[~self.df_input[merge_column].isin(self.wb_fields.STATEMENT)].shape[0],
                deleted = self.wb_fields[~self.wb_fields.STATEMENT.isin(self.df_input[merge_column])].shape[0]                    
            ))

            fetched_column = [merge_column]
            fetched_column.extend(self.properties_dict['repeated_statement']['custom_qualifiers'].keys())
            
            
            self.resolved_fields = self.df_input.merge(self.wb_fields.STATEMENT,
                                                 how = 'outer', left_on = merge_column, right_on = 'STATEMENT')
            
            self.resolved_fields['is_deleted'] = self.resolved_fields\
                .apply(lambda row: 1 if pd.isnull(row[merge_column]) else 0, axis = 1)
            
            return self.resolved_fields
    
    
    def _set_vars(self):
        Q_df = get_items_by_label([self.name], item_type = 'Q')
        if Q_df is None:  
            # Новый объект
            logging.info(f'No such object: {name}! New one will be created.')
            assert self.df_input.shape[0] > 1 , 'Cannot create new item from empty input DataFrame!'
            
            parent_label = df_input[self.in_property_label].unique()
            assert parent_label.shape[0] == 1 , f'Ambiguous parent_label: {parent_label} '
            
            self.parent_label = parent_label[0]
            self.Q_parent = get_items_by_label([self.parent_label], item_type = 'Q').at[0, 'item']            
            self.Q = None
            self.new_item = True            
        else:
            # Существующий объект
            self.Q = Q_df.at[0, 'item']
            self.parent_label, self.Q_parent = get_wb_parent(self.Q, self.properties_dict['in_property'], self.login_instance)
            self.wb_fields = get_wb_statement(self.Q, self.properties_dict['repeated_statement'], self.login_instance)
            self.new_item = False
        
        self._fetch_qualifiers()
        logging.info("""
            Object {name} (entity_id: {Q}), parent {parent_label} (entity_id: {Qp})
        """.format(
            name = self.name,
            Q = self.Q,
            parent_label = self.parent_label,
            Qp = self.Q_parent
        ))
        
        
    def delete_item(self):
        pass

In [306]:
class WikiTable(WikiObject): 
    def __init__(
        self, 
        name:str, 
        properties_dict:dict, 
        login_instance:wbi_login.Login, 
        df_input = pd.DataFrame()
    ):        
        self.name = name
        self.login_instance = login_instance
        self.properties_dict = properties_dict['TABLE']
        self.df_input = df_input
        self.in_property_label = 'SCHEMA'
        self._set_vars()
        self.df_input = df_input
    
    
    def push_to_wiki(self):
        # TO DO: проверка на дубликаты полей
        # А это можно на SPARQL сделать? Хотя зачем. 
        self.resolved_fields

        fields = []
        # Квалифаеры
        for _, field in self.resolved_fields.iterrows(): 
            qualifiers = [
                wbi_core.String(field['DATA_TYPE'], prop_nr=prop_df['Data Type'], is_qualifier = True),
                wbi_core.String(str(field['DATA_LENGTH']), prop_nr=prop_df['Data Length'], is_qualifier = True),
            ]
            description_i =  field['DESCRIPTION']
            if description_i:
                qualifiers.append(
                    wbi_core.String(str(description_i), prop_nr=prop_df['Description'], is_qualifier = True),
                )
            
            # Поля
            fields.append(wbi_core.String(field['COLUMN_NAME'], prop_nr=prop_df['Field'], qualifiers=qualifiers))

        data = [wbi_core.ItemID(self.Q_schema, prop_nr=prop_df['located in'])]
        data.extend(fields)

        item = wbi_core.ItemEngine(new_item=self.new_item, data=data,core_props=set())

        if self.new_item:
            item.set_label(schema + '.' + table, if_exists='REPLACE')

        r = item.write(login_instance)

In [307]:
class WikiSchema(WikiObject): 
    def __init__(
        self, 
        name:str, 
        properties_dict:dict, 
        login_instance:wbi_login.Login, 
        df_input = pd.DataFrame()
    ):        
        self.name = name
        self.login_instance = login_instance
        self.properties_dict = properties_dict['SCHEMA']
        self.df_input = df_input
        self.in_property_label = 'DATABASE'
        self._set_vars()

In [308]:
class WikiDatabase(WikiObject): 
    def __init__(
        self, 
        name:str, 
        properties_dict:dict, 
        login_instance:wbi_login.Login, 
        df_input = pd.DataFrame()
    ):        
        self.name = name
        self.login_instance = login_instance
        self.properties_dict = properties_dict['DATABASE']
        self.df_input = df_input
        self.in_property_label = 'COMPANY'
        self._set_vars()

In [299]:
df_input_t = df_input[df_input.TABLE == 'TMP_MP_REVENUE']
df_input_t = df_input_t.drop([962, 1027])
wt = WikiTable('AIRFLOW.TMP_MP_REVENUE', PROPERTY_DICT, login_instance, df_input_t)
wt.resolved_fields.tail()

INFO:root:
                67 statements in new table
                0 statements are added
                2 statements are deleted              
            
INFO:root:
            Object AIRFLOW.TMP_MP_REVENUE (entity_id: Q270), parent AIRFLOW (entity_id: Q203)
        


Unnamed: 0,COLUMN_ID,SCHEMA,TABLE,COLUMN,DATA_TYPE,DATA_LENGTH,DATA_PRECISION,DATA_SCALE,NULLABLE,STATEMENT,is_deleted
64,66.0,AIRFLOW,TMP_MP_REVENUE,FL_SAAS,NUMBER,22.0,,,Y,FL_SAAS,0
65,68.0,AIRFLOW,TMP_MP_REVENUE,QUANTITY,NUMBER,22.0,,0.0,Y,QUANTITY,0
66,69.0,AIRFLOW,TMP_MP_REVENUE,VAT_SUM,NUMBER,22.0,,,Y,VAT_SUM,0
67,,,,,,,,,,NAME,1
68,,,,,,,,,,CHANNEL,1


In [313]:
df_input_s = df_input[df_input.SCHEMA == 'AIRFLOW'][['SCHEMA', 'TABLE']].drop_duplicates()
df_input_s['TABLE'] = df_input_s['SCHEMA'] + '.' + df_input_s['TABLE']
df_input_s = df_input_s.drop([0, 1124])
ws = WikiDatabase('AIRFLOW', PROPERTY_DICT, login_instance, df_input_s)
ws.resolved_fields.tail()

INFO:root:
                63 statements in new table
                0 statements are added
                3 statements are deleted              
            
INFO:root:
            Object AIRFLOW (entity_id: Q203), parent dwh (entity_id: Q196)
        



        SELECT ?entity_id ?entity_name WHERE {
            ?entity_id wdt:P15 wd:Q203 .
            ?entity_id rdfs:label ?entity_name .
        }


Unnamed: 0,SCHEMA,TABLE,STATEMENT,is_deleted
61,AIRFLOW,AIRFLOW.TMP_SECURITY_USER_GROUP,AIRFLOW.TMP_SECURITY_USER_GROUP,0
62,AIRFLOW,AIRFLOW.T_NAME_STEP_2,AIRFLOW.T_NAME_STEP_2,0
63,,,AIRFLOW.TMP_BPLK_1_RUS,1
64,,,AIRFLOW.AF_SALES_REPORT_CORE_PARTNER_1C,1
65,,,AIRFLOW.TMP_SECURITY_USER_PROFILE,1


In [None]:
df_input_s = df_input[df_input.SCHEMA == 'AIRFLOW'][['SCHEMA', 'TABLE']].drop_duplicates()
df_input_s['TABLE'] = df_input_s['SCHEMA'] + '.' + df_input_s['TABLE']
df_input_s = df_input_s.drop([0, 1124])
ws = WikiSchema('AIRFLOW', PROPERTY_DICT, login_instance, df_input_s)
ws.resolved_fields.tail()

In [329]:
schema_list = [
    'AIRFLOW', 
    'BIGDATA_LOADER', 
    'EVOTOR_ANALYTICS', 
    'EVOTOR_BIGDATA', 
    'EVOTOR_MARKET_REPL',
    'EVOTOR_REPORTS'
#     'EVOTOR_CRM'
]

df_input_d = df_input[['SCHEMA']].drop_duplicates()
df_input_d['DATABASE'] = 'dwh'
df_input_d = df_input_d[df_input_d.SCHEMA.isin(schema_list)]
wd = WikiDatabase('dwh', PROPERTY_DICT, login_instance, df_input_d)
wd.resolved_fields.tail()

INFO:root:
                6 statements in new table
                0 statements are added
                1 statements are deleted              
            
INFO:root:
            Object dwh (entity_id: Q196), parent Evotor Company (entity_id: Q191)
        



        SELECT ?entity_id ?entity_name WHERE {
            ?entity_id wdt:P14 wd:Q196 .
            ?entity_id rdfs:label ?entity_name .
        }


Unnamed: 0,SCHEMA,DATABASE,STATEMENT,is_deleted
2,EVOTOR_ANALYTICS,dwh,EVOTOR_ANALYTICS,0
3,EVOTOR_BIGDATA,dwh,EVOTOR_BIGDATA,0
4,EVOTOR_MARKET_REPL,dwh,EVOTOR_MARKET_REPL,0
5,EVOTOR_REPORTS,dwh,EVOTOR_REPORTS,0
6,,,EVOTOR_CRM,1


# Прочие знания

In [None]:
# API поиска
wbi_core.ItemEngine.get_search_results('part_num_10793') 

In [None]:
# Дескрипшн
set_description(self, description, lang=None, if_exists='REPLACE'):

In [None]:
# Обновление существующих айтемов
data = [
    wbi_core.ItemID(img_Q, prop_nr = ITEMS_DICT['P']['Part Image'])
]
item = wbi_core.ItemEngine(new_item=False, item_id = 'Q1234', data=data,core_props=set())
r = item.write(login_instance)  