# OCS

In [None]:
import numpy as np
import pandas as pd
import json
import lxml.html
import datetime

## logistic/stocks/locations

In [None]:
# Load the stock-locations dump. The context manager closes the file handle
# as soon as the JSON has been parsed; the encoding is pinned so the result
# does not depend on the machine's locale.
with open('/home/abezpalov/data/ocs/locations.json', 'r', encoding='utf-8') as file:
    response = json.load(file)

df = pd.DataFrame(response)

In [None]:
def get_is_stock(row):
    """Return True when ``row['type']`` is one of the stock location types.

    The stock types are 'Local', 'ShipmentCity' and 'CO'; any other value
    gives False.
    """
    stock_types = ('Local', 'ShipmentCity', 'CO')
    return row['type'] in stock_types

def get_is_transit(row):
    """Return True when ``row['type']`` is one of the transit location types.

    The transit types are 'InternalMovement', 'TransitCO' and 'OuterTransit';
    any other value gives False.
    """
    transit_types = ('InternalMovement', 'TransitCO', 'OuterTransit')
    return row['type'] in transit_types

def get_is_on_order(row):
    """Return False for every row; the ``row`` argument is not inspected."""
    return False

# Switch the API field names to the local snake_case column names.
df = df.rename(
    columns={
        'location': 'key',
        'reservationAvailable': 'reservation_available',
    },
)

# Derive the boolean location flags row by row.
df['is_stock'] = df.apply(get_is_stock, axis='columns')
df['is_transit'] = df.apply(get_is_transit, axis='columns')
df['is_on_order'] = df.apply(get_is_on_order, axis='columns')

In [None]:
# Preview the first rows of the locations frame.
df.head()

## catalog/categories

In [None]:
# Load the category tree dump. The context manager closes the file handle
# once parsing is done; the encoding is pinned so the result does not depend
# on the machine's locale.
with open('/home/abezpalov/data/ocs/categories.json', 'r', encoding='utf-8') as file:
    response = json.load(file)

In [None]:
def tree_to_list(data, parent=None):
    """Flatten a category tree into the module-level ``data_list``.

    Every node in ``data`` gets a ``'parent_key'`` entry set to ``parent``,
    is appended to ``data_list``, and then its ``'children'`` are processed
    recursively with the node's ``'category'`` value as their parent.
    The node dicts are modified in place and nothing is returned.
    """
    for node in data:
        node['parent_key'] = parent
        data_list.append(node)
        children = node['children']
        node_key = node['category']
        tree_to_list(children, parent=node_key)

def get_order(x):
    """Return the last two characters of ``x`` converted to ``int``."""
    tail = x[-2:]
    return int(tail)

# Accumulator that tree_to_list() appends the flattened nodes to.
data_list = []
tree_to_list(response)

# One row per category; 'order' is taken from the last two characters of the key.
df = pd.DataFrame(data_list).rename(columns={'category': 'key'})
df['order'] = df['key'].apply(get_order)
df = df.drop(columns='children')

In [None]:
# Preview the first rows of the flattened categories frame.
df.head()

## catalog/categories/all/products

In [None]:
# Load the products dump. The context manager closes the file handle once
# parsing is done; the encoding is pinned so the result does not depend on
# the machine's locale.
with open('/home/abezpalov/data/ocs/products.json', 'r', encoding='utf-8') as file:
    response = json.load(file)

# Display the parsed response as the cell output.
response

In [None]:
def get_product_metadata(row):
    """Serialise the row's ``itemNameRus`` and ``productName`` to a JSON string."""
    fields = ('itemNameRus', 'productName')
    return json.dumps({field: row[field] for field in fields})

def fix_description(x):
    """Convert an HTML description into plain text.

    Line breaks in the input are replaced by spaces, the result is parsed as
    HTML and all text nodes are joined with newlines.

    Returns:
        The extracted text, or None when ``x`` is not a string or contains
        only whitespace.
    """
    if not isinstance(x, str):
        return None
    x = x.replace('\n', ' ').replace('\r', ' ')
    if not x.strip():
        # lxml.html.fromstring() raises ParserError ("Document is empty") on
        # blank input, so treat a blank description as missing.
        return None
    tree = lxml.html.fromstring(x)
    return '\n'.join(tree.xpath('.//text()'))

def get_length(row):
    """Return the larger of the row's width and depth as a float.

    Returns None when ``width_temp`` or ``depth_temp`` is falsy.
    """
    width = row['width_temp']
    if not width:
        return None
    depth = row['depth_temp']
    if not depth:
        return None
    return max(float(width), float(depth))
    
def get_width(row):
    """Return the smaller of the row's width and depth as a float.

    Returns None when ``width_temp`` or ``depth_temp`` is falsy.
    """
    width = row['width_temp']
    if not width:
        return None
    depth = row['depth_temp']
    if not depth:
        return None
    return min(float(width), float(depth))
    
def get_height(row):
    """Return ``row['height_temp']`` as a float, or None when it is falsy."""
    height = row['height_temp']
    return float(height) if height else None

def fix_weight(x):
    """Return ``x`` unchanged when it is truthy, otherwise None."""
    return x or None
    
def fix_volume(x):
    """Return ``x`` unchanged when it is truthy, otherwise None."""
    return x or None

def get_price_in(row):
    """Return the purchase price value for a party row.

    ``row['order']`` is preferred; ``row['priceList']`` is the fallback.
    A column is used only when its value is exactly a ``dict``. Returns
    None when neither qualifies.
    """
    for column in ('order', 'priceList'):
        price = row[column]
        if type(price) is dict:
            return price['value']
    return None

def get_currency_key_in(row):
    """Return the purchase price currency for a party row.

    ``row['order']`` is preferred; ``row['priceList']`` is the fallback.
    A column is used only when its value is exactly a ``dict``. The code
    'RUR' is reported as 'RUB'. Returns None when neither column qualifies.
    """
    for column in ('order', 'priceList'):
        price = row[column]
        if type(price) is dict:
            currency = price['currency']
            return 'RUB' if currency == 'RUR' else currency
    return None

def get_price_out(row):
    """Return the end-user price value for a party row.

    ``row['endUserWeb']`` is preferred; ``row['endUser']`` is the fallback.
    A column is used only when its value is exactly a ``dict``. Returns
    None when neither qualifies.
    """
    for column in ('endUserWeb', 'endUser'):
        price = row[column]
        if type(price) is dict:
            return price['value']
    return None

def get_currency_key_out(row):
    """Return the end-user price currency for a party row.

    ``row['endUserWeb']`` is preferred; ``row['endUser']`` is the fallback.
    A column is used only when its value is exactly a ``dict``. The code
    'RUR' is reported as 'RUB'. Returns None when neither column qualifies.
    """
    for column in ('endUserWeb', 'endUser'):
        price = row[column]
        if type(price) is dict:
            currency = price['currency']
            return 'RUB' if currency == 'RUR' else currency
    return None

def get_sale(x):
    """Return True when ``x`` equals 'Sale', otherwise False."""
    return bool(x == 'Sale')

def get_unconditional(x):
    """Return True when ``x`` equals 'Unconditional', otherwise False."""
    return bool(x == 'Unconditional')

def get_party_metadata(row):
    """Serialise the row's ``discountB2B`` and ``deliveryDate`` to a JSON string."""
    fields = ('discountB2B', 'deliveryDate')
    return json.dumps({field: row[field] for field in fields})

def get_quantity(x):
    """Return ``x['value']`` converted to ``int``."""
    value = x['value']
    return int(value)
    
def get_quantity_great_than(x):
    """Return ``x['isGreatThan']`` converted to ``bool``."""
    flag = x['isGreatThan']
    return bool(flag)

def get_bool(x):
    """Return True only when ``x`` is the ``True`` singleton, otherwise False."""
    return x is True

def get_expected_date(x):
    """Parse a ``YYYY-MM-DDT00:00:00`` string into a ``datetime.date``.

    Returns None when ``x`` is not exactly a ``str``. A string that does not
    match the format makes ``strptime`` raise ``ValueError``.
    """
    if type(x) is not str:
        return None
    parsed = datetime.datetime.strptime(x, "%Y-%m-%dT00:00:00")
    return parsed.date()

def get_warranty(x):
    """Map a warranty description to a number of months.

    Returns the mapped value for a known description (which is None for the
    "no warranty" text and 0 for the lifetime-warranty text) and None for
    any description that is not in the table.
    """
    months_by_description = {
        'Гарантия дистрибьютора 12 мес. с даты отгрузки': 12,
        'Гарантии нет. Условия уточняются у сейл-менеджера': None,
        'Срок гарантии 1 год': 12,
        'Срок гарантии 5 лет': 60,
        'Срок гарантии 2 года': 24,
        'Срок гарантии 3 года': 36,
        'Срок гарантии 6 мес.': 6,
        'Гарантия дистрибьютора 24 мес. с даты отгрузки': 24,
        'Гарантия дистрибьютора 60 мес. с даты отгрузки': 60,
        'Гарантия дистрибьютора 36 мес. с даты отгрузки': 36,
        'Гарантия дистрибьютора 120 мес. с даты отгрузки': 120,
        'Срок гарантии 4 года': 48,
        'Срок гарантии 7 лет.': 84,
        'Гарантия дистрибьютора 1 мес. с даты отгрузки': 1,
        'Гарантия дистрибьютора 3 мес. с даты отгрузки': 3,
        'Гарантия дистрибьютора 12 мес. с даты ввода в эксплуатацию': 12,
        'Гарантия дистрибьютора 24 мес. с даты ввода в эксплуатацию': 24,
        'Пожизненная гарантия': 0,
        'Срок гарантии 2 мес.': 2,
        'Гарантия дистрибьютора 84 мес. с даты отгрузки': 84,
        'Срок гарантии 1 мес.': 1,
        'Гарантия дистрибьютора 15 мес. с даты отгрузки': 15,
    }
    return months_by_description.get(x)

# Accumulators for the three tables built from the products response.
products_data_list = list()
parties_data_list = list()
barcodes_data_list = list()

# Product fields that hold comma-separated codes -> name of the code form.
# Defined once, outside the loop, because it never changes.
forms = {'eaN128': 'EAN 128', 'upc': 'UPC', 'hsCode': 'HS Code', 'pnc': 'PNC'}

for item in response['result']:

    # Product row: the product fields merged with the package information
    # (package keys are applied last, so they win on a name clash).
    product_row_ = dict()
    product_row_.update(item['product'])
    product_row_.update(item['packageInformation'])
    products_data_list.append(product_row_)

    # One party row per location: product reference, then the price fields
    # (when present), then the location fields on top.
    for party_item in item['locations']:

        party_row_ = dict()
        party_row_['product_key'] = item['product']['itemId']
        party_row_['condition'] = item['product']['condition']

        if item.get('price', None):
            party_row_.update(item['price'])

        party_row_.update(party_item)
        parties_data_list.append(party_row_)

    # One barcode row per non-empty code.
    for key in forms:
        # `or ''` covers both a missing field and an explicit null.
        raw_codes = item['product'].get(key) or ''
        for code in raw_codes.split(','):
            code = code.strip()
            if not code:
                # ''.split(',') yields [''] - do not emit empty barcodes.
                continue
            code_row_ = dict()
            code_row_['product_key'] = item['product']['itemId']
            code_row_['value'] = code
            code_row_['form'] = forms[key]
            barcodes_data_list.append(code_row_)

df = pd.DataFrame(products_data_list)
parties_df = pd.DataFrame(parties_data_list)
# Explicit columns keep the frame's shape even when no barcode was collected.
barcodes_df = pd.DataFrame(barcodes_data_list, columns=['product_key', 'value', 'form'])

# Switch the API field names to the local column names. The raw dimensions
# get a ``_temp`` suffix because 'width' and 'height' are recomputed below.
df = df.rename(
    columns={
        'itemId': 'key',
        'partNumber': 'part_number',
        'producer': 'vendor_key',
        'category': 'category_key',
        'itemName': 'name',
        'vatPercent': 'vat',
        'originalCountryISOCode': 'country_key',
        'width': 'width_temp',
        'height': 'height_temp',
        'depth': 'depth_temp',
        'units': 'unit_key',
        'productDescription': 'description',
        'multiplicity': 'min_of_quantity',
        'warranty': 'warranty_description',
    },
)

# Row-wise derived columns.
df['metadata'] = df.apply(get_product_metadata, axis='columns')
df['length'] = df.apply(get_length, axis='columns')
df['width'] = df.apply(get_width, axis='columns')
df['height'] = df.apply(get_height, axis='columns')

# Column-wise clean-ups.
df['step_of_quantity'] = df['min_of_quantity']
df['description'] = df['description'].apply(fix_description)
df['weight'] = df['weight'].apply(fix_weight)
df['volume'] = df['volume'].apply(fix_volume)
df['warranty'] = df['warranty_description'].apply(get_warranty)

# Drop the source columns that were folded into 'metadata' or replaced above,
# together with the fields that are not kept.
df = df.drop(
    columns=[
        'itemNameRus',
        'productName',
        'catalogPath',
        'condition',
        'conditionDescription',
        'serialNumberAvailability',
        'width_temp',
        'height_temp',
        'depth_temp',
        'productKey',
    ],
)


# Switch the API field names to the local column names; the raw quantity
# object is kept as 'quantity_temp' until it has been unpacked.
parties_df = parties_df.rename(
    columns={
        'location': 'location_key',
        'quantity': 'quantity_temp',
        'canReserve': 'can_reserve',
        'mustKeepEndUserPrice': 'must_keep_end_user_price',
    },
)

# Column-wise conversions.
parties_df['quantity'] = parties_df['quantity_temp'].apply(get_quantity)
parties_df['quantity_great_than'] = parties_df['quantity_temp'].apply(get_quantity_great_than)
parties_df['must_keep_end_user_price'] = parties_df['must_keep_end_user_price'].apply(get_bool)
parties_df['expected_date'] = parties_df['arrivalDate'].apply(get_expected_date)

# Row-wise price extraction.
parties_df['price_in'] = parties_df.apply(get_price_in, axis='columns')
parties_df['currency_key_in'] = parties_df.apply(get_currency_key_in, axis='columns')
parties_df['price_out'] = parties_df.apply(get_price_out, axis='columns')
parties_df['currency_key_out'] = parties_df.apply(get_currency_key_out, axis='columns')
parties_df['unconditional'] = parties_df['condition'].apply(get_unconditional)
parties_df['metadata'] = parties_df.apply(get_party_metadata, axis='columns')

# Drop the raw columns that were unpacked above or are not kept.
parties_df = parties_df.drop(
    columns=[
        'description',
        'type',
        'quantity_temp',
        'priceList',
        'order',
        'discountB2B',
        'deliveryDate',
        'endUser',
        'arrivalDate',
        'endUserWeb',
        'departureDate',
    ],
)

# Bundle the three resulting frames under their table names.
dfs = dict(products=df, barcodes=barcodes_df, parties=parties_df)

In [None]:
# Preview the first rows of the products frame.
dfs['products'].head()

In [None]:
# List the distinct barcode values that were collected.
dfs['barcodes']['value'].unique()

In [None]:
# Preview the first rows of the parties frame.
dfs['parties'].head()

## content/batch

In [None]:
# Load one content batch. The context manager closes the file handle once
# parsing is done; the encoding is pinned so the result does not depend on
# the machine's locale.
with open('/home/abezpalov/data/ocs/content_data_4_of_67.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Display the parsed batch as the cell output.
data

In [None]:
# Initialise the accumulators; each stays None until a batch provides data.
parameter_groups_df = None
parameters_df = None
values_df = None
images_df = None

# Properties: one flat row per (product, property) pair.
properties_data = list()

for product_item in data['result']:

    # Products without a list of properties are skipped.
    if isinstance(product_item.get('properties', None), list):
        for property_item in product_item['properties']:
            # Named property_row so the builtin `property` is not shadowed.
            property_row = {
                'product_key': product_item['itemId'],
                'group_key': property_item.get('group', None),
                'parameter_key': property_item['id'],
                'name': property_item['name'],
                'type': property_item['type'],
                'value': property_item['value'],
                'unit_key': property_item.get('unit', None),
            }
            properties_data.append(property_row)

if properties_data:
    df = pd.DataFrame(properties_data)

    # parameter_groups_df: distinct group keys. The first batch is
    # de-duplicated as well, matching the concat branch and parameters_df.
    df['key'] = df['group_key']
    parameter_groups_df = df[['key']].copy().drop_duplicates() if parameter_groups_df is None \
        else pd.concat([parameter_groups_df, df[['key']]]).copy().drop_duplicates()

    # parameters: distinct (key, group_key, name) combinations.
    # The 'key' column is reused and now holds the parameter key.
    df['key'] = df['parameter_key']
    parameters_df = df[['key', 'group_key', 'name']].copy().drop_duplicates() if parameters_df is None \
        else pd.concat([parameters_df, df[['key', 'group_key', 'name']]]).copy().drop_duplicates()

    # values: every property value, duplicates kept.
    values_df = df[['product_key', 'group_key',  'parameter_key', 'value', 'unit_key']] \
        if values_df is None \
        else pd.concat([values_df, df[['product_key', 'group_key',  'parameter_key', 'value', 'unit_key']]])

In [None]:
# Display the collected parameter groups.
parameter_groups_df

In [None]:
# Display the collected parameters.
parameters_df

In [None]:
# Display the collected property values.
values_df

In [None]:
# test: flatten properties and images of the loaded batch into two frames.

properties_data = list()
images_data = list()

# The batch is loaded into `data` by the content/batch cell; the name
# `contents_data` used here before is never defined in this notebook.
for product_item in data['result']:

    # Products without a list of properties are skipped.
    if isinstance(product_item.get('properties', None), list):
        for property_item in product_item['properties']:
            # Named property_row so the builtin `property` is not shadowed.
            property_row = {
                'product_key': product_item['itemId'],
                'parameter_key': property_item['id'],
                'name': property_item['name'],
                'type': property_item['type'],
                'value': property_item['value'],
            }
            properties_data.append(property_row)

    # Products without a list of images are skipped.
    if isinstance(product_item.get('images', None), list):
        for image_item in product_item['images']:
            image = {
                'product_key': product_item['itemId'],
                'url': image_item['url'],
                'size': image_item['size'],
                'width': image_item['width'],
                'height': image_item['height'],
                'order': image_item['order'],
            }
            images_data.append(image)

properties_df = pd.DataFrame(properties_data)
images_df = pd.DataFrame(images_data)

In [None]:
# Display the flattened properties frame.
properties_df

In [None]:
# Display the flattened images frame.
images_df

### parameters_df

In [None]:
# One flat row per (product, property) pair.
properties_data = list()

# The batch is loaded into `data` by the content/batch cell; the name
# `contents_data` used here before is never defined in this notebook.
for product_item in data['result']:

    # Products without a list of properties are skipped.
    if isinstance(product_item.get('properties', None), list):
        for property_item in product_item['properties']:
            # Named property_row so the builtin `property` is not shadowed.
            property_row = {
                'product_key': product_item['itemId'],
                'group_key': property_item.get('group', None),
                'parameter_key': property_item['id'],
                'name': property_item['name'],
                'type': property_item['type'],
                'value': property_item['value'],
                'unit_key': property_item.get('unit', None),
            }
            properties_data.append(property_row)

df = pd.DataFrame(properties_data)
df

In [None]:
# Copy the group keys into the 'key' column and take that single column as
# groups_df; duplicates are not removed here.
df['key'] = df['group_key']
groups_df = df['key']
groups_df

In [None]:
# Overwrite the 'key' column with the parameter keys, then keep the distinct
# (key, group_key, name) combinations.
df['key'] = df['parameter_key']
parameters_df = df[['key', 'group_key', 'name']].copy().drop_duplicates()
parameters_df

In [None]:
# Select the value columns for every property row; duplicates are kept.
values_df = df[['product_key', 'group_key', 'parameter_key', 'value', 'unit_key']]
values_df

In [None]:
# One flat row per (product, image) pair.
images_data = list()

# The batch is loaded into `data` by the content/batch cell; the name
# `contents_data` used here before is never defined in this notebook.
for product_item in data['result']:

    # Products without a list of images are skipped.
    if isinstance(product_item.get('images', None), list):
        for image_item in product_item['images']:
            image = {
                'product_key': product_item['itemId'],
                'url': image_item['url'],
                'size': image_item['size'],
                'width': image_item['width'],
                'height': image_item['height'],
                'order': image_item['order'],
            }
            images_data.append(image)

df = pd.DataFrame(images_data)
df