# 3Logic

In [None]:
import numpy as np
import pandas as pd
import json
import lxml.html
import datetime

## Категории

In [None]:
# Read the product-categories dump. The context manager closes the handle as
# soon as parsing finishes (the bare open() left it open for the whole
# session), and the explicit encoding keeps the read independent of the locale.
with open('/home/abezpalov/data/3logic/product-categories.json', 'r', encoding='utf-8') as file:
    response = json.load(file)

In [None]:
# Tabulate the current `response` payload for inspection
# (product-categories.json when the cells are run top to bottom).
df = pd.DataFrame(response)

In [None]:
# Read the price-categories dump. The context manager closes the handle as
# soon as parsing finishes (the bare open() left it open for the whole
# session), and the explicit encoding keeps the read independent of the locale.
with open('/home/abezpalov/data/3logic/price-categories.json', 'r', encoding='utf-8') as file:
    response = json.load(file)

In [None]:
def get_category_key(x):
    """Return the last segment of a '/'-separated category path.

    A single trailing '/' is ignored, so 'a/b/' and 'a/b' both give 'b'.
    An empty path yields '' instead of raising IndexError.

    NOTE: a later cell in this notebook defines another function with the
    same name (a lookup by price category id); whichever cell ran last wins.
    """
    # endswith() is safe on an empty string, unlike indexing x[-1].
    if x.endswith('/'):
        x = x[:-1]
    return x.split('/')[-1]

def get_parent_key(x):
    """Return the second-to-last segment of a '/'-separated category path.

    A single trailing '/' is ignored. Returns None when the path has only
    one segment (no parent), including the empty path, which previously
    raised IndexError.
    """
    # endswith() is safe on an empty string, unlike indexing x[-1].
    if x.endswith('/'):
        x = x[:-1]
    segments = x.split('/')
    return segments[-2] if len(segments) > 1 else None

def get_category_metadata(row):
    """Serialize the raw category fields of *row* to a JSON string.

    Only 'price_category_id', 'path' and 'is_parent' are kept, in that
    order. A missing field raises KeyError.
    """
    kept_fields = ['price_category_id', 'path', 'is_parent']
    return json.dumps({field: row[field] for field in kept_fields})

# Build the categories table from the current `response` payload: derive
# `key` and `parent_key` from each path, pack the raw fields into a JSON
# `metadata` column, then discard the raw columns.
categories_df = pd.DataFrame(response)
path_column = categories_df['path']
categories_df['key'] = path_column.apply(get_category_key)
categories_df['parent_key'] = path_column.apply(get_parent_key)
categories_df['metadata'] = categories_df.apply(get_category_metadata, axis=1)
categories_df = categories_df.drop(
    columns=['price_category_id', 'path', 'level', 'is_parent'],
)

## Производители

In [None]:
# Read the vendors dump. The context manager closes the handle as soon as
# parsing finishes (the bare open() left it open for the whole session), and
# the explicit encoding keeps the read independent of the locale.
with open('/home/abezpalov/data/3logic/vendors.json', 'r', encoding='utf-8') as file:
    response = json.load(file)

In [None]:
# Vendors table: the feed's `brand_id` column becomes the generic `key`.
vendor_df = pd.DataFrame(response).rename(columns={'brand_id': 'key'})

## Продукты

In [None]:
# Read the extended products dump. The context manager closes the handle as
# soon as parsing finishes (the bare open() left it open for the whole
# session), and the explicit encoding keeps the read independent of the locale.
with open('/home/abezpalov/data/3logic/products_ext_124.json', 'r', encoding='utf-8') as file:
    data_list = json.load(file)

In [None]:
# Read the categories_keys.json mapping used by the category lookup helper.
# The context manager closes the handle as soon as parsing finishes (the bare
# open() left it open for the whole session), and the explicit encoding keeps
# the read independent of the locale.
with open('/home/abezpalov/data/3logic/categories_keys.json', 'r', encoding='utf-8') as file:
    categories_keys = json.load(file)

In [None]:
# This cell builds several frames from `data_list`:
#   products_df - one row per product (renamed and derived columns)
#   barcodes_df - product key / barcode pairs
# (parties_df, stock_df and transit_df are derived further down as well.)

# Raw product records as a DataFrame; columns are reshaped below.
products_df = pd.DataFrame(data_list)

def get_category_key(x):
    """Look up *x* (converted to str) in `categories_keys`.

    Returns the mapped value, or None when the key is absent.

    NOTE: this rebinds the name of the path-based `get_category_key`
    defined in an earlier cell of this notebook; whichever cell ran last wins.
    """
    lookup = str(x)
    try:
        found = categories_keys[lookup]
    except KeyError:
        found = None
    return found

def get_length(row):
    """Return the larger of 'product_length' and 'product_width'.

    Returns None when either value is falsy (0, '', None).

    NOTE(review): the two values are compared as-is; if the feed delivers
    them as strings, max() compares lexicographically - confirm the types.
    """
    if not row['product_length'] or not row['product_width']:
        return None
    return max(row['product_length'], row['product_width'])

def get_width(row):
    """Return the smaller of 'product_length' and 'product_width'.

    Returns None when either value is falsy (0, '', None).

    NOTE(review): the two values are compared as-is; if the feed delivers
    them as strings, min() compares lexicographically - confirm the types.
    """
    if not row['product_length'] or not row['product_width']:
        return None
    return min(row['product_length'], row['product_width'])

def get_positive_float(x):
    """Convert *x* to a float and return it only if strictly positive.

    Returns None for zero, negative values, NaN, and for anything that
    cannot be converted (None, '', non-numeric text). Previously None and
    '' raised, and negative or NaN values were passed through despite the
    function's name.
    """
    try:
        value = float(x)
    except (TypeError, ValueError):
        # Missing or malformed measurement: treat as "no value".
        return None
    # `value > 0` is False for NaN, so NaN is also mapped to None.
    return value if value > 0 else None

def get_min_of_quantity(x):
    """Return *x* as a float, or 1 when *x* is falsy (0, '', None)."""
    if not x:
        return 1
    return float(x)

def get_quantity(x):
    """Return x['quantity'], or None when *x* cannot be indexed that way.

    Only TypeError is handled (e.g. *x* is None or a float); a mapping
    that lacks the 'quantity' key still raises KeyError.
    """
    try:
        amount = x['quantity']
    except TypeError:
        amount = None
    return amount

def get_expected_date(x):
    """Return x['arrival_date'], or None when *x* cannot be indexed that way.

    Only TypeError is handled (e.g. *x* is None or a float); a mapping
    that lacks the 'arrival_date' key still raises KeyError.
    """
    try:
        arrival = x['arrival_date']
    except TypeError:
        arrival = None
    return arrival

# Map the feed's column names onto the names used in the rest of the notebook.
column_names = {
    'product_id': 'key',
    'partnumber': 'part_number',
    'product_name': 'name',
    'product_height': 'height',
    'brand_id': 'vendor_key',
    'package_volume': 'volume',
    'package_weight': 'weight',
}
products_df = products_df.rename(columns=column_names)

# Derived columns: category lookup, normalized dimensions, order quantities.
products_df['category_key'] = products_df['price_category_id'].apply(get_category_key)
products_df['length'] = products_df.apply(get_length, axis=1)
products_df['width'] = products_df.apply(get_width, axis=1)
for measure in ('height', 'weight', 'volume'):
    products_df[measure] = products_df[measure].apply(get_positive_float)
products_df['min_of_quantity'] = products_df['package_quantity'].apply(get_min_of_quantity)
products_df['step_of_quantity'] = products_df['min_of_quantity']

# Keep only rows whose quality is 'good'; every frame below derives from them.
is_good = products_df['product_quality'] == 'good'
products_df = products_df[is_good]

# Barcodes: one (product_key, value) row per product with a non-empty barcode.
barcodes_df = (
    products_df[['key', 'barcode']]
    .copy()
    .rename(columns={'key': 'product_key', 'barcode': 'value'})
)
barcodes_df = barcodes_df[barcodes_df['value'] != '']

# Parties: price and availability columns shared by the stock and transit frames.
parties_df = (
    products_df[['key', 'price', 'currency_iso_code', 'remain', 'remain_addinf', 'transit']]
    .copy()
    .assign(unconditional=False)
    .rename(columns={'key': 'product_key',
                     'price': 'price_in',
                     'currency_iso_code': 'currency_key_in'})
    .assign(unit_key='шт')
)

# Stock: rows with a positive `remain`, which becomes `quantity`.
stock_df = (
    parties_df[parties_df['remain'] > 0]
    .copy()
    .rename(columns={'remain': 'quantity'})
    .assign(location_key="Склад")
    .assign(expected_date=None)
    .drop(columns=['remain_addinf', 'transit'])
)

# Transit: quantity and expected date are read from the `transit` cell.
transit_df = parties_df.copy()
transit_column = transit_df['transit']
transit_df['quantity'] = transit_column.apply(get_quantity)
transit_df['expected_date'] = transit_column.apply(get_expected_date)
transit_df = (
    transit_df
    .assign(location_key="Транзит")
    .drop(columns=['remain', 'remain_addinf', 'transit'])
)
transit_df = transit_df[transit_df['quantity'] > 0]

# Final parties table: stock rows followed by transit rows.
parties_df = pd.concat([stock_df, transit_df])

# Remove the raw feed columns that the derived frames no longer need.
products_df = products_df.drop(columns=[
    'model', 'brand_name', 'photos', 'attributes', 'product_length', 'product_width',
    'package_quantity', 'barcode', 'price', 'currency_iso_code', 'remain', 'remain_addinf',
    'transit', 'product_category', 'product_category_id', 'price_category_id', 'price_category',
])

In [None]:
# Notebook display: render products_df for visual inspection.
products_df

## Характеристики и продукты

In [None]:
# Read the content dump (photos and attributes per product). The context
# manager closes the handle as soon as parsing finishes (the bare open() left
# it open for the whole session), and the explicit encoding keeps the read
# independent of the locale.
with open('/home/abezpalov/data/3logic/content_124.json', 'r', encoding='utf-8') as file:
    data_list = json.load(file)

In [None]:
# Notebook display: show the raw parsed payload for visual inspection.
data_list

In [None]:
def get_value(x):
    """Flatten a multi-value attribute into a single string.

    A list is joined with ', ' (items are converted with str(), so numeric
    items no longer raise TypeError); any other value is returned unchanged.
    """
    if isinstance(x, list):
        return ', '.join(map(str, x))
    return x

images_data = []
values_data = []

# Flatten the nested payload: one output row per photo and per attribute,
# each tagged with the id of the product it belongs to.
for product_item in data_list:
    for photo in product_item['photos'] or ():
        images_data.append({'product_key': product_item['product_id'], **photo})

    for attribute in product_item['attributes'] or ():
        values_data.append({'product_key': product_item['product_id'], **attribute})

images_df = pd.DataFrame(images_data)
values_df = pd.DataFrame(values_data)

# Images: keep the large image URL as `source_url`, drop the other photo fields.
images_df = (
    images_df
    .rename(columns={'large_image_url': 'source_url'})
    .drop(columns=['photo_id', 'small_image_url'])
)

# Groups: distinct group names; the name doubles as the key.
groups_df = (
    values_df[['group_name']]
    .copy()
    .drop_duplicates()
    .rename(columns={'group_name': 'name'})
)
groups_df['key'] = groups_df['name']

# Parameters: distinct (group, attribute name) pairs; the name doubles as the key.
parameters_df = values_df[['group_name', 'attribute_name']].rename(
    columns={'group_name': 'group_key', 'attribute_name': 'name'},
)
parameters_df['key'] = parameters_df['name']
parameters_df = parameters_df.drop_duplicates()

# Values: flatten list values to text, rename to key columns, drop raw fields.
values_df['value'] = values_df['value'].apply(get_value)
values_df = (
    values_df
    .rename(columns={'group_name': 'group_key',
                     'unit': 'unit_key',
                     'attribute_name': 'parameter_key'})
    .drop(columns=['attribute_id', 'type', 'is_multivalue'])
)

In [None]:
# Notebook display: render values_df for visual inspection.
values_df

In [None]:
# Notebook display: render parameters_df for visual inspection.
parameters_df

In [None]:
# Read the products_25.json dump and tabulate it. The context manager closes
# the handle as soon as parsing finishes (the bare open() left it open for the
# whole session), and the explicit encoding keeps the read independent of the
# locale.
with open('/home/abezpalov/data/3logic/products_25.json', 'r', encoding='utf-8') as file:
    data_list = json.load(file)
df = pd.DataFrame(data_list)

# Notebook display: render the frame for visual inspection.
df