# RRC

In [None]:
import numpy as np
import pandas as pd
import json

import requests
import lxml.html
import lxml.etree

## Каталог XML

In [None]:
# Call the personal XML endpoint with the account parameters (placeholders
# here). The response object is not stored; it is only echoed as the cell
# output.
# NOTE(review): presumably this call authorizes or triggers the export that
# the next cell downloads - confirm.
# NOTE(review): the credentials are passed via `params`, i.e. in the URL query
# string of a GET request - confirm this is what the service requires.
# A timeout is set so the notebook cannot hang forever on an unresponsive
# server (requests waits indefinitely by default).
requests.get(
    'https://thinklink.ru/personal/xml',
    params={
        'code': '__',
        'id': '__',
        'login': '__',
        'pass': '__',
    },
    timeout=60,
)

In [None]:
# Download the XML catalog export. The timeout prevents an indefinite hang,
# and raise_for_status() fails fast on 4xx/5xx answers so that an HTTP error
# page is never handed to the XML parser in the next cell.
response = requests.get('https://thinklink.ru/personal/xml/__.xml', timeout=60)
response.raise_for_status()

In [None]:
# Parse the downloaded response body (bytes) with lxml; `tree` is what
# lxml.etree.fromstring() returns and is queried with XPath in the cells below.
tree = lxml.etree.fromstring(response.content)
# Echo the parsed object as the cell output.
tree

### categories_df

In [None]:
def _first_text(node, path):
    """Return the first result of XPath *path* evaluated on *node*, or None.

    Used for optional child elements: when the expression matches nothing
    the value is reported as None instead of raising IndexError.
    """
    matches = node.xpath(path)
    return matches[0] if matches else None


# DataFrame column -> XPath (relative to a <section> element) of its text.
# The insertion order defines the column order of the resulting frame.
CATEGORY_FIELD_XPATHS = {
    'key': './id/text()',
    'code': './code/text()',
    'name': './name/text()',
    'parent_key': './section_id/text()',
    'section_code': './section_code/text()',
    'depth_level': './depth_level/text()',
}

# One dict per <section> element; fields missing in the XML become None.
categories_data_list = [
    {
        column: _first_text(section, path)
        for column, path in CATEGORY_FIELD_XPATHS.items()
    }
    for section in tree.xpath('./section_catalog/section')
]

# Passing the columns explicitly keeps the expected schema even when the
# catalog contains no sections (the frame would otherwise have no columns and
# every df['...'] lookup below would raise KeyError).
df = pd.DataFrame(categories_data_list, columns=list(CATEGORY_FIELD_XPATHS))

df

#### Смотрим данные

In [None]:
# List the column labels of the categories frame (iterating a DataFrame
# yields its column labels).
list(df)

In [None]:
# Inspect the 'key' column: summary statistics, then its distinct values.
display(df['key'].describe())
display(df['key'].unique())

In [None]:
# Inspect the 'code' column: summary statistics, then its distinct values.
display(df['code'].describe())
display(df['code'].unique())

In [None]:
# Inspect the 'name' column: summary statistics, then its distinct values.
display(df['name'].describe())
display(df['name'].unique())

In [None]:
# Inspect the parent-section reference: summary statistics, then its distinct
# values. The extraction loop stores the <section_id> text under the
# 'parent_key' column, so the frame has no 'section_id' column and looking it
# up raised KeyError.
display(df['parent_key'].describe())
display(df['parent_key'].unique())

In [None]:
# Inspect the 'section_code' column: summary statistics, then its distinct
# values.
display(df['section_code'].describe())
display(df['section_code'].unique())

In [None]:
# Inspect the 'depth_level' column: summary statistics, then its distinct
# values.
display(df['depth_level'].describe())
display(df['depth_level'].unique())

In [None]:
# Rename the 'section_id' column to 'parent_key', modifying df in place.
# NOTE(review): the extraction loop already stores this value under
# 'parent_key', so there appears to be no 'section_id' column left to
# rename - confirm whether this cell is still needed.
df.rename(columns={'section_id': 'parent_key'}, inplace=True)

# Echo the frame as the cell output.
df

#### Чистим данные

In [None]:
# Build the categories table from three columns of df: the category key, the
# key of its parent and the name. The other extracted columns are left out.
categories_df = df[['key', 'parent_key', 'name']]
# Echo the result as the cell output.
categories_df

In [None]:
# Inspect the 'parent_key' column: summary statistics, then its distinct
# values.
display(df['parent_key'].describe())
display(df['parent_key'].unique())

### vendors_df, products_df, parties_df

In [None]:
def _first_text(node, path):
    """Return the first result of XPath *path* evaluated on *node*, or None.

    Used for optional child elements: when the expression matches nothing
    the value is reported as None instead of raising IndexError.
    """
    matches = node.xpath(path)
    return matches[0] if matches else None


# DataFrame column -> XPath (relative to an <item> element) of its text.
# The insertion order defines the column order of the resulting frame.
PRODUCT_FIELD_XPATHS = {
    'key': './id/text()',
    'name': './name/text()',
    'code': './code/text()',
    'category_key': './section_id/text()',
    'section_code': './section_code/text()',
    'vendor_pn': './vendor_pn/text()',
    'partnumber': './partnumber/text()',
    'vendor_key': './vendor/text()',
    'quantity': './quantity/text()',
    'price_rub': './price_rub/text()',
    'price_usd': './price_usd/text()',
    'price_eur': './price_eur/text()',
    'MSRP_RUR': './MSRP_RUR/text()',
    'WithoutVAT': './WithoutVAT/text()',
}

# One dict per <item> element; fields missing in the XML become None.
products_data_list = [
    {
        column: _first_text(item, path)
        for column, path in PRODUCT_FIELD_XPATHS.items()
    }
    for item in tree.xpath('./catalog_item/item')
]

# Passing the columns explicitly keeps the expected schema even when the
# catalog contains no items (the frame would otherwise have no columns and
# every df['...'] lookup below would raise KeyError).
df = pd.DataFrame(products_data_list, columns=list(PRODUCT_FIELD_XPATHS))

df

#### Смотрим данные

In [None]:
# List the column labels of the products frame (iterating a DataFrame yields
# its column labels).
list(df)

In [None]:
# Inspect the 'key' column: summary statistics, then its distinct values.
display(df['key'].describe())
display(df['key'].unique())

In [None]:
# Show the rows whose 'key' equals the string '78808' (keys are compared as
# text here, not as numbers).
df[df['key'] == '78808']

In [None]:
# Inspect the 'name' column: summary statistics, then its distinct values.
display(df['name'].describe())
display(df['name'].unique())

In [None]:
# Inspect the 'code' column: summary statistics, then its distinct values.
display(df['code'].describe())
display(df['code'].unique())

In [None]:
# Inspect the 'category_key' column: summary statistics, then its distinct
# values.
display(df['category_key'].describe())
display(df['category_key'].unique())

In [None]:
# Inspect the 'section_code' column: summary statistics, then its distinct
# values.
display(df['section_code'].describe())
display(df['section_code'].unique())

In [None]:
# Inspect the 'vendor_pn' column: summary statistics, then its distinct
# values.
display(df['vendor_pn'].describe())
display(df['vendor_pn'].unique())

In [None]:
# Inspect the 'partnumber' column: summary statistics, then its distinct
# values.
display(df['partnumber'].describe())
display(df['partnumber'].unique())

In [None]:
# Show the rows where the two part-number fields disagree. The extracted
# column is named 'partnumber'; 'part_number' is only derived in the cleaning
# section further down, so referencing it here raised KeyError when the
# notebook is run top to bottom.
df[df['vendor_pn'] != df['partnumber']]

In [None]:
# Inspect the 'vendor_key' column: summary statistics, then its distinct
# values.
display(df['vendor_key'].describe())
display(df['vendor_key'].unique())

In [None]:
# Inspect the 'quantity' column: summary statistics, then its distinct values.
display(df['quantity'].describe())
display(df['quantity'].unique())

In [None]:
# Inspect the 'price_rub' column: summary statistics, then its distinct
# values.
display(df['price_rub'].describe())
display(df['price_rub'].unique())

In [None]:
# Inspect the 'price_usd' column: summary statistics, then its distinct
# values.
display(df['price_usd'].describe())
display(df['price_usd'].unique())

In [None]:
# Inspect the 'price_eur' column: summary statistics, then its distinct
# values.
display(df['price_eur'].describe())
display(df['price_eur'].unique())

In [None]:
# Inspect the 'MSRP_RUR' column: summary statistics, then its distinct values.
display(df['MSRP_RUR'].describe())
display(df['MSRP_RUR'].unique())

In [None]:
# Inspect the 'WithoutVAT' column: summary statistics, then its distinct
# values.
display(df['WithoutVAT'].describe())
display(df['WithoutVAT'].unique())

#### Чистим данные

In [None]:
def get_part_number(row):
    """Pick the part number for one product row.

    Returns ``row['vendor_pn']`` when it is truthy (not None and not an empty
    string); otherwise falls back to ``row['partnumber']``, which may itself
    be None.
    """
    vendor_part_number = row['vendor_pn']
    if vendor_part_number:
        return vendor_part_number
    return row['partnumber']

# Derive the 'part_number' column by applying get_part_number to every row
# (axis=1 passes each row to the function).
df['part_number'] = df.apply(get_part_number, axis=1)
# Echo the new column as the cell output.
df['part_number']

In [None]:
# Convert every 'quantity' value with int(), replacing the column.
# NOTE(review): int() raises on None and on non-integer text, so this assumes
# every item carried a well-formed <quantity> - confirm against the feed.
df['quantity'] = df['quantity'].apply(int)
# Echo the converted column as the cell output.
df['quantity']

In [None]:
def get_price_in(row):
    """Return the purchase price of a product row as a float, or None.

    The price columns are checked in the order USD, RUB, EUR; the first one
    that is not None and converts to a non-zero float wins. None is returned
    when every price is missing or zero.

    NOTE(review): a float NaN is not None and is truthy, so a NaN value would
    be returned as the price - confirm missing values are always None.
    """
    for column in ('price_usd', 'price_rub', 'price_eur'):
        raw_price = row[column]
        if raw_price is None:
            continue
        amount = float(raw_price)
        if amount:
            return amount
    return None

# Derive the 'price_in' column by applying get_price_in to every row.
df['price_in'] = df.apply(get_price_in, axis=1)
# Echo the new column as the cell output.
df['price_in']

In [None]:
def get_currency_in(row):
    """Return the currency code of the purchase price of a product row.

    Uses the same priority as the price selection: 'USD', then 'RUB', then
    'EUR'. A currency is chosen when its price column is not None and
    converts to a non-zero float. None is returned when no column qualifies.
    """
    currency_by_column = (
        ('price_usd', 'USD'),
        ('price_rub', 'RUB'),
        ('price_eur', 'EUR'),
    )
    for column, currency in currency_by_column:
        raw_price = row[column]
        if raw_price is not None and float(raw_price):
            return currency
    return None

# Derive the 'currency_key_in' column by applying get_currency_in to every
# row.
df['currency_key_in'] = df.apply(get_currency_in, axis=1)
# Echo the new column as the cell output.
df['currency_key_in']

In [None]:
# Inspect the derived 'currency_key_in' column: summary statistics, then its
# distinct values.
display(df['currency_key_in'].describe())
display(df['currency_key_in'].unique())

In [None]:
def get_price_out(x):
    """Convert a raw price value to float; None is passed through unchanged."""
    if x is None:
        return None
    return float(x)

# Derive the 'price_out' column from 'MSRP_RUR' by applying get_price_out to
# every value.
df['price_out'] = df['MSRP_RUR'].apply(get_price_out)
# Echo the new column as the cell output.
df['price_out']

In [None]:
def get_currency_key_out(x):
    """Return 'RUB' for any value that is not None, otherwise None.

    Only the presence of the value is checked; its content is not inspected.
    """
    if x is None:
        return None
    return 'RUB'

# Derive the 'currency_key_out' column from 'MSRP_RUR' by applying
# get_currency_key_out to every value.
df['currency_key_out'] = df['MSRP_RUR'].apply(get_currency_key_out)
# Echo the new column as the cell output.
df['currency_key_out']

In [None]:
# Copy the 'key' column into 'product_key'; the final stock/price table
# selects the product reference under that name.
df['product_key'] = df['key']

In [None]:
# Add a 'location_key' column holding the same constant value for every row;
# assign() returns a new frame, which is bound back to df.
df = df.assign(location_key='Склады RRC')

In [None]:
def get_product_metadata(row):
    """Serialize the identifying fields of a product row to a JSON string.

    The resulting JSON object has the keys 'code', 'vendor_pn' and
    'partnumber', in that order, with the values taken from *row*.
    """
    metadata_fields = ('code', 'vendor_pn', 'partnumber')
    return json.dumps({field: row[field] for field in metadata_fields})

# Derive the 'metadata' column (a JSON string per product) by applying
# get_product_metadata to every row.
df['metadata'] = df.apply(get_product_metadata, axis=1)
# Echo the new column as the cell output.
df['metadata']  

In [None]:
# List the column labels again, now including the derived columns.
list(df)

In [None]:
# Build the products table from the descriptive columns of df: key, name,
# category and vendor references, and the JSON metadata string.
products_df = df[['key', 'name', 'category_key', 'vendor_key', 'metadata']]
# Echo the result as the cell output.
products_df

In [None]:
# Build the parties (stock/price) table: one row per product with its
# quantity, purchase price and currency, outgoing price and currency, and
# location.
parties_df = df[['product_key', 'quantity', 'price_in', 'currency_key_in', 'price_out', 'currency_key_out', 'location_key']]
# Keep only the rows with a positive quantity.
parties_df = parties_df[parties_df['quantity'] > 0]
# The table was originally bound to the misspelled name `patries_df`, while
# the section heading calls it parties_df. The old name is kept as an alias
# so that any code still referring to it keeps working.
patries_df = parties_df
parties_df