# GMC

In [None]:
import numpy as np
import pandas as pd
import json

import requests
import lxml.html
import lxml.etree

In [None]:
# Download the supplier's XML catalog export and parse it into an lxml tree.
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of handing an error page
# to the XML parser.
response = requests.get(
    'https://bs-opt.ru/bitrix/catalog_export/export_yJ0.xml',
    timeout=30,
)
response.raise_for_status()
tree = lxml.etree.fromstring(response.content)

tree

In [None]:
# Turn every <category> node of the feed into one row of categories_df.
# Each column is read with its own XPath; a value the node lacks becomes None.
category_field_paths = (
    ('key', '@id'),
    ('parent_key', '@parentId'),
    ('name', './text()'),
)

categories_data_list = list()
for category_node in tree.xpath('./shop/categories/category'):
    category_row = dict()
    for field, path in category_field_paths:
        found = category_node.xpath(path)
        category_row[field] = found[0] if found else None
    categories_data_list.append(category_row)

categories_df = pd.DataFrame(categories_data_list)
categories_df

In [None]:
# Flatten every <offer> node of the feed into one row of df.
# The pairs below map a DataFrame column to the XPath that supplies it; the
# order of the pairs is the column order. A value the offer lacks becomes None.
offer_field_paths = (
    ('key', '@id'),
    ('type', '@type'),
    ('available', '@available'),
    ('source_url', './url/text()'),
    ('price_in', './price/text()'),
    ('vat', './vat/text()'),
    ('currency_key_in', './currencyId/text()'),
    ('category_key', './categoryId/text()'),
    ('picture', './picture/text()'),
    ('vendor_key', './vendor/text()'),
    ('name', './model/text()'),
    ('part_number', './vendorCode/text()'),
    ('typePrefix', './typePrefix/text()'),
    ('sales_notes', './sales_notes/text()'),
    ('barcode', './barcode/text()'),
    ('quantity', './count/text()'),
)

products_data_list = list()
for offer_node in tree.xpath('./shop/offers/offer'):
    offer_row = dict()
    for field, path in offer_field_paths:
        found = offer_node.xpath(path)
        offer_row[field] = found[0] if found else None
    products_data_list.append(offer_row)

df = pd.DataFrame(products_data_list)
df

### Смотрим данные

In [None]:
# List the column labels of the offers table.
list(df.columns)

In [None]:
# Inspect the 'key' column: summary statistics, then its distinct values.
column = df['key']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the 'type' column: summary statistics, then its distinct values.
column = df['type']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the 'available' column: summary statistics, then its distinct values.
column = df['available']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the offer URLs. The feed's <url> text is stored in df under
# 'source_url'; there is no 'url' column, so the old lookup raised KeyError.
column = df['source_url']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the 'price_in' column: summary statistics, then its distinct values.
column = df['price_in']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the 'vat' column: summary statistics, then its distinct values.
column = df['vat']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the currency codes. The feed's <currencyId> text is stored in df
# under 'currency_key_in'; there is no 'currencyId' column, so the old lookup
# raised KeyError.
column = df['currency_key_in']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the category ids. The feed's <categoryId> text is stored in df under
# 'category_key'; there is no 'categoryId' column, so the old lookup raised
# KeyError.
column = df['category_key']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the 'picture' column: summary statistics, then its distinct values.
column = df['picture']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the vendors. The feed's <vendor> text is stored in df under
# 'vendor_key'; there is no 'vendor' column, so the old lookup raised KeyError.
column = df['vendor_key']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the product names. The feed's <model> text is stored in df under
# 'name'; there is no 'model' column, so the old lookup raised KeyError.
column = df['name']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the part numbers. The feed's <vendorCode> text is stored in df under
# 'part_number'; there is no 'vendorCode' column, so the old lookup raised
# KeyError.
column = df['part_number']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the 'typePrefix' column: summary statistics, then its distinct values.
column = df['typePrefix']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the 'sales_notes' column: summary statistics, then its distinct values.
column = df['sales_notes']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the 'barcode' column: summary statistics, then its distinct values.
column = df['barcode']
display(column.describe())
display(column.unique())

In [None]:
# Inspect the stock quantities. The feed's <count> text is stored in df under
# 'quantity'; there is no 'count' column, so the old lookup raised KeyError.
column = df['quantity']
display(column.describe())
display(column.unique())

### Чистим данные

In [None]:
# List the column labels of the offers table before cleaning.
list(df.columns)

In [None]:
# Add product_key (a copy of key) and the constant location_key / unit_key columns.
df = df.assign(
    product_key=df['key'],
    location_key='GMC',
    unit_key='шт',
)

In [None]:
def get_nds(x):
    """Convert a raw VAT code into a fractional rate.

    The percentage is read from the second underscore-separated field of the
    code and scaled by 0.01, so a code shaped like ``'VAT_20'`` gives ``0.2``.

    Args:
        x: Raw VAT code string, or ``None`` when the offer has no VAT value.

    Returns:
        The rate as a float, or ``None`` when ``x`` is missing, is not a
        string, or has no numeric second field (e.g. a code like
        ``'NO_VAT'``).
    """
    if not isinstance(x, str):
        # Covers None (tag absent) and NaN (pandas' missing-value marker).
        return None
    fields = x.split('_')
    try:
        return float(fields[1]) * 0.01
    except (IndexError, ValueError):
        # No second field, or it is not a number: report "rate unknown"
        # instead of aborting the conversion of the whole column.
        return None
    
# Derive the 'nds' column by running get_nds over every raw 'vat' value, then show it.
df = df.assign(nds=df['vat'].apply(get_nds))
df['nds']

In [None]:
def fix_currency_key(x):
    """Return ``'RUB'`` in place of the code ``'RUR'``; pass any other value through unchanged."""
    if x == 'RUR':
        return 'RUB'
    return x

# Rewrite the currency codes in place with fix_currency_key, then show the column.
df = df.assign(currency_key_in=df['currency_key_in'].apply(fix_currency_key))
df['currency_key_in']

In [None]:
# Product table: the descriptive columns of each offer.
product_columns = [
    'key',
    'category_key',
    'vendor_key',
    'part_number',
    'name',
    'source_url',
]
product_df = df[product_columns]
product_df

In [None]:
# Parties table: quantity, price, currency, VAT rate, location and unit per product.
parties_columns = [
    'product_key',
    'quantity',
    'price_in',
    'currency_key_in',
    'nds',
    'location_key',
    'unit_key',
]
parties_df = df[parties_columns]
parties_df

In [None]:
# Image table: one row per product, with the picture URL exposed as 'source_url'.
images_df = df[['product_key', 'picture']].rename(columns={'picture': 'source_url'})

images_df

## Контент

In [None]:
# Download one product page and parse its HTML into an lxml tree.
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
response = requests.get(
    'https://bs-opt.ru/servernye-komplektuyushchie/operativnaya-pamyat/operativnaya-pamyat-ibm-8gb-1-35v-pc3l-10600-cl9-ecc-ddr3-1333mhz-lp-49y1415/',
    timeout=30,
)
response.raise_for_status()
tree = lxml.html.fromstring(response.text)

tree

In [None]:
# Collect product attributes from the parsed page: first the entries of the
# "product-info" list, then the rows of the "product-specifications-table".
# Every attribute becomes a dict with the title repeated as parameter_key,
# key and name, plus its value.
parameters_list = list()

for item in tree.xpath('.//ul[contains(@class, "product-info")]/li'):
    print(item)

    titles = item.xpath('./div[contains(@class, "product-info-title")]/text()')
    if not titles:
        # An entry without a title cannot be keyed; skip it rather than
        # abort the whole scrape with IndexError.
        continue
    values = item.xpath('./a[contains(@class, "product-info-data")]/text()')

    parameters_list.append({
        'parameter_key': titles[0],
        'key': titles[0],
        'name': titles[0],
        # A missing value is stored as None instead of raising IndexError.
        'value': values[0] if values else None,
    })

for item in tree.xpath('.//table[contains(@class, "product-specifications-table")]//tr'):
    print(item)

    cell_texts = item.xpath('.//td/text()')
    if not cell_texts:
        # Rows without any <td> text (e.g. header rows) carry no attribute.
        continue

    parameters_list.append({
        'parameter_key': cell_texts[0],
        'key': cell_texts[0],
        'name': cell_texts[0],
        # A row with a title but no second text node gets a None value.
        'value': cell_texts[1] if len(cell_texts) > 1 else None,
    })

if parameters_list:
    # NOTE: this rebinds df, replacing the offers table built earlier in the notebook.
    df = pd.DataFrame(parameters_list)
    df = df.assign(group_key=None)
    parameters_df = df[['group_key', 'key', 'name']]
    values_df = df[['group_key', 'parameter_key', 'value']]
else:
    parameters_df, values_df = None, None

df

In [None]:
# Show the parameter definitions (group_key, key, name); None when nothing was scraped.
parameters_df

In [None]:
# Show the parameter values (group_key, parameter_key, value); None when nothing was scraped.
values_df