In [1]:
import requests
from dotenv import load_dotenv
import os
from datetime import datetime, timedelta
import pandas as pd
from clickhouse_connect import get_client
from clickhouse_driver import Client

# Load variables from a .env file into the process environment (python-dotenv),
# so the os.getenv() lookups in the following cells can find the keys/passwords.
load_dotenv()

True

In [22]:
# API keys read from the environment; a variable that is not set yields None.
# KeyGuten is the one used as the Authorization header further down; the other
# three are presumably keys for other seller accounts -- TODO confirm.
KeyGuten, KeyGiper, KeyKitchen, KeySmart = (
    os.getenv(env_name)
    for env_name in ('KeyGuten', 'KeyGiper', 'KeyKitchen', 'KeySmart')
)

In [35]:
# Endpoint that fetch_orders() queries for supplier orders
API_URL = "https://statistics-api.wildberries.ru/api/v1/supplier/orders"

# HTTP headers sent with every request; the API key is passed verbatim
# as the Authorization value.
headers = dict([
    ("Authorization", KeyGuten),
    ("Content-Type", "application/json"),
])

# Function to fetch orders from the API
def fetch_orders(date_from, flag=0, timeout=120):
    """Request one page of orders from the API at ``API_URL``.

    Args:
        date_from: Value sent as the ``dateFrom`` query parameter (the caller
            passes a ``%Y-%m-%dT%H:%M:%S`` formatted string).
        flag: Value sent as the ``flag`` query parameter.
        timeout: Seconds ``requests`` waits for the server before raising
            ``requests.Timeout``. Without it a stalled connection would hang
            the notebook indefinitely.

    Returns:
        The decoded JSON body when the response status is 200, otherwise
        ``None`` (the status code and response text are printed).
    """
    params = {
        "dateFrom": date_from,
        "flag": flag
    }
    response = requests.get(API_URL, headers=headers, params=params, timeout=timeout)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code} - {response.text}")
        return None

# Main function
if __name__ == "__main__":
    # Start of the current year, formatted the same way it is sent as dateFrom
    start_date = datetime(datetime.now().year, 1, 1).strftime("%Y-%m-%dT%H:%M:%S")

    # Accumulator for every page and the cursor for the next request
    all_orders = []
    date_from = start_date

    # Page through the API: each request starts from the lastChangeDate of the
    # last row returned by the previous one.
    while True:
        print(f"Fetching orders from {date_from}...")
        orders = fetch_orders(date_from, flag=0)

        if orders is None:
            # fetch_orders() returns None on a non-200 response (and has already
            # printed the details). Report it as a failure instead of claiming
            # that there are no more orders.
            print("Fetching stopped because of an API error; the collected data may be incomplete.")
            break

        if not orders:
            print("No more orders found.")
            break

        # Append the fetched orders to the list
        all_orders.extend(orders)

        # Advance the cursor to the lastChangeDate of the last order
        last_change_date = orders[-1]["lastChangeDate"]
        if last_change_date == date_from:
            # The cursor did not move, so the next request would be identical
            # to this one; stop rather than loop forever.
            print("Cursor did not advance; stopping to avoid an endless loop.")
            break
        date_from = last_change_date

    # Convert the data into a pandas DataFrame
    df = pd.DataFrame(all_orders)

    # Save the DataFrame to an Excel file
    df.to_excel("wildberries_orders.xlsx", index=False)
    print("Data saved to 'wildberries_orders.xlsx'.")

Fetching orders from 2025-01-01T00:00:00...
Fetching orders from 2025-03-10T18:54:05...
No more orders found.
Data saved to 'wildberries_orders.xlsx'.


In [38]:
# Tag every row with constant source labels
for label_col, label in {'Marketplace': 'Wildberries', 'Project': 'WB-GutenTech'}.items():
    df[label_col] = label

# Parse the date columns; values that cannot be parsed become NaT
for date_col in ('date', 'lastChangeDate', 'cancelDate'):
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

# Substitute a far-future placeholder for missing/invalid cancel dates
cancel_placeholder = pd.Timestamp('2100-01-01')
df['cancelDate'] = df['cancelDate'].fillna(cancel_placeholder)
df

Unnamed: 0,date,lastChangeDate,warehouseName,warehouseType,countryName,oblastOkrugName,regionName,supplierArticle,nmId,barcode,...,finishedPrice,priceWithDisc,isCancel,cancelDate,orderType,sticker,gNumber,srid,Marketplace,Project
0,2024-12-30 12:45:03,2025-01-01 00:05:46,Коледино,Склад WB,Россия,Центральный федеральный округ,Московская область,8700216171816,181893183,8700216171816,...,1365.00,1845.00,False,2100-01-01,Клиентский,27627873691,4874530169328483674,5014945668838056364.0.0,Wildberries,WB-GutenTech
1,2024-12-26 22:57:27,2025-01-01 00:05:47,Коледино,Склад WB,Россия,Северо-Кавказский федеральный округ,Республика Дагестан,I02071,159121851,4981046050494,...,6913.00,8230.00,False,2100-01-01,Клиентский,24359722360,5708125670413416526,55645719602599123.0.0,Wildberries,WB-GutenTech
2,2024-12-27 11:04:31,2025-01-01 00:05:50,Краснодар,Склад WB,Россия,Южный федеральный округ,Севастополь,I01475,159488075,4580198173015,...,3540.00,4215.00,False,2100-01-01,Клиентский,27568829189,6537882526621632455,7845b788e3114bde91f02e4ba03d7686,Wildberries,WB-GutenTech
3,2024-12-31 17:54:40,2025-01-01 00:28:54,Коледино,Склад WB,Беларусь,,Минск,4084500844483,200608022,4084500844483,...,577.16,779.92,False,2100-01-01,Клиентский,27627050651,97199218436949014222,dc.ia5e015b57d49f5729e45bc8c42f095f1.0.0,Wildberries,WB-GutenTech
4,2024-12-31 17:54:40,2025-01-01 00:28:54,Коледино,Склад WB,Беларусь,,Минск,4084500844483,200608022,4084500844483,...,577.16,779.92,False,2100-01-01,Клиентский,27627050655,97199218436949014222,dc.ia5e015b57d49f5729e45bc8c42f095f1.0.1,Wildberries,WB-GutenTech
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28249,2025-03-10 15:30:19,2025-03-10 18:53:26,Тула,Склад WB,Россия,Южный федеральный округ,Ростовская область,I01475,159488075,4580198173015,...,3337.00,3880.00,False,2100-01-01,Клиентский,28876594219,98968440942939143648,7241689317998485021.0.0,Wildberries,WB-GutenTech
28250,2025-03-10 17:49:57,2025-03-10 18:53:33,Тула,Склад WB,Россия,Центральный федеральный округ,Москва,7702018574315,242960840,7702018574315,...,1796.00,2495.00,False,2100-01-01,Клиентский,,9028251944799643837,8836616262822419347.0.0,Wildberries,WB-GutenTech
28251,2025-03-10 16:08:50,2025-03-10 18:53:35,Коледино,Склад WB,Россия,Сибирский федеральный округ,Омская область,7702018574223,242981208,7702018574223,...,1306.00,1815.00,False,2100-01-01,Клиентский,25170063624,95380110571690639517,6522147377715879649.0.0,Wildberries,WB-GutenTech
28252,2025-03-10 17:52:41,2025-03-10 18:53:57,Тула,Склад WB,Россия,Сибирский федеральный округ,Иркутская область,Б0052971,51592808,4210201347163,...,1000.00,1267.00,False,2100-01-01,Клиентский,,96463104285346693657,6540932183984485273.0.0,Wildberries,WB-GutenTech


In [39]:
# ClickHouse password taken from the 'ClickHouse' environment variable
# (None when the variable is not set).
password = os.environ.get('ClickHouse')

In [None]:
# Define connection parameters

# Optional path to a CA certificate (.pem) used to validate the server's TLS
# certificate, read from the CLICKHOUSE_CA_CERT environment variable.
ca_cert_path = os.getenv('CLICKHOUSE_CA_CERT')
if not ca_cert_path:
    # No CA file configured: keep the previous behaviour (no certificate
    # verification) so the notebook still connects, but say so explicitly,
    # because the password is then sent to an unauthenticated server.
    print("WARNING: CLICKHOUSE_CA_CERT is not set; TLS certificate verification is disabled.")

client = get_client(
    host='rc1a-j5ou9lq30ldal602.mdb.yandexcloud.net',  # ClickHouse host
    port=8443,                                          # HTTPS port
    username='user1',                                   # ClickHouse username
    password=password,                                  # ClickHouse password
    database='user1',                                   # Database name
    secure=True,                                        # Use HTTPS
    verify=bool(ca_cert_path),                          # Verify the certificate when a CA file is configured
    ca_cert=ca_cert_path or None                        # CA bundle used for verification (ignored when verify is False)
)

In [41]:
# Define the table name
table_name = 'orders_wb'

# Columns to insert, in the order the values are sent
column_names = [
    'date', 'lastChangeDate', 'warehouseName', 'warehouseType', 'countryName',
    'oblastOkrugName', 'regionName', 'supplierArticle', 'nmId', 'barcode',
    'category', 'subject', 'brand', 'techSize', 'incomeID', 'isSupply',
    'isRealization', 'totalPrice', 'discountPercent', 'spp', 'finishedPrice',
    'priceWithDisc', 'isCancel', 'cancelDate', 'orderType', 'sticker',
    'gNumber', 'srid','Marketplace','Project'
]

# Convert DataFrame to a list of tuples for bulk insertion.
# The columns are selected by name first so that every value lines up with its
# entry in column_names even if df has extra columns or a different column
# order; a column missing from df raises KeyError here instead of silently
# shifting values into the wrong table columns.
data_tuples = [tuple(row) for row in df[column_names].to_numpy()]

# Use the insert method for bulk insertion
client.insert(table_name, data_tuples, column_names=column_names)

print("Data inserted successfully!")

Data inserted successfully!
