In [35]:
import os
from clickhouse_driver import Client
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Параметры подключения
clickhouse_host = os.getenv('CLICKHOUSE_HOST', '10.95.19.132')
clickhouse_user = os.getenv('CLICKHOUSE_USER', 'default')
clickhouse_password = os.getenv('CLICKHOUSE_PASSWORD', 'quie1ahpoo5Su0wohpaedae8keeph6bi')
database_name = os.getenv('CLICKHOUSE_DB', 'default')

# Установить количество дней для расчёта
number_of_days = 2  # Можно менять для тестирования

# Подключение к ClickHouse
client = Client(
    host=clickhouse_host,
    user=clickhouse_user,
    password=clickhouse_password,
    database=database_name
)

# Шаг 0: Создание структуры промежуточной таблицы
def initialize_intermediate_table():
    columns = [
        "serialno", "Dates", 
        # Поля target_date-1 (только чтение)
        "Status_prev", "Status_P_prev", "sne_prev", "ppr_prev", "repair_days_prev",
        # Поля target_date (для вычислений и записи)
        "Status", "Status_P", "sne", "ppr", "repair_days",
        "daily_flight_hours", "ll", "oh", "BR", "RepairTime", "ac_typ", 
        "mi8t_count", "mi17_count",
        "balance_total", "balance_mi8t", "balance_mi17", "balance_empty",
        "stock_mi8t", "stock_mi17", "stock_empty", "stock_total"
    ]
    return pd.DataFrame(columns=columns).set_index("serialno")

# Промежуточная таблица в памяти
intermediate_table = initialize_intermediate_table()

# Шаг 1: Определение первых дат
current_date_query = "SELECT MIN(Dates) FROM OlapCube_VNV"
target_date_prev = client.execute(current_date_query)[0][0]

for day in range(number_of_days):
    target_date = target_date_prev + timedelta(days=1)
    print(f"Обработка данных для дня {day + 1}, target_date-1: {target_date_prev}, target_date: {target_date}")

    # Шаг 2: Загрузка данных из ClickHouse
    query = f"""
        SELECT 
            serialno, Dates,
            Status AS Status_prev, 
            Status_P AS Status_P_prev, 
            sne AS sne_prev, 
            ppr AS ppr_prev, 
            repair_days AS repair_days_prev,
            NULL AS Status, 
            NULL AS Status_P, 
            NULL AS sne, 
            NULL AS ppr, 
            NULL AS repair_days,
            daily_flight_hours, ll, oh, BR, RepairTime, ac_typ, 
            mi8t_count, mi17_count,
            NULL AS balance_total, NULL AS balance_mi8t, NULL AS balance_mi17, NULL AS balance_empty,
            NULL AS stock_mi8t, NULL AS stock_mi17, NULL AS stock_empty, NULL AS stock_total
        FROM OlapCube_VNV
        WHERE Dates IN ('{target_date_prev}', '{target_date}')
    """
    data_raw = client.execute(query)
    columns = [
        "serialno", "Dates", "Status_prev", "Status_P_prev", "sne_prev", "ppr_prev", "repair_days_prev",
        "Status", "Status_P", "sne", "ppr", "repair_days",
        "daily_flight_hours", "ll", "oh", "BR", "RepairTime", "ac_typ", 
        "mi8t_count", "mi17_count",
        "balance_total", "balance_mi8t", "balance_mi17", "balance_empty",
        "stock_mi8t", "stock_mi17", "stock_empty", "stock_total"
    ]
    data = pd.DataFrame(data_raw, columns=columns).set_index("serialno")
    intermediate_table = data

    # Шаг 3: Проведение расчетов

    ## Шаг I: Определение Status_P
    intermediate_table['Status_P'] = None
    intermediate_table.loc[intermediate_table['Status_prev'] == 'Неактивно', 'Status_P'] = 'Неактивно'
    intermediate_table.loc[intermediate_table['Status_prev'] == 'Хранение', 'Status_P'] = 'Хранение'
    intermediate_table.loc[intermediate_table['Status_prev'] == 'Исправен', 'Status_P'] = 'Исправен'

    intermediate_table.loc[
        (intermediate_table['Status_prev'] == 'Ремонт') & 
        (intermediate_table['repair_days_prev'] < intermediate_table['RepairTime']),
        'Status_P'
    ] = 'Ремонт'

    intermediate_table.loc[
        (intermediate_table['Status_prev'] == 'Ремонт') & 
        (intermediate_table['repair_days_prev'] >= intermediate_table['RepairTime']),
        'Status_P'
    ] = 'Исправен'

    intermediate_table.loc[
        (intermediate_table['Status_prev'] == 'Эксплуатация') &
        (intermediate_table['sne_prev'] < (intermediate_table['ll'] - intermediate_table['daily_flight_hours'])) &
        (intermediate_table['ppr_prev'] < (intermediate_table['oh'] - intermediate_table['daily_flight_hours'])),
        'Status_P'
    ] = 'Эксплуатация'

    intermediate_table.loc[
        (intermediate_table['Status_prev'] == 'Эксплуатация') &
        (intermediate_table['sne_prev'] >= (intermediate_table['ll'] - intermediate_table['daily_flight_hours'])),
        'Status_P'
    ] = 'Хранение'

    intermediate_table.loc[
        (intermediate_table['Status_prev'] == 'Эксплуатация') &
        (intermediate_table['ppr_prev'] >= (intermediate_table['oh'] - intermediate_table['daily_flight_hours'])),
        'Status_P'
    ] = np.where(
        intermediate_table['sne_prev'] < intermediate_table['BR'],
        'Ремонт',
        'Хранение'
    )

    ## Шаг II: Расчёт балансов и запасов
    intermediate_table['balance_mi8t'] = (
        intermediate_table.loc[
            (intermediate_table['Status_P'] == 'Эксплуатация') & 
            (intermediate_table['ac_typ'] == 'Ми-8Т')
        ].groupby('Dates').transform('count')['Status_P']
        - intermediate_table['mi8t_count']
    )
    intermediate_table['balance_mi17'] = (
        intermediate_table.loc[
            (intermediate_table['Status_P'] == 'Эксплуатация') & 
            (intermediate_table['ac_typ'] == 'Ми-17')
        ].groupby('Dates').transform('count')['Status_P']
        - intermediate_table['mi17_count']
    )
    intermediate_table['balance_empty'] = (
        intermediate_table.loc[
            (intermediate_table['Status_P'] == 'Эксплуатация') & 
            (intermediate_table['ac_typ'].isnull())
        ].groupby('Dates').transform('count')['Status_P']
    )
    intermediate_table['balance_total'] = (
        intermediate_table['balance_mi8t'] + 
        intermediate_table['balance_mi17'] + 
        intermediate_table['balance_empty']
    )

    ## Шаг III: Балансировка статусов
    if intermediate_table['balance_total'].iloc[0] > 0:
        balance_to_fix = intermediate_table.loc[
            (intermediate_table['Status_P'] == 'Эксплуатация')
        ].head(intermediate_table['balance_total'].iloc[0]).index
        intermediate_table.loc[balance_to_fix, 'Status_P'] = 'Исправен'
    elif intermediate_table['balance_total'].iloc[0] < 0:
        balance_to_operate = intermediate_table.loc[
            (intermediate_table['Status_P'] == 'Исправен')
        ].head(abs(intermediate_table['balance_total'].iloc[0])).index
        intermediate_table.loc[balance_to_operate, 'Status_P'] = 'Эксплуатация'

    ## Шаг IV: Обновление счётчиков sne, ppr и repair_days
    intermediate_table.loc[
        intermediate_table['Status_P'] == 'Эксплуатация',
        ['sne', 'ppr']
    ] = intermediate_table.loc[
        intermediate_table['Status_P'] == 'Эксплуатация',
        ['sne_prev', 'ppr_prev']
    ] + intermediate_table['daily_flight_hours']

    intermediate_table.loc[
        intermediate_table['Status_P'] == 'Исправен',
        'sne'
    ] = intermediate_table.loc[
        intermediate_table['Status_P'] == 'Исправен',
        'sne_prev'
    ]
    intermediate_table.loc[
        (intermediate_table['Status_P'] == 'Исправен') & 
        (intermediate_table['Status_P_prev'] == 'Ремонт'),
        'ppr'
    ] = 0
    intermediate_table.loc[
        (intermediate_table['Status_P'] == 'Исправен') & 
        (intermediate_table['Status_P_prev'] != 'Ремонт'),
        'ppr'
    ] = intermediate_table.loc[
        (intermediate_table['Status_P'] == 'Исправен') & 
        (intermediate_table['Status_P_prev'] != 'Ремонт'),
        'ppr_prev'
    ]

    # Шаг 4: Выгрузка данных в ClickHouse
    delete_query = f"ALTER TABLE OlapCube_VNV DELETE WHERE Dates = '{target_date}'"
    client.execute(delete_query)

    insert_query = """
        INSERT INTO OlapCube_VNV (serialno, Dates, Status, Status_P, sne, ppr, repair_days)
        VALUES
    """
    values = ", ".join([
        f"('{serial}', '{target_date}', '{row['Status']}', '{row['Status_P']}', {row['sne']}, {row['ppr']}, {row['repair_days']})"
        for serial, row in intermediate_table.iterrows()
    ])
    client.execute(insert_query + values)

    target_date_prev = target_date

print("Обработка завершена.")


Обработка данных для дня 1, target_date-1: 2024-11-25, target_date: 2024-11-26


ValueError: cannot reindex on an axis with duplicate labels

In [36]:
import os
from clickhouse_driver import Client
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Параметры подключения
clickhouse_host = os.getenv('CLICKHOUSE_HOST', '10.95.19.132')
clickhouse_user = os.getenv('CLICKHOUSE_USER', 'default')
clickhouse_password = os.getenv('CLICKHOUSE_PASSWORD', 'quie1ahpoo5Su0wohpaedae8keeph6bi')
database_name = os.getenv('CLICKHOUSE_DB', 'default')

# Установить количество дней для расчёта
number_of_days = 2  # Можно менять для тестирования

# Подключение к ClickHouse
client = Client(
    host=clickhouse_host,
    user=clickhouse_user,
    password=clickhouse_password,
    database=database_name
)

# Шаг 0: Создание структуры промежуточной таблицы
def initialize_intermediate_table():
    columns = [
        "serialno", 
        # Поля target_date-1 (только чтение)
        "Status_prev", "Status_P_prev", "sne_prev", "ppr_prev", "repair_days_prev",
        # Поля target_date (для вычислений и записи)
        "Status", "Status_P", "sne", "ppr", "repair_days",
        "daily_flight_hours", "ll", "oh", "BR", "RepairTime", "ac_typ", 
        "mi8t_count", "mi17_count",
        "balance_total", "balance_mi8t", "balance_mi17", "balance_empty",
        "stock_mi8t", "stock_mi17", "stock_empty", "stock_total"
    ]
    return pd.DataFrame(columns=columns).set_index("serialno")

# Промежуточная таблица в памяти
intermediate_table = initialize_intermediate_table()

# Шаг 1: Определение первых дат
current_date_query = "SELECT MIN(Dates) FROM OlapCube_VNV"
target_date_prev = client.execute(current_date_query)[0][0]

for day in range(number_of_days):
    target_date = target_date_prev + timedelta(days=1)
    print(f"Обработка данных для дня {day + 1}, target_date-1: {target_date_prev}, target_date: {target_date}")

    # Шаг 2: Загрузка данных из ClickHouse
    query = f"""
        SELECT 
            serialno,
            Status AS Status_prev, 
            Status_P AS Status_P_prev, 
            sne AS sne_prev, 
            ppr AS ppr_prev, 
            repair_days AS repair_days_prev,
            NULL AS Status, 
            NULL AS Status_P, 
            NULL AS sne, 
            NULL AS ppr, 
            NULL AS repair_days,
            daily_flight_hours, ll, oh, BR, RepairTime, ac_typ, 
            mi8t_count, mi17_count,
            NULL AS balance_total, NULL AS balance_mi8t, NULL AS balance_mi17, NULL AS balance_empty,
            NULL AS stock_mi8t, NULL AS stock_mi17, NULL AS stock_empty, NULL AS stock_total
        FROM OlapCube_VNV
        WHERE Dates IN ('{target_date_prev}', '{target_date}')
    """
    data_raw = client.execute(query)
    columns = [
        "serialno", "Status_prev", "Status_P_prev", "sne_prev", "ppr_prev", "repair_days_prev",
        "Status", "Status_P", "sne", "ppr", "repair_days",
        "daily_flight_hours", "ll", "oh", "BR", "RepairTime", "ac_typ", 
        "mi8t_count", "mi17_count",
        "balance_total", "balance_mi8t", "balance_mi17", "balance_empty",
        "stock_mi8t", "stock_mi17", "stock_empty", "stock_total"
    ]
    data = pd.DataFrame(data_raw, columns=columns).set_index("serialno")
    intermediate_table = data

    # Шаг 3: Проведение расчетов

    ## Шаг I: Определение Status_P
    intermediate_table['Status_P'] = None
    intermediate_table.loc[intermediate_table['Status_prev'] == 'Неактивно', 'Status_P'] = 'Неактивно'
    intermediate_table.loc[intermediate_table['Status_prev'] == 'Хранение', 'Status_P'] = 'Хранение'
    intermediate_table.loc[intermediate_table['Status_prev'] == 'Исправен', 'Status_P'] = 'Исправен'

    intermediate_table.loc[
        (intermediate_table['Status_prev'] == 'Ремонт') & 
        (intermediate_table['repair_days_prev'] < intermediate_table['RepairTime']),
        'Status_P'
    ] = 'Ремонт'

    intermediate_table.loc[
        (intermediate_table['Status_prev'] == 'Ремонт') & 
        (intermediate_table['repair_days_prev'] >= intermediate_table['RepairTime']),
        'Status_P'
    ] = 'Исправен'

    intermediate_table.loc[
        (intermediate_table['Status_prev'] == 'Эксплуатация') &
        (intermediate_table['sne_prev'] < (intermediate_table['ll'] - intermediate_table['daily_flight_hours'])) &
        (intermediate_table['ppr_prev'] < (intermediate_table['oh'] - intermediate_table['daily_flight_hours'])),
        'Status_P'
    ] = 'Эксплуатация'

    intermediate_table.loc[
        (intermediate_table['Status_prev'] == 'Эксплуатация') &
        (intermediate_table['sne_prev'] >= (intermediate_table['ll'] - intermediate_table['daily_flight_hours'])),
        'Status_P'
    ] = 'Хранение'

    intermediate_table.loc[
        (intermediate_table['Status_prev'] == 'Эксплуатация') &
        (intermediate_table['ppr_prev'] >= (intermediate_table['oh'] - intermediate_table['daily_flight_hours'])),
        'Status_P'
    ] = np.where(
        intermediate_table['sne_prev'] < intermediate_table['BR'],
        'Ремонт',
        'Хранение'
    )

    ## Шаг II: Расчёт балансов и запасов
    intermediate_table['balance_mi8t'] = (
        intermediate_table[(intermediate_table['Status_P'] == 'Эксплуатация') & 
                           (intermediate_table['ac_typ'] == 'Ми-8Т')].shape[0]
        - intermediate_table['mi8t_count']
    )
    intermediate_table['balance_mi17'] = (
        intermediate_table[(intermediate_table['Status_P'] == 'Эксплуатация') & 
                           (intermediate_table['ac_typ'] == 'Ми-17')].shape[0]
        - intermediate_table['mi17_count']
    )
    intermediate_table['balance_empty'] = (
        intermediate_table[(intermediate_table['Status_P'] == 'Эксплуатация') & 
                           (intermediate_table['ac_typ'].isnull())].shape[0]
    )
    intermediate_table['balance_total'] = (
        intermediate_table['balance_mi8t'] + 
        intermediate_table['balance_mi17'] + 
        intermediate_table['balance_empty']
    )

    ## Шаг III: Балансировка статусов
    if intermediate_table['balance_total'].iloc[0] > 0:
        balance_to_fix = intermediate_table.loc[
            (intermediate_table['Status_P'] == 'Эксплуатация')
        ].head(intermediate_table['balance_total'].iloc[0]).index
        intermediate_table.loc[balance_to_fix, 'Status_P'] = 'Исправен'
    elif intermediate_table['balance_total'].iloc[0] < 0:
        balance_to_operate = intermediate_table.loc[
            (intermediate_table['Status_P'] == 'Исправен')
        ].head(abs(intermediate_table['balance_total'].iloc[0])).index
        intermediate_table.loc[balance_to_operate, 'Status_P'] = 'Эксплуатация'

    ## Шаг IV: Обновление счётчиков sne, ppr и repair_days
    intermediate_table.loc[
        intermediate_table['Status_P'] == 'Эксплуатация',
        ['sne', 'ppr']
    ] = intermediate_table.loc[
        intermediate_table['Status_P'] == 'Эксплуатация',
        ['sne_prev', 'ppr_prev']
    ] + intermediate_table['daily_flight_hours']

    intermediate_table.loc[
        intermediate_table['Status_P'] == 'Исправен',
        'sne'
    ] = intermediate_table.loc[
        intermediate_table['Status_P'] == 'Исправен',
        'sne_prev'
    ]
    intermediate_table.loc[
        (intermediate_table['Status_P'] == 'Исправен') & 
        (intermediate_table['Status_P_prev'] == 'Ремонт'),
        'ppr'
    ] = 0
    intermediate_table.loc[
        (intermediate_table['Status_P'] == 'Исправен') & 
        (intermediate_table['Status_P_prev'] != 'Ремонт'),
        'ppr'
    ] = intermediate_table.loc[
        (intermediate_table['Status_P'] == 'Исправен') & 
        (intermediate_table['Status_P_prev'] != 'Ремонт'),
        'ppr_prev'
    ]

    # Шаг 4: Выгрузка данных в ClickHouse
    delete_query = f"ALTER TABLE OlapCube_VNV DELETE WHERE Dates = '{target_date}'"
    client.execute(delete_query)

    insert_query = """
        INSERT INTO OlapCube_VNV (serialno, Dates, Status, Status_P, sne, ppr, repair_days)
        VALUES
    """
    values = ", ".join([
        f"('{serial}', '{target_date}', '{row['Status']}', '{row['Status_P']}', {row['sne']}, {row['ppr']}, {row['repair_days']})"
        for serial, row in intermediate_table.iterrows()
    ])
    client.execute(insert_query + values)

    target_date_prev = target_date

print("Обработка завершена.")


Обработка данных для дня 1, target_date-1: 2024-11-25, target_date: 2024-11-26


TypeError: cannot do positional indexing on Index with these indexers [156.0] of type float64