In [9]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer

from catboost import CatBoostClassifier, Pool
from sklearn.metrics import classification_report, f1_score, roc_auc_score

from heapq import nlargest
import random

from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm

In [3]:
def batch_load_sql(query: str) -> pd.DataFrame:
    """Run ``query`` against the PostgreSQL database and return all rows.

    The result set is streamed from the server (``stream_results=True``) and
    read in chunks of ``CHUNKSIZE`` rows, which are then concatenated into a
    single DataFrame with a fresh 0..n-1 index.

    Args:
        query: SQL text handed to ``pd.read_sql``.

    Returns:
        A DataFrame with every row produced by the query; an empty DataFrame
        if the reader yielded no chunks at all.
    """
    import os  # local import so this cell does not depend on the imports cell

    CHUNKSIZE = 200000

    # NOTE(review): the connection string (including the password) is embedded
    # in the notebook. Setting STARTML_DB_URL overrides it; the literal is kept
    # only as a fallback so existing runs behave exactly as before.
    db_url = os.environ.get(
        "STARTML_DB_URL",
        "postgresql://robot-startml-ro:pheiph0hahj1Vaif@postgres.lab.karpov.courses:6432/startml",
    )
    engine = create_engine(db_url)
    try:
        conn = engine.connect().execution_options(stream_results=True)
        try:
            chunks = list(pd.read_sql(query, conn, chunksize=CHUNKSIZE))
        finally:
            # Close the connection even when reading fails part-way through.
            conn.close()
    finally:
        # A new engine is created on every call, so release its pool as well.
        engine.dispose()

    if not chunks:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame()
    return pd.concat(chunks, ignore_index=True)


def load_features(table_name) -> pd.DataFrame:
    """Fetch every row and column of ``table_name`` as a DataFrame.

    ``table_name`` is inserted into the SQL text verbatim, so any identifier
    quoting has to be part of the value supplied by the caller.
    """
    select_all = "SELECT * FROM {}".format(table_name)
    loaded = batch_load_sql(select_all)
    return loaded


def load_to_sql(table_name, data):
    """Write ``data`` to the database table ``table_name``.

    An existing table with that name is replaced (``if_exists='replace'``),
    the DataFrame index is not written, and rows are sent in batches of
    10000.

    Args:
        table_name: name of the destination table.
        data: DataFrame to store.
    """
    import os  # local import so this cell does not depend on the imports cell

    # NOTE(review): the connection string (including the password) is embedded
    # in the notebook. Setting STARTML_DB_URL overrides it; the literal is kept
    # only as a fallback so existing runs behave exactly as before.
    db_url = os.environ.get(
        "STARTML_DB_URL",
        "postgresql://robot-startml-ro:pheiph0hahj1Vaif@postgres.lab.karpov.courses:6432/startml",
    )
    engine = create_engine(db_url)
    try:
        data.to_sql(table_name, con=engine, if_exists='replace', index=False, chunksize=10000)
    finally:
        # A new engine is created on every call, so release its pool afterwards.
        engine.dispose()
    

In [4]:
# Source table with the prepared features and the like/no-like target.
# The inner double quotes are part of the value and end up in the SQL text.
table_name = '"darja_stiheeva_lms4973_features_lesson_22_pca_with_target"'
features = load_features(table_name)
display(features)

Unnamed: 0,user_id,post_id,gender,age,country,city,exp_group,topic,SumTfIdf,MaxTfIdf,...,PCA_2_TfIdf,PCA_3_TfIdf,PCA_4_TfIdf,len_text_num,hour_cat,day_of_week_cat,weekday_cat,month_cat,timestamp,like_target
0,200,3434,1,34,Russia,Degtyarsk,3,covid,2.892646,0.510899,...,-0.104655,0.020903,0.014462,140,22,5,wend,11,2021-11-20 22:49:40,0
1,200,2416,1,34,Russia,Degtyarsk,3,covid,2.512959,0.469197,...,-0.169486,0.044218,0.036424,77,14,5,wend,10,2021-10-02 14:20:40,0
2,200,1592,1,34,Russia,Degtyarsk,3,sport,6.966328,0.540718,...,0.054070,-0.007312,-0.021639,1112,19,4,wday,10,2021-10-29 19:44:31,0
3,200,3151,1,34,Russia,Degtyarsk,3,covid,3.133805,0.583546,...,-0.095942,0.019393,0.013283,138,22,5,wend,11,2021-11-20 22:55:51,0
4,200,1125,1,34,Russia,Degtyarsk,3,politics,9.482513,0.422031,...,0.135959,0.052061,0.046768,2443,9,1,wday,11,2021-11-23 09:21:50,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1629367,168552,1915,1,16,Russia,Ivanteyevka,4,sport,7.159526,0.534125,...,0.046621,-0.045287,-0.072064,1130,18,1,wday,12,2021-12-07 18:33:29,1
1629368,168552,1810,1,16,Russia,Ivanteyevka,4,sport,7.573453,0.584978,...,0.033978,-0.043798,-0.053374,1282,13,2,wday,10,2021-10-20 13:47:41,1
1629369,168552,5487,1,16,Russia,Ivanteyevka,4,movie,5.709767,0.353557,...,-0.028779,-0.004209,-0.003524,674,9,1,wday,12,2021-12-21 09:32:54,1
1629370,168552,3628,1,16,Russia,Ivanteyevka,4,covid,3.117580,0.457843,...,-0.047158,-0.008549,-0.009046,140,14,1,wday,11,2021-11-23 14:44:41,1


In [36]:
# List the column names of the loaded feature table.
features.columns

Index(['user_id', 'post_id', 'gender', 'age', 'country', 'city', 'exp_group',
       'topic', 'SumTfIdf', 'MaxTfIdf', 'MeanTfIdf', 'PCA_1_TfIdf',
       'PCA_2_TfIdf', 'PCA_3_TfIdf', 'PCA_4_TfIdf', 'len_text_num', 'hour_cat',
       'day_of_week_cat', 'weekday_cat', 'month_cat', 'timestamp',
       'like_target'],
      dtype='object')

In [5]:
def train_test_split_sorted(data, train_size=0.8):
    """Split ``data`` chronologically into a train part and a test part.

    Rows are ordered by the ``timestamp`` column (parsed with
    ``pd.to_datetime``); the first ``train_size`` share of the ordered rows
    becomes the train part and the remainder the test part. Both parts are
    shown with ``display``. The caller's DataFrame is not modified.

    Args:
        data: DataFrame containing a ``timestamp`` column.
        train_size: fraction of rows (between 0 and 1) assigned to train.

    Returns:
        ``(train, test)`` — two independent DataFrames whose index follows the
        chronological order and whose ``timestamp`` column is datetime-typed.

    Raises:
        ValueError: if ``train_size`` is outside the range [0, 1].
    """
    if not 0 <= train_size <= 1:
        raise ValueError(f"train_size must be between 0 and 1, got {train_size!r}")

    # Sort by date. ``assign`` returns a new frame, so the parsed timestamps
    # never leak back into the DataFrame owned by the caller.
    display("Сортировка данных по дате: ")
    ordered = (
        data
        .assign(timestamp=pd.to_datetime(data["timestamp"]))
        .sort_values(by="timestamp")
        .reset_index(drop=True)
    )

    # Earliest rows go to train, the most recent ones to test.
    split_index = int(len(ordered) * train_size)
    train = ordered.iloc[:split_index].copy()
    test = ordered.iloc[split_index:].copy()
    display("Предварительная выборка на трейн: ")
    display(train)
    display("Предварительная выборка на тест: ")
    display(test)

    return train, test

In [7]:
# Chronological split of the feature table: first 80% of rows (by timestamp) for train, the rest for test.
train_data, test_data = train_test_split_sorted(features, train_size=0.8)

'Сортировка данных по дате: '

'Предварительная выборка на трейн: '

Unnamed: 0,user_id,post_id,gender,age,country,city,exp_group,topic,SumTfIdf,MaxTfIdf,...,PCA_2_TfIdf,PCA_3_TfIdf,PCA_4_TfIdf,len_text_num,hour_cat,day_of_week_cat,weekday_cat,month_cat,timestamp,like_target
0,107767,685,0,31,Russia,Rostov,4,entertainment,13.597845,0.275167,...,0.152214,-0.093619,0.093414,3562,6,4,wday,10,2021-10-01 06:01:52,0
1,127912,5438,1,43,Russia,Gudermes,1,movie,5.816942,0.311333,...,-0.022530,-0.016797,0.007912,520,6,4,wday,10,2021-10-01 06:02:00,0
2,59784,680,0,29,Russia,Ryazan,3,entertainment,7.744020,0.506114,...,0.044818,-0.044923,-0.017650,1240,6,4,wday,10,2021-10-01 06:02:01,0
3,123183,7112,1,44,Russia,Moscow,3,movie,6.154851,0.437016,...,-0.038261,0.068309,-0.057558,602,6,4,wday,10,2021-10-01 06:02:29,1
4,109583,5706,0,22,Azerbaijan,Baku,2,movie,5.345223,0.334889,...,-0.074250,0.198745,-0.114454,613,6,4,wday,10,2021-10-01 06:02:29,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1303492,135762,6042,0,17,Ukraine,Pidhorodne,1,movie,6.913636,0.297125,...,-0.033062,0.042358,-0.034945,918,7,1,wday,12,2021-12-14 07:43:38,0
1303493,40245,5356,1,30,Russia,Staryy Oskol,2,movie,9.165209,0.319290,...,-0.017236,-0.026497,0.038021,1593,7,1,wday,12,2021-12-14 07:43:48,1
1303494,106385,4105,1,31,Russia,Dzerzhinsk,1,covid,3.296829,0.429926,...,-0.077449,0.015875,0.002982,140,7,1,wday,12,2021-12-14 07:43:48,1
1303495,73181,1849,0,23,Russia,Volgograd,3,sport,8.216981,0.579082,...,0.035849,-0.032265,-0.037864,1632,7,1,wday,12,2021-12-14 07:44:03,0


'Предварительная выборка на тест: '

Unnamed: 0,user_id,post_id,gender,age,country,city,exp_group,topic,SumTfIdf,MaxTfIdf,...,PCA_2_TfIdf,PCA_3_TfIdf,PCA_4_TfIdf,len_text_num,hour_cat,day_of_week_cat,weekday_cat,month_cat,timestamp,like_target
1303497,141235,1875,1,37,Russia,Yaroslavl,2,sport,11.392275,0.261541,...,0.021906,-0.046546,-0.049026,2504,7,1,wday,12,2021-12-14 07:44:16,1
1303498,153143,1017,1,35,Russia,Krasnotur’insk,3,politics,9.538580,0.514062,...,0.162451,0.109948,0.112348,5387,7,1,wday,12,2021-12-14 07:44:22,0
1303499,151472,547,0,21,Russia,Vladivostok,4,entertainment,8.103359,0.471732,...,0.105722,0.031023,0.031372,1480,7,1,wday,12,2021-12-14 07:44:22,1
1303500,61455,4265,0,34,Russia,Volzhskiy,2,movie,9.146074,0.228866,...,-0.005752,-0.095259,0.069579,1279,7,1,wday,12,2021-12-14 07:44:22,1
1303501,37564,1910,1,59,Russia,Nakhodka,2,sport,8.582594,0.280798,...,0.045479,-0.031218,-0.045870,1447,7,1,wday,12,2021-12-14 07:44:22,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1629367,13350,1786,1,42,Russia,Ivanovo,0,sport,9.580836,0.478502,...,0.078165,-0.076394,-0.099037,2144,23,2,wday,12,2021-12-29 23:41:34,0
1629368,85153,2549,1,45,Russia,Moscow,4,covid,3.380304,0.393678,...,-0.044052,-0.001459,-0.008468,140,23,2,wday,12,2021-12-29 23:42:07,0
1629369,109595,4047,1,22,Russia,Perm,1,covid,3.250582,0.539436,...,-0.074330,0.009696,-0.002897,139,23,2,wday,12,2021-12-29 23:43:15,0
1629370,18441,1786,1,49,Belarus,Zhytkavichy,3,sport,9.580836,0.478502,...,0.078165,-0.076394,-0.099037,2144,23,2,wday,12,2021-12-29 23:45:42,1


In [12]:
# Separate the target column from the predictors for both chronological parts.
# NOTE(review): only 'like_target' is removed, so user_id, post_id and the raw
# timestamp remain among the predictors — confirm this is intended.
y_train, y_test = train_data['like_target'], test_data['like_target']

X_train = train_data.drop(columns=['like_target'])
X_test = test_data.drop(columns=['like_target'])

In [16]:
# Columns of the feature table that hold string categories.
cat_features = [
    'country',
    'city',
    'topic',
    'weekday_cat',
]

In [48]:
def catboost_clf(x_random_state, X_train_transformed, y_train_transformed, X_test_transformed, y_test_transformed, metric='AUC-ROC', cat_features=None):
    """Pick the best CatBoost tree depth by cross-validation, then fit and score a final model.

    For every depth from 4 to 10 a ``CatBoostClassifier`` (100 iterations,
    learning rate 0.05) is trained on each of 5 shuffled stratified folds of
    the training data and scored with the chosen metric. The depth with the
    highest mean fold score is used to fit a final model on the full training
    data, which is then scored on the test data with both ROC AUC and F1.

    NOTE(review): the folds are shuffled; if the training rows are in
    chronological order, validation folds mix earlier and later rows —
    confirm this is acceptable for the use case.

    Args:
        x_random_state: seed for the fold shuffling and for CatBoost.
        X_train_transformed: training predictors (DataFrame; indexed with ``.iloc``).
        y_train_transformed: training target (Series; indexed with ``.iloc``).
        X_test_transformed: test predictors.
        y_test_transformed: test target.
        metric: ``'AUC-ROC'`` or ``'F1-Score'`` — the score used to choose the depth.
        cat_features: optional categorical columns forwarded to
            ``CatBoostClassifier.fit``; ``None`` keeps CatBoost's default.

    Returns:
        ``(best_model, metrics_data)`` where ``metrics_data`` has one row per
        depth with the mean CV score, plus constant columns ``best_depth``,
        ``max_train_metric`` (the best mean CV score), ``max_test_auc_roc``
        and ``max_test_f1``.

    Raises:
        ValueError: if ``metric`` is not one of the two supported names.
    """
    print("CatBoostClassifier on Transformed Data")

    # Resolve the metric before any other work so that a typo fails immediately.
    if metric == 'AUC-ROC':
        metric_func, metric_name, eval_metric = roc_auc_score, 'AUC-ROC', 'AUC'
    elif metric == 'F1-Score':
        metric_func, metric_name, eval_metric = f1_score, 'F1-Score', 'F1'
    else:
        raise ValueError("Invalid metric. Choose either 'AUC-ROC' or 'F1-Score'.")
    # ROC AUC is computed from class-1 probabilities, F1 from hard labels.
    needs_proba = metric == 'AUC-ROC'

    def make_params(depth):
        # Single source of truth for the hyper-parameters of CV and final fits.
        return {
            'iterations': 100,
            'learning_rate': 0.05,
            'depth': depth,
            'eval_metric': eval_metric,
            'random_seed': x_random_state,
            'logging_level': 'Silent',
        }

    def predict_for_metric(model, X):
        if needs_proba:
            return model.predict_proba(X)[:, 1]
        return model.predict(X)

    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=x_random_state)

    # Start without a winner instead of depth 0 / score 0: depth 0 is not a
    # usable tree depth and would be kept if no mean score exceeded zero.
    best_depth = None
    max_metric = -np.inf
    cv_rows = []

    # Try every tree depth; the progress bar tracks the outer loop.
    for depth in tqdm(range(4, 11), desc="Depth Progress", leave=True):
        fold_metrics = []
        for train_idx, valid_idx in kf.split(X_train_transformed, y_train_transformed):
            X_fold_train, X_valid = X_train_transformed.iloc[train_idx], X_train_transformed.iloc[valid_idx]
            y_fold_train, y_valid = y_train_transformed.iloc[train_idx], y_train_transformed.iloc[valid_idx]

            model = CatBoostClassifier(**make_params(depth))
            model.fit(X_fold_train, y_fold_train.to_numpy().ravel(), cat_features=cat_features)

            fold_metrics.append(metric_func(y_valid, predict_for_metric(model, X_valid)))

        avg_metric = np.mean(fold_metrics)
        cv_rows.append([avg_metric, depth])
        # Strict comparison: on ties the shallower (earlier) depth is kept.
        if best_depth is None or avg_metric > max_metric:
            max_metric = avg_metric
            best_depth = depth

    # Per-depth CV scores plus the winning configuration.
    metrics_data = pd.DataFrame(cv_rows, columns=[metric_name, 'Depth'])
    metrics_data["best_depth"] = best_depth
    metrics_data["max_train_metric"] = max_metric
    print(f'Maximum {metric_name} = {max_metric:.4f} | Best Depth = {best_depth}')

    # Refit on the whole training set with the best depth.
    best_model = CatBoostClassifier(**make_params(best_depth))
    best_model.fit(X_train_transformed, y_train_transformed.to_numpy().ravel(), cat_features=cat_features)

    # Score on the test set; each kind of prediction is computed only once.
    test_auc_roc = roc_auc_score(y_test_transformed, best_model.predict_proba(X_test_transformed)[:, 1])
    test_f1 = f1_score(y_test_transformed, best_model.predict(X_test_transformed))
    metrics_data["max_test_auc_roc"] = test_auc_roc
    metrics_data["max_test_f1"] = test_f1

    test_metric = test_auc_roc if needs_proba else test_f1
    print(f'Test {metric_name}: {test_metric:.4f}')
    display(metrics_data)

    return best_model, metrics_data

In [40]:
# Depth search and final fit on the chronological split.
# `metric` accepts 'AUC-ROC' or 'F1-Score'.
CatB_best_model, CatB_metrics_data = catboost_clf(
    x_random_state=42,
    X_train_transformed=X_train,
    y_train_transformed=y_train,
    X_test_transformed=X_test,
    y_test_transformed=y_test,
    metric='AUC-ROC',
)

CatBoostClassifier on Transformed Data


Depth Progress: 100%|██████████| 7/7 [10:29<00:00, 89.92s/it] 


Maximum AUC-ROC = 0.6488 | Best Depth = 10
Test AUC-ROC: 0.6425


Unnamed: 0,AUC-ROC,Depth,best_depth,max_train_metric,max_test_auc_roc,max_test_f1
0,0.624859,4,10,0.648809,0.642528,0.658427
1,0.632879,5,10,0.648809,0.642528,0.658427
2,0.637327,6,10,0.648809,0.642528,0.658427
3,0.640502,7,10,0.648809,0.642528,0.658427
4,0.643053,8,10,0.648809,0.642528,0.658427
5,0.645886,9,10,0.648809,0.642528,0.658427
6,0.648809,10,10,0.648809,0.642528,0.658427


In [41]:
# Save the fitted model to disk in the "cbm" format.
CatB_best_model.save_model('catboost_model_new', format="cbm")

# Training parameters are not passed here: the model dump already contains them.
from_file = CatBoostClassifier()
from_file.load_model('catboost_model_new')

# Sanity check: the reloaded model can predict on the training predictors.
from_file.predict(X_train)

array([0, 0, 0, ..., 1, 1, 1], dtype=int64)

In [38]:
# List the column names of the feature table again, for reference next to the importances.
features.columns

Index(['user_id', 'post_id', 'gender', 'age', 'country', 'city', 'exp_group',
       'topic', 'SumTfIdf', 'MaxTfIdf', 'MeanTfIdf', 'PCA_1_TfIdf',
       'PCA_2_TfIdf', 'PCA_3_TfIdf', 'PCA_4_TfIdf', 'len_text_num', 'hour_cat',
       'day_of_week_cat', 'weekday_cat', 'month_cat', 'timestamp',
       'like_target'],
      dtype='object')

In [37]:
# Feature importance scores of the fitted best model.
CatB_best_model.feature_importances_

array([ 0.02579936, 10.15956697,  0.14713953, 24.61517883,  0.60537097,
        1.54830858,  0.5986174 , 13.01253293,  1.37625421,  1.61732115,
        0.72829403,  6.88555521,  4.41109856,  4.00013018,  2.36263058,
        0.70535482,  4.81181206,  0.10788179,  0.        , 13.46212388,
        8.81902897])

In [42]:
# String-valued columns selected for removal from the feature table.
features_to_drop = [
    'country',
    'city',
    'topic',
]

In [50]:
# Remove every string-valued column in one step. `features_to_drop` was defined
# but never applied in any visible cell, although the tables shown after this
# point no longer contain those columns — so a fresh Restart & Run All would
# not reproduce them. `errors='ignore'` keeps the cell safe to re-run once the
# columns are already gone.
features = features.drop(columns=features_to_drop + ['weekday_cat'], errors='ignore')

In [44]:
def train_test_split_sorted(data, train_size=0.8):
    """Split ``data`` chronologically into a train part and a test part.

    NOTE(review): this cell re-defines a function of the same name that an
    earlier cell of the notebook already defines; consider keeping only one.

    Rows are ordered by the ``timestamp`` column (parsed with
    ``pd.to_datetime``); the first ``train_size`` share of the ordered rows
    becomes the train part and the remainder the test part. Both parts are
    shown with ``display``. The caller's DataFrame is not modified.

    Args:
        data: DataFrame containing a ``timestamp`` column.
        train_size: fraction of rows (between 0 and 1) assigned to train.

    Returns:
        ``(train, test)`` — two independent DataFrames whose index follows the
        chronological order and whose ``timestamp`` column is datetime-typed.

    Raises:
        ValueError: if ``train_size`` is outside the range [0, 1].
    """
    if not 0 <= train_size <= 1:
        raise ValueError(f"train_size must be between 0 and 1, got {train_size!r}")

    # Sort by date. ``assign`` returns a new frame, so the parsed timestamps
    # never leak back into the DataFrame owned by the caller.
    display("Сортировка данных по дате: ")
    ordered = (
        data
        .assign(timestamp=pd.to_datetime(data["timestamp"]))
        .sort_values(by="timestamp")
        .reset_index(drop=True)
    )

    # Earliest rows go to train, the most recent ones to test.
    split_index = int(len(ordered) * train_size)
    train = ordered.iloc[:split_index].copy()
    test = ordered.iloc[split_index:].copy()
    display("Предварительная выборка на трейн: ")
    display(train)
    display("Предварительная выборка на тест: ")
    display(test)

    return train, test

In [51]:
# Redo the chronological 80/20 split on the reduced feature table.
train_data, test_data = train_test_split_sorted(features, train_size=0.8)

'Сортировка данных по дате: '

'Предварительная выборка на трейн: '

Unnamed: 0,user_id,post_id,gender,age,exp_group,SumTfIdf,MaxTfIdf,MeanTfIdf,PCA_1_TfIdf,PCA_2_TfIdf,PCA_3_TfIdf,PCA_4_TfIdf,len_text_num,hour_cat,day_of_week_cat,month_cat,timestamp,like_target
0,107767,685,0,31,4,13.597845,0.275167,0.000264,0.024860,0.152214,-0.093619,0.093414,3562,6,4,10,2021-10-01 06:01:52,0
1,127912,5438,1,43,1,5.816942,0.311333,0.000113,0.037664,-0.022530,-0.016797,0.007912,520,6,4,10,2021-10-01 06:02:00,0
2,59784,680,0,29,3,7.744020,0.506114,0.000150,-0.026675,0.044818,-0.044923,-0.017650,1240,6,4,10,2021-10-01 06:02:01,0
3,123183,7112,1,44,3,6.154851,0.437016,0.000119,0.109425,-0.038261,0.068309,-0.057558,602,6,4,10,2021-10-01 06:02:29,1
4,109583,5706,0,22,2,5.345223,0.334889,0.000104,0.219629,-0.074250,0.198745,-0.114454,613,6,4,10,2021-10-01 06:02:29,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1303492,135762,6042,0,17,1,6.913636,0.297125,0.000134,0.092211,-0.033062,0.042358,-0.034945,918,7,1,12,2021-12-14 07:43:38,0
1303493,40245,5356,1,30,2,9.165209,0.319290,0.000178,0.150500,-0.017236,-0.026497,0.038021,1593,7,1,12,2021-12-14 07:43:48,1
1303494,106385,4105,1,31,1,3.296829,0.429926,0.000064,-0.115126,-0.077449,0.015875,0.002982,140,7,1,12,2021-12-14 07:43:48,1
1303495,73181,1849,0,23,3,8.216981,0.579082,0.000159,-0.044057,0.035849,-0.032265,-0.037864,1632,7,1,12,2021-12-14 07:44:03,0


'Предварительная выборка на тест: '

Unnamed: 0,user_id,post_id,gender,age,exp_group,SumTfIdf,MaxTfIdf,MeanTfIdf,PCA_1_TfIdf,PCA_2_TfIdf,PCA_3_TfIdf,PCA_4_TfIdf,len_text_num,hour_cat,day_of_week_cat,month_cat,timestamp,like_target
1303497,141235,1875,1,37,2,11.392275,0.261541,0.000221,-0.012752,0.021906,-0.046546,-0.049026,2504,7,1,12,2021-12-14 07:44:16,1
1303498,153143,1017,1,35,3,9.538580,0.514062,0.000185,-0.049781,0.162451,0.109948,0.112348,5387,7,1,12,2021-12-14 07:44:22,0
1303499,151472,547,0,21,4,8.103359,0.471732,0.000157,-0.056031,0.105722,0.031023,0.031372,1480,7,1,12,2021-12-14 07:44:22,1
1303500,61455,4265,0,34,2,9.146074,0.228866,0.000177,0.107082,-0.005752,-0.095259,0.069579,1279,7,1,12,2021-12-14 07:44:22,1
1303501,37564,1910,1,59,2,8.582594,0.280798,0.000167,-0.053613,0.045479,-0.031218,-0.045870,1447,7,1,12,2021-12-14 07:44:22,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1629367,13350,1786,1,42,0,9.580836,0.478502,0.000186,-0.001552,0.078165,-0.076394,-0.099037,2144,23,2,12,2021-12-29 23:41:34,0
1629368,85153,2549,1,45,4,3.380304,0.393678,0.000066,-0.077304,-0.044052,-0.001459,-0.008468,140,23,2,12,2021-12-29 23:42:07,0
1629369,109595,4047,1,22,1,3.250582,0.539436,0.000063,-0.110055,-0.074330,0.009696,-0.002897,139,23,2,12,2021-12-29 23:43:15,0
1629370,18441,1786,1,49,3,9.580836,0.478502,0.000186,-0.001552,0.078165,-0.076394,-0.099037,2144,23,2,12,2021-12-29 23:45:42,1


In [52]:
# Separate the target column from the predictors of the reduced feature table.
y_train, y_test = train_data['like_target'], test_data['like_target']

X_train = train_data.drop(columns=['like_target'])
X_test = test_data.drop(columns=['like_target'])

In [53]:
# Repeat the depth search and final fit on the reduced feature set.
# `metric` accepts 'AUC-ROC' or 'F1-Score'.
CatB_best_model, CatB_metrics_data = catboost_clf(
    x_random_state=42,
    X_train_transformed=X_train,
    y_train_transformed=y_train,
    X_test_transformed=X_test,
    y_test_transformed=y_test,
    metric='AUC-ROC',
)

CatBoostClassifier on Transformed Data


Depth Progress: 100%|██████████| 7/7 [03:13<00:00, 27.62s/it]


Maximum AUC-ROC = 0.6484 | Best Depth = 10
Test AUC-ROC: 0.6423


Unnamed: 0,AUC-ROC,Depth,best_depth,max_train_metric,max_test_auc_roc,max_test_f1
0,0.625933,4,10,0.648432,0.642302,0.659051
1,0.632332,5,10,0.648432,0.642302,0.659051
2,0.635925,6,10,0.648432,0.642302,0.659051
3,0.640386,7,10,0.648432,0.642302,0.659051
4,0.642865,8,10,0.648432,0.642302,0.659051
5,0.645696,9,10,0.648432,0.642302,0.659051
6,0.648432,10,10,0.648432,0.642302,0.659051


In [54]:
# Save the refitted model to disk in the "cbm" format (same file name as before).
CatB_best_model.save_model('catboost_model_new', format="cbm")

# Training parameters are not passed here: the model dump already contains them.
from_file = CatBoostClassifier()
from_file.load_model('catboost_model_new')

# Sanity check: the reloaded model can predict on the training predictors.
from_file.predict(X_train)

array([1, 1, 0, ..., 1, 1, 1], dtype=int64)

In [55]:
# Store the reduced feature table under a new table name (an existing table of that name is replaced).
load_to_sql('darja_stiheeva_lms4973_features_lesson_22_pca_with_target_2', features)

In [56]:
# Display the current feature table.
features

Unnamed: 0,user_id,post_id,gender,age,exp_group,SumTfIdf,MaxTfIdf,MeanTfIdf,PCA_1_TfIdf,PCA_2_TfIdf,PCA_3_TfIdf,PCA_4_TfIdf,len_text_num,hour_cat,day_of_week_cat,month_cat,timestamp,like_target
0,200,3434,1,34,3,2.892646,0.510899,0.000056,-0.126161,-0.104655,0.020903,0.014462,140,22,5,11,2021-11-20 22:49:40,0
1,200,2416,1,34,3,2.512959,0.469197,0.000049,-0.176772,-0.169486,0.044218,0.036424,77,14,5,10,2021-10-02 14:20:40,0
2,200,1592,1,34,3,6.966328,0.540718,0.000135,-0.048699,0.054070,-0.007312,-0.021639,1112,19,4,10,2021-10-29 19:44:31,0
3,200,3151,1,34,3,3.133805,0.583546,0.000061,-0.124179,-0.095942,0.019393,0.013283,138,22,5,11,2021-11-20 22:55:51,0
4,200,1125,1,34,3,9.482513,0.422031,0.000184,-0.056744,0.135959,0.052061,0.046768,2443,9,1,11,2021-11-23 09:21:50,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1629367,168552,1915,1,16,4,7.159526,0.534125,0.000139,-0.019444,0.046621,-0.045287,-0.072064,1130,18,1,12,2021-12-07 18:33:29,1
1629368,168552,1810,1,16,4,7.573453,0.584978,0.000147,-0.013476,0.033978,-0.043798,-0.053374,1282,13,2,10,2021-10-20 13:47:41,1
1629369,168552,5487,1,16,4,5.709767,0.353557,0.000111,0.027904,-0.028779,-0.004209,-0.003524,674,9,1,12,2021-12-21 09:32:54,1
1629370,168552,3628,1,16,4,3.117580,0.457843,0.000060,-0.080417,-0.047158,-0.008549,-0.009046,140,14,1,11,2021-11-23 14:44:41,1
