In [1]:
import json
from functools import partial
from typing import List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from catboost import CatBoostClassifier, Pool
from catboost.utils import eval_metric
from scipy.spatial.distance import cosine, euclidean
from sklearn.metrics import pairwise_distances
from sklearn.model_selection import train_test_split

from sklearn.metrics import precision_score


### Load data

In [2]:
dataset = pd.read_parquet(r"C:\Users\druzh\Project_python\ozon_top_1\Datasets/train_pairs.parquet")
etl = pd.read_parquet(r"C:\Users\druzh\Project_python\ozon_top_1\Datasets/train_data.parquet")

In [3]:
dataset.head(2)

Unnamed: 0,target,variantid1,variantid2
0,0.0,51197862,51198054
1,1.0,53062686,536165289


Get raw data for each variantid.

In [4]:
features = (
    dataset
    .merge(
        etl
        .add_suffix('1'),
        on="variantid1"
    )
    .merge(
        etl
        .add_suffix('2'),
        on="variantid2"
    )
)

In [5]:
features.head(2)

Unnamed: 0,target,variantid1,variantid2,name1,categories1,color_parsed1,pic_embeddings_resnet_v11,main_pic_embeddings_resnet_v11,name_bert_641,characteristic_attributes_mapping1,name2,categories2,color_parsed2,pic_embeddings_resnet_v12,main_pic_embeddings_resnet_v12,name_bert_642,characteristic_attributes_mapping2
0,0.0,51197862,51198054,Удлинитель TDM Electric Люкс УЛ05В 5 м (SQ1303...,"{""1"": ""EPG"", ""2"": ""Электроника"", ""3"": ""Сетевые...",[белый],,"[[-0.4304909, -0.49474272, -0.46439183, -0.060...","[-0.5104684, 0.56158644, 0.58873796, -0.529718...","{""Число жил"":[""3""],""Макс. нагрузка, Вт"":[""3500...",Удлинитель TDM Electric Люкс УЛ05В 1.5 м (SQ13...,"{""1"": ""EPG"", ""2"": ""Электроника"", ""3"": ""Сетевые...",[белый],,"[[-0.42941108, -0.5129398, -0.4753536, -0.0677...","[-0.455473, 0.58157134, 0.5870387, -0.5325003,...","{""Электробезопасность"":[""Заземление""],""Длина к..."
1,0.0,51197862,51199884,Удлинитель TDM Electric Люкс УЛ05В 5 м (SQ1303...,"{""1"": ""EPG"", ""2"": ""Электроника"", ""3"": ""Сетевые...",[белый],,"[[-0.4304909, -0.49474272, -0.46439183, -0.060...","[-0.5104684, 0.56158644, 0.58873796, -0.529718...","{""Число жил"":[""3""],""Макс. нагрузка, Вт"":[""3500...",Удлинитель TDM Electric Люкс УЛ05В 3 м (SQ1303...,"{""1"": ""EPG"", ""2"": ""Электроника"", ""3"": ""Сетевые...",[белый],,"[[-0.43180764, -0.49580905, -0.5062628, -0.130...","[-0.5425725, 0.6415736, 0.51481575, -0.5687392...","{""Макс. нагрузка, Вт"":[""3500""],""Стандарт защит..."


Features functions.

In [10]:
feats = ["main_pic_embeddings_resnet_v11", "name_bert_641", "main_pic_embeddings_resnet_v12", "name_bert_642"]

In [11]:
X_train, X_test = train_test_split(
    features[feats + ["target", "variantid1", "variantid2"]], 
    test_size=0.1, random_state=42, stratify=features[["target"]])

X_train, X_val = train_test_split(
    X_train[feats + ["target"]], 
    test_size=0.1, random_state=42, stratify=X_train[["target"]])


y_test = X_test[["target"]]
X_test = X_test.drop(["target", "variantid1", "variantid2"], axis=1)

y_train = X_train["target"] 
X_train = X_train.drop(["target"], axis=1)

        


In [19]:
X_train.head(2)

Unnamed: 0,main_pic_embeddings_resnet_v11,name_bert_641,main_pic_embeddings_resnet_v12,name_bert_642
276475,"[[1.5177895, -0.1607113, -1.3436879, -0.518293...","[-0.5862704, 0.39754072, 0.7764767, -0.5785445...","[[1.330656, -0.2443413, -1.4133728, -0.3939227...","[-0.5026867, 0.36081815, 0.7274831, -0.5621019..."
238547,"[[0.4010295, -0.16012707, -0.9049238, -0.00406...","[-0.49965647, 0.75911903, 0.5878046, -0.476294...","[[0.19791354, 0.031553254, -0.44747415, 0.1454...","[-0.498988, 0.7581946, 0.6100502, -0.45187664,..."


In [23]:
X_train["main_pic_embeddings_resnet_v11"][276475][0]

array([ 1.5177895 , -0.1607113 , -1.3436879 , -0.5182934 ,  1.9222594 ,
        1.409966  , -0.07100956, -1.3819436 ,  0.29979265, -0.46199632,
        0.41519195,  0.62992084, -0.37708902,  0.526022  , -1.3048393 ,
       -0.3913864 ,  1.6058913 ,  0.40473   , -0.02482107, -0.573033  ,
       -0.70632994, -0.15699726, -0.8837389 ,  0.804603  ,  0.12449124,
       -0.8059559 , -0.7594    , -0.4075185 , -0.73291975,  1.1321254 ,
       -0.05538362, -0.24840048, -0.35928893,  1.7011063 , -0.34390223,
        0.2926876 , -0.39033335, -1.3694782 ,  1.3234332 , -0.3599332 ,
       -1.5499215 ,  0.09847139,  1.3356067 , -0.32096905, -0.5689085 ,
        0.00678925, -0.27474916,  1.0556518 , -1.9857645 , -0.75344956,
        1.6898749 ,  0.5296958 ,  0.15033466, -0.5294885 , -0.06423454,
        0.547827  , -1.2910435 ,  0.02101413, -0.05007964,  0.9777609 ,
        0.82468736, -0.55121124,  1.6157899 , -0.51937044,  0.5853788 ,
        0.6077431 , -0.10538843,  0.1991615 , -0.33610523, -0.66

In [24]:
X_train_final = []

for i in range(len(X_train)):
    row = []
    row.extend(list(X_train[feats[0]].iloc[i])[0])
    row.extend(list(X_train[feats[1]].iloc[i]))
    row.extend(list(X_train[feats[2]].iloc[i])[0])
    row.extend(list(X_train[feats[3]].iloc[i]))
    X_train_final.append(row)

In [27]:
X_test_final = []

for i in range(len(X_test)):
    row = []
    row.extend(list(X_test[feats[0]].iloc[i])[0])
    row.extend(list(X_test[feats[1]].iloc[i]))
    row.extend(list(X_test[feats[2]].iloc[i])[0])
    row.extend(list(X_test[feats[3]].iloc[i]))
    X_test_final.append(row)

In [28]:
from sklearn.linear_model import SGDClassifier

def train_logistic_regression(X, y, chunk_size, num_epochs=10):
    clf = SGDClassifier(loss='log')  # Используем логистическую регрессию
    num_samples = len(X)
    num_chunks = num_samples // chunk_size

    for epoch in range(num_epochs):
        print(epoch)
        for chunk_idx in range(num_chunks):
            start_idx = chunk_idx * chunk_size
            end_idx = start_idx + chunk_size
            X_chunk = X[start_idx:end_idx]
            y_chunk = y[start_idx:end_idx]
            clf.partial_fit(X_chunk, y_chunk, classes=np.unique(y))
    
    return clf

In [30]:
chunk_size = 60000
num_epochs = 200

model = train_logistic_regression(X_train_final, y_train, chunk_size, num_epochs)

0


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


1


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


2


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


3


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


4


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


5


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


6


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


7


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


8


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


9


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


10


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


11


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


12


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


13


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


14


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


15


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


16


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


17


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


18


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


19


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


20


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


21


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


22


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


23


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


24


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


25


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


26


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


27


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


28


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


29


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


30


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


31


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


32


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


33


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


34


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


35


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


36


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


37


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


38


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


39


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


40


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


41


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


42


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


43


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


44


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


45


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


46


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


47


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


48


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


49


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


50


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


51


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


52


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


53


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


54


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


55


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


56


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


57


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


58


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


59


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


60


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


61


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


62


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


63


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


64


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


65


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


66


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


67


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


68


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


69


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


70


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


71


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


72


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


73


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


74


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


75


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


76


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


77


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


78


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


79


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


80


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


81


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


82


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


83


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


84


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


85


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


86


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


87


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


88


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


89


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


90


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


91


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


92


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


93


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


94


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


95


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


96


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


97


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


98


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


99


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


100


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


101


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


102


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


103


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


104


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


105


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


106


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


107


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


108


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


109


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


110


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


111


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


112


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


113


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


114


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


115


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


116


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


117


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


118


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


119


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


120


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


121


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


122


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


123


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


124


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


125


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


126


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


127


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


128


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


129


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


130


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


131


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


132


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


133


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


134


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


135


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


136


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


137


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


138


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


139


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


140


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


141


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


142


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


143


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


144


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


145


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


146


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


147


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


148


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


149


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


150


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


151


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


152


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


153


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


154


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


155


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


156


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


157


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


158


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


159


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


160


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


161


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


162


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


163


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


164


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


165


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


166


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


167


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


168


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


169


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


170


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


171


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


172


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


173


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


174


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


175


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


176


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


177


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


178


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


179


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


180


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


181


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


182


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


183


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


184


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


185


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


186


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


187


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


188


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


189


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


190


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


191


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


192


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


193


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


194


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


195


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


196


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


197


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


198


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


199


  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]
  y_chunk = y[start_idx:end_idx]


In [31]:
import joblib

joblib.dump(model, 'logistic_regression_w_embs_only_model_200_epochs.pkl')

['logistic_regression_w_embs_only_model_200_epochs.pkl']

In [34]:
import random
from sklearn.metrics import mean_squared_error
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
prediction = model.predict(X_test_final)

mse = mean_squared_error(y_test, prediction)
print("Mean Squared Error:", mse)

f1 = f1_score(y_test, prediction)
print("f1:", f1)

accuracy = accuracy_score(y_test, prediction)
precision = precision_score(y_test, prediction)
recall = recall_score(y_test, prediction)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

Mean Squared Error: 0.2988843217850851
f1: 0.6298480930833872
Accuracy: 0.7011156782149148
Precision: 0.6928273042396231
Recall: 0.5773646396563218
