<h1>Содержание<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Обзор-данных" data-toc-modified-id="Обзор-данных-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Обзор данных</a></span></li><li><span><a href="#Корреляционный-анализ" data-toc-modified-id="Корреляционный-анализ-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Корреляционный анализ</a></span><ul class="toc-item"><li><span><a href="#Столбцы-с-0-по-199" data-toc-modified-id="Столбцы-с-0-по-199-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Столбцы с 0 по 199</a></span></li><li><span><a href="#Столбцы-с-200-по-399" data-toc-modified-id="Столбцы-с-200-по-399-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Столбцы с 200 по 399</a></span></li><li><span><a href="#Столбцы-с-400-по-599" data-toc-modified-id="Столбцы-с-400-по-599-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Столбцы с 400 по 599</a></span></li><li><span><a href="#Столбцы-с-600-по-799" data-toc-modified-id="Столбцы-с-600-по-799-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Столбцы с 600 по 799</a></span></li><li><span><a href="#Столбцы-с-800-по-930" data-toc-modified-id="Столбцы-с-800-по-930-2.5"><span class="toc-item-num">2.5&nbsp;&nbsp;</span>Столбцы с 800 по 930</a></span></li></ul></li></ul></div>

# Отбор значимых непрерывных признаков

## Обзор данных

In [1]:
# импорт основных библиотек
import pandas as pd
import numpy as np

# импорт библиотек текущего модуля
from scipy.stats import spearmanr
import phik

# display settings: render up to 500 columns when showing wide frames
pd.set_option('display.max_columns', 500)

# input paths (relative, so they resolve against the kernel's working directory)
dataset_train_path = '../../data/dataset_train.parquet'
features_types_path = '../../data/features_types.json'
sample_submission_path = '../../data/sample_submission.csv'

# constants
# NOTE(review): RANDOM_STATE is not referenced in the visible part of the notebook — confirm it is used further down
RANDOM_STATE = 42


In [2]:
# load the feature -> type mapping (orient='index' puts the JSON keys, i.e. the
# feature names, into the row index) and the sample submission file
features_types = pd.read_json(features_types_path, orient='index')
sample_submission = pd.read_csv(sample_submission_path)


In [3]:
# peek at the raw layout: feature names sit in the index, the single column 0 holds the type
features_types.head()

Unnamed: 0,0
markers_0_1_cnt,numeric
markers_1_1_cnt,numeric
markers_2_1_cnt,numeric
markers_3_1_cnt,numeric
markers_4_1_cnt,numeric


In [4]:
# move the feature names out of the index into a `feature` column and give
# the single value column (labelled 0) the name `type`
# NOTE: this cell rebinds `features_types`; re-running it without re-reading
# the JSON applies reset_index a second time
features_types = (
    features_types
    .reset_index()
    .rename(columns={'index': 'feature', 0: 'type'})
)

# show the reshaped table
features_types


Unnamed: 0,feature,type
0,markers_0_1_cnt,numeric
1,markers_1_1_cnt,numeric
2,markers_2_1_cnt,numeric
3,markers_3_1_cnt,numeric
4,markers_4_1_cnt,numeric
...,...,...
2771,markers_941_1_cnt,numeric
2772,markers_942_1_cnt,numeric
2773,markers_943_1_cnt,numeric
2774,markers_944_1_cnt,numeric


In [5]:
# number of features per declared type
features_types['type'].value_counts()


type
numeric            2607
categorical_int     138
categorical_str      31
Name: count, dtype: int64

## Корреляционный анализ

### Столбцы с 0 по 199

In [6]:
def read_data(path: str, left: int, right: int):
    """
    Read a slice of feature columns, plus the target, from a parquet file.
    ---
    Args:
        path: location of the parquet file.
        left: start position (inclusive) in `features_types['feature']`.
        right: end position (exclusive) in `features_types['feature']`.

    Returns:
        pd.DataFrame holding the selected feature columns followed by `target`.

    Note: the feature names are taken from the notebook-level `features_types`
    frame, which must already have its `feature` column. `left`/`right` are
    positions in that list, not marker ids.
    """
    # feature names for the requested slice; the target is always appended last
    wanted = [*features_types['feature'][left:right].tolist(), 'target']

    # only the requested columns are read from the file
    return pd.read_parquet(path, columns=wanted, engine='pyarrow')


def variance_filter(data: pd.DataFrame, threshold: float, target_col: str = 'target'):
    """
    Drop constant and near-constant features by thresholding their variance.
    ---
    Args:
        data: frame with feature columns and the target column.
        threshold: a feature is kept only when its variance is strictly
            greater than this value.
        target_col: name of the target column; it is excluded from the
            variance check and always placed last in the result.

    Returns:
        pd.DataFrame with the surviving features followed by `target_col`.

    Raises:
        KeyError: if `target_col` is not a column of `data`.
    """
    # pick the features by name rather than by position, so the result does
    # not depend on the target being the last column of `data`
    variance = data.drop(columns=[target_col]).var()

    # a NaN variance compares as False here, so such columns are dropped too
    selected_features = variance[variance > threshold].index.tolist()

    return data[selected_features + [target_col]]
    

def check_corr(data: pd.DataFrame, target_col: str = 'target'):
    """
    Compute the Pearson correlation of every feature with the target.
    ---
    Args:
        data: frame with feature columns and the target column.
        target_col: name of the target column.

    Returns:
        pd.Series indexed by feature name with the correlation coefficient,
        sorted from the highest value to the lowest.

    Raises:
        KeyError: if `target_col` is not a column of `data`.
    """
    # select the features by name, so the target never correlates with itself
    # and no feature is lost when the target is not the last column
    features = data.drop(columns=[target_col])

    # corrwith uses the Pearson method by default
    correlation = features.corrwith(data[target_col])

    return correlation.sort_values(ascending=False)

In [7]:
# load the features at positions 0-199 of the feature list, plus the target
dataset_0_200 = read_data(path=dataset_train_path, left=0, right=200)
dataset_0_200.head()

Unnamed: 0,markers_0_1_cnt,markers_1_1_cnt,markers_2_1_cnt,markers_3_1_cnt,markers_4_1_cnt,markers_5_1_cnt,markers_6_1_cnt,markers_7_1_cnt,markers_8_1_cnt,markers_9_1_cnt,markers_10_1_cnt,markers_11_1_cnt,markers_12_1_cnt,markers_13_1_cnt,markers_14_1_cnt,markers_15_1_cnt,markers_16_1_cnt,markers_17_1_cnt,markers_18_1_cnt,markers_19_1_cnt,markers_20_1_cnt,markers_21_1_cnt,markers_22_1_cnt,markers_23_1_cnt,markers_24_1_cnt,markers_25_1_cnt,markers_26_1_cnt,markers_27_1_cnt,markers_28_1_cnt,markers_29_1_cnt,markers_30_1_cnt,markers_31_1_cnt,markers_32_1_cnt,markers_33_1_cnt,markers_34_1_cnt,markers_35_1_cnt,markers_36_1_cnt,markers_37_1_cnt,markers_38_1_cnt,markers_39_1_cnt,markers_40_1_cnt,markers_41_1_cnt,markers_42_1_cnt,markers_43_1_cnt,markers_44_1_cnt,markers_45_1_cnt,markers_46_1_cnt,markers_47_1_cnt,markers_48_1_cnt,markers_49_1_cnt,markers_50_1_cnt,markers_51_1_cnt,markers_52_1_cnt,markers_53_1_cnt,markers_54_1_cnt,markers_55_1_cnt,markers_56_1_cnt,markers_57_1_cnt,markers_58_1_cnt,markers_59_1_cnt,markers_60_1_cnt,markers_61_1_cnt,markers_62_1_cnt,markers_63_1_cnt,markers_64_1_cnt,markers_65_1_cnt,markers_66_1_cnt,markers_67_1_cnt,markers_68_1_cnt,markers_69_1_cnt,markers_70_1_cnt,markers_71_1_cnt,markers_72_1_cnt,markers_73_1_cnt,markers_74_1_cnt,markers_75_1_cnt,markers_76_1_cnt,markers_77_1_cnt,markers_78_1_cnt,markers_79_1_cnt,markers_80_1_cnt,markers_81_1_cnt,markers_82_1_cnt,markers_83_1_cnt,markers_84_1_cnt,markers_85_1_cnt,markers_86_1_cnt,markers_87_1_cnt,markers_88_1_cnt,markers_89_1_cnt,markers_90_1_cnt,markers_91_1_cnt,markers_92_1_cnt,markers_93_1_cnt,markers_94_1_cnt,markers_95_1_cnt,markers_96_1_cnt,markers_97_1_cnt,markers_98_1_cnt,markers_99_1_cnt,markers_100_1_cnt,markers_101_1_cnt,markers_102_1_cnt,markers_103_1_cnt,markers_104_1_cnt,markers_105_1_cnt,markers_106_1_cnt,markers_107_1_cnt,markers_108_1_cnt,markers_109_1_cnt,markers_110_1_cnt,markers_111_1_cnt,markers_112_1_cnt,markers_113_1_cnt,markers_114_1_cnt,markers_115_1_cnt,markers_116
_1_cnt,markers_117_1_cnt,markers_118_1_cnt,markers_119_1_cnt,markers_120_1_cnt,markers_121_1_cnt,markers_122_1_cnt,markers_123_1_cnt,markers_124_1_cnt,markers_125_1_cnt,markers_126_1_cnt,markers_128_1_cnt,markers_129_1_cnt,markers_131_1_cnt,markers_132_1_cnt,markers_133_1_cnt,markers_135_1_cnt,markers_136_1_cnt,markers_137_1_cnt,markers_138_1_cnt,markers_139_1_cnt,markers_140_1_cnt,markers_141_1_cnt,markers_142_1_cnt,markers_143_1_cnt,markers_145_1_cnt,markers_146_1_cnt,markers_147_1_cnt,markers_148_1_cnt,markers_149_1_cnt,markers_150_1_cnt,markers_151_1_cnt,markers_154_1_cnt,markers_155_1_cnt,markers_156_1_cnt,markers_157_1_cnt,markers_158_1_cnt,markers_159_1_cnt,markers_160_1_cnt,markers_161_1_cnt,markers_162_1_cnt,markers_163_1_cnt,markers_164_1_cnt,markers_165_1_cnt,markers_166_1_cnt,markers_167_1_cnt,markers_168_1_cnt,markers_169_1_cnt,markers_170_1_cnt,markers_171_1_cnt,markers_172_1_cnt,markers_173_1_cnt,markers_174_1_cnt,markers_175_1_cnt,markers_176_1_cnt,markers_177_1_cnt,markers_178_1_cnt,markers_179_1_cnt,markers_180_1_cnt,markers_181_1_cnt,markers_182_1_cnt,markers_183_1_cnt,markers_184_1_cnt,markers_185_1_cnt,markers_186_1_cnt,markers_187_1_cnt,markers_188_1_cnt,markers_189_1_cnt,markers_190_1_cnt,markers_191_1_cnt,markers_192_1_cnt,markers_193_1_cnt,markers_194_1_cnt,markers_195_1_cnt,markers_196_1_cnt,markers_198_1_cnt,markers_199_1_cnt,markers_201_1_cnt,markers_202_1_cnt,markers_203_1_cnt,markers_204_1_cnt,markers_205_1_cnt,markers_206_1_cnt,markers_207_1_cnt,target
0,-0.149534,-0.096585,-0.40087,-0.362458,0.302917,-0.324892,-0.046691,-0.554345,-0.016534,-0.05408,-0.044653,-0.208968,-0.017454,-0.008052,-0.028391,-0.031395,-0.199957,-0.064366,-0.026839,-0.027484,-0.03147,-0.080534,-0.110653,-0.11212,-0.02527,-0.115151,-0.04685,-0.033911,-0.00794,-0.025816,-0.088375,-0.009532,-0.237766,-0.041626,-0.012926,-0.028445,-0.009258,-0.026347,-0.038496,-0.08904,-0.401337,-0.03564,-0.100471,-0.035238,-0.009976,-0.031354,-0.006144,-0.070189,-0.047567,-0.050881,-0.190083,-0.084819,-0.060074,-0.041818,-0.100019,-0.016022,-0.056574,-0.028238,,,,-0.59209,-0.160617,-0.053797,-0.095108,0.43545,-0.095386,-0.152929,-0.338622,-0.092801,-0.29257,-0.133324,-0.400799,1.888996,-0.630714,,-0.063061,,0.0,-0.031579,-0.012766,-0.031895,-0.01416,-0.035219,,-0.069548,-0.034335,-0.083702,-0.095564,-0.053049,-0.023793,-0.046598,-0.020839,-0.272352,-0.07247,-0.00481,-0.044856,-0.024959,-0.009783,-0.338382,-0.365005,-0.152973,-0.354618,-1.003859,0.351562,-0.251485,-0.020943,-0.043771,-0.148337,-0.031498,-0.050959,-0.023588,-0.009636,-0.217079,-0.03998,-0.070522,-0.085858,-0.055425,,-0.239854,-0.266803,-0.045629,-0.494135,,-0.050901,-0.229324,-0.122421,-0.331202,-0.019986,-0.101839,-0.24505,-0.051652,-0.456677,-0.033475,-0.128759,-0.080313,-0.059051,-0.015493,-0.06377,-0.073248,,,-0.125771,-0.011323,-0.116921,-0.623644,,-0.05825,-0.018025,-0.017433,-0.036091,-0.033809,-0.025577,-0.023001,,-0.048036,,-0.318884,-0.039984,-0.05994,-0.009467,-0.205272,-0.078859,-0.101097,-0.113138,-0.405617,-0.056281,-0.019416,-0.094575,,-0.302369,-0.135672,-0.032676,-0.07738,-0.236518,,-0.022385,-0.380742,-0.737386,-0.246198,,-0.083652,-0.00957,-0.006186,-0.034974,-0.010496,-0.080347,,,-0.233142,-0.317366,-0.027186,-0.547415,,,,-0.206982,-0.010211,-0.01351,-0.197118,0
1,-0.149534,-0.096585,0.196468,-0.362458,0.092276,-0.324892,-0.046691,-0.554345,-0.016534,-0.05408,-0.044653,-0.208968,-0.017454,-0.008052,-0.028391,-0.031395,-0.199957,-0.064366,-0.026839,-0.027484,-0.03147,-0.080534,-0.110653,-0.11212,-0.02527,-0.115151,-0.04685,-0.033911,-0.00794,-0.025816,-0.088375,-0.009532,-0.237766,-0.041626,-0.012926,-0.028445,-0.009258,-0.026347,-0.038496,-0.08904,-0.401337,-0.03564,-0.100471,-0.035238,-0.009976,-0.031354,-0.006144,-0.070189,-0.047567,-0.050881,-0.190083,-0.084819,-0.060074,-0.041818,-0.100019,-0.016022,-0.056574,-0.028238,-0.003511,-0.593266,-0.956244,-0.719615,-0.160617,-0.053797,-0.095108,-0.562652,-0.095386,-0.152929,-0.338622,-0.092801,-0.29257,-0.133324,-0.400799,-0.463549,-0.630714,-0.110003,-0.063061,-0.007826,0.0,-0.031579,-0.012766,-0.031895,-0.01416,-0.035219,-0.009355,-0.069548,-0.034335,-0.083702,-0.095564,-0.053049,-0.023793,-0.046598,-0.020839,-0.272352,-0.07247,-0.00481,-0.044856,-0.024959,-0.009783,-0.470136,-0.365005,-0.152973,-0.354618,-1.003859,-1.873733,-0.251485,-0.020943,-0.043771,-0.148337,-0.031498,-0.050959,-0.023588,-0.009636,-0.217079,-0.03998,-0.070522,-0.085858,-0.055425,-0.014615,-0.239854,-0.266803,-0.045629,-0.494135,-0.005961,-0.050901,-0.229324,-0.122421,-0.331202,-0.019986,-0.101839,-0.24505,-0.051652,-0.09507,-0.033475,-0.128759,-0.080313,-0.059051,-0.015493,-0.06377,-0.073248,,-0.080794,-0.5042,-0.011323,-0.116921,-0.623644,-0.062114,-0.05825,-0.018025,-0.017433,-0.036091,-0.033809,-0.025577,-0.023001,-0.015221,-0.048036,-0.007325,-0.318884,-0.039984,-0.05994,-0.009467,-0.205272,-0.078859,-0.101097,-0.113138,-0.405617,-0.056281,-0.019416,-0.094575,-0.031043,-0.302369,-0.135672,-0.032676,-0.07738,-0.236518,,-0.022385,-0.380742,-0.737386,-0.246198,,-0.083652,-0.00957,-0.006186,-0.034974,-0.010496,-0.080347,,,-0.233142,-0.317366,-0.027186,-0.547415,-0.065005,-0.040324,-0.028479,-0.206982,-0.010211,-0.01351,-0.197118,0
2,-0.149534,-0.096585,-0.102201,0.911996,0.7242,0.445189,-0.046691,-0.397624,-0.016534,-0.05408,-0.044653,-0.208968,-0.017454,-0.008052,-0.028391,-0.031395,-0.199957,-0.064366,-0.026839,-0.027484,-0.03147,-0.080534,-0.110653,-0.11212,-0.02527,-0.115151,-0.04685,-0.033911,-0.00794,-0.025816,-0.088375,-0.009532,-0.237766,-0.041626,-0.012926,-0.028445,-0.009258,-0.026347,-0.038496,-0.08904,-0.044543,-0.03564,5.115505,-0.035238,-0.009976,-0.031354,-0.006144,-0.070189,-0.047567,-0.050881,-0.190083,-0.084819,-0.060074,-0.041818,-0.100019,-0.016022,-0.056574,-0.028238,-0.003511,0.119073,0.712951,1.703355,-0.160617,-0.053797,-0.095108,0.185925,-0.095386,-0.152929,-0.338622,-0.092801,-0.29257,1.040303,-0.400799,0.228376,0.880443,-0.110003,-0.063061,-0.007826,0.0,-0.031579,-0.012766,-0.031895,-0.01416,-0.035219,-0.009355,-0.069548,-0.034335,-0.083702,-0.095564,-0.053049,-0.023793,-0.046598,-0.020839,-0.272352,-0.07247,-0.00481,-0.044856,-0.024959,-0.009783,-0.470136,-0.365005,-0.152973,0.364971,-1.003859,0.756162,-0.251485,-0.020943,-0.043771,-0.148337,-0.031498,-0.050959,-0.023588,-0.009636,3.661495,-0.03998,-0.070522,-0.085858,-0.055425,-0.014615,-0.239854,-0.266803,-0.045629,-0.494135,-0.005961,-0.050901,-0.229324,-0.122421,-0.331202,-0.019986,-0.101839,0.606835,-0.051652,-0.456677,-0.033475,-0.128759,-0.080313,-0.059051,-0.015493,-0.06377,-0.073248,,-0.080794,1.577163,-0.011323,-0.116921,0.051302,-0.062114,-0.05825,-0.018025,-0.017433,-0.036091,-0.033809,-0.025577,-0.023001,-0.015221,-0.048036,-0.007325,-0.318884,-0.039984,-0.05994,-0.009467,-0.205272,-0.078859,-0.101097,-0.113138,-0.405617,-0.056281,-0.019416,-0.094575,-0.031043,0.195448,-0.135672,-0.032676,-0.07738,-0.236518,,-0.022385,-0.380742,-0.737386,-0.246198,,-0.083652,-0.00957,-0.006186,-0.034974,-0.010496,-0.080347,,,-0.233142,0.191618,-0.027186,-0.547415,-0.065005,-0.040324,-0.028479,-0.206982,-0.010211,-0.01351,-0.197118,1
3,-0.149534,-0.096585,-0.40087,-0.362458,1.145482,-0.324892,-0.046691,-0.554345,-0.016534,-0.05408,-0.044653,-0.208968,-0.017454,-0.008052,-0.028391,-0.031395,-0.199957,-0.064366,-0.026839,-0.027484,-0.03147,-0.080534,-0.110653,-0.11212,-0.02527,-0.115151,-0.04685,-0.033911,-0.00794,-0.025816,-0.088375,-0.009532,-0.237766,-0.041626,-0.012926,-0.028445,-0.009258,-0.026347,-0.038496,2.330488,0.669046,-0.03564,-0.100471,-0.035238,-0.009976,-0.031354,-0.006144,-0.070189,-0.047567,-0.050881,-0.190083,-0.084819,-0.060074,-0.041818,5.598094,-0.016022,-0.056574,-0.028238,-0.003511,-0.593266,0.191328,-0.59209,-0.160617,-0.053797,-0.095108,-0.562652,-0.095386,-0.152929,-0.338622,-0.092801,-0.29257,-0.133324,-0.400799,-0.463549,0.448684,-0.110003,-0.063061,-0.007826,0.0,-0.031579,-0.012766,-0.031895,-0.01416,-0.035219,-0.009355,-0.069548,-0.034335,-0.083702,-0.095564,-0.053049,-0.023793,-0.046598,-0.020839,1.72242,-0.07247,-0.00481,-0.044856,-0.024959,-0.009783,0.452145,-0.365005,-0.152973,-0.354618,0.040148,1.059611,0.481778,-0.020943,-0.043771,-0.148337,-0.031498,-0.050959,-0.023588,-0.009636,-0.217079,-0.03998,-0.070522,2.350237,-0.055425,-0.014615,-0.239854,-0.266803,-0.045629,-0.494135,-0.005961,-0.050901,-0.229324,-0.122421,-0.331202,-0.019986,-0.101839,-0.24505,-0.051652,2.797792,-0.033475,-0.128759,-0.080313,-0.059051,-0.015493,-0.06377,-0.073248,-0.131086,-0.080794,1.577163,-0.011323,-0.116921,0.501265,-0.062114,-0.05825,-0.018025,-0.017433,-0.036091,-0.033809,-0.025577,-0.023001,-0.015221,-0.048036,-0.007325,0.765114,-0.039984,-0.05994,-0.009467,-0.205272,-0.078859,-0.101097,-0.113138,4.438389,-0.056281,-0.019416,-0.094575,-0.031043,0.195448,-0.135672,-0.032676,-0.07738,-0.236518,-0.050262,-0.022385,0.587302,0.401655,-0.246198,,-0.083652,-0.00957,-0.006186,-0.034974,-0.010496,-0.080347,-0.014305,-0.013827,0.096625,6.044929,-0.027186,2.251156,-0.065005,-0.040324,-0.028479,8.566268,-0.010211,-0.01351,-0.197118,0
4,-0.149534,-0.096585,0.196468,-0.362458,0.934841,-0.324892,-0.046691,0.072541,-0.016534,-0.05408,-0.044653,-0.208968,-0.017454,-0.008052,-0.028391,-0.031395,-0.199957,-0.064366,-0.026839,-0.027484,-0.03147,-0.080534,-0.110653,-0.11212,-0.02527,-0.115151,-0.04685,-0.033911,-0.00794,-0.025816,-0.088375,-0.009532,-0.237766,-0.041626,-0.012926,-0.028445,-0.009258,-0.026347,-0.038496,-0.08904,1.739428,-0.03564,-0.100471,-0.035238,-0.009976,-0.031354,-0.006144,-0.070189,-0.047567,-0.050881,-0.190083,-0.084819,-0.060074,-0.041818,-0.100019,-0.016022,-0.056574,-0.028238,-0.003511,-0.450798,-0.747595,-0.464565,-0.160617,-0.053797,-0.095108,-0.562652,-0.095386,-0.152929,-0.338622,-0.092801,-0.29257,-0.133324,-0.400799,-0.463549,0.016924,-0.110003,-0.063061,-0.007826,0.0,-0.031579,-0.012766,-0.031895,-0.01416,-0.035219,-0.009355,-0.069548,-0.034335,-0.083702,-0.095564,-0.053049,-0.023793,-0.046598,-0.020839,-0.272352,-0.07247,-0.00481,-0.044856,-0.024959,-0.009783,-0.470136,3.464682,-0.152973,-0.354618,-1.003859,0.958461,2.681566,-0.020943,-0.043771,-0.148337,-0.031498,-0.050959,-0.023588,-0.009636,-0.217079,-0.03998,-0.070522,-0.085858,-0.055425,-0.014615,-0.239854,-0.266803,-0.045629,-0.494135,-0.005961,-0.050901,-0.229324,-0.122421,-0.331202,-0.019986,-0.101839,-0.24505,-0.051652,-0.456677,-0.033475,-0.128759,-0.080313,-0.059051,-0.015493,-0.06377,-0.073248,,-0.080794,-0.125771,-0.011323,-0.116921,0.051302,-0.062114,-0.05825,-0.018025,-0.017433,-0.036091,-0.033809,-0.025577,-0.023001,-0.015221,-0.048036,-0.007325,-0.318884,-0.039984,-0.05994,-0.009467,0.50525,-0.078859,-0.101097,-0.113138,1.765834,-0.056281,-0.019416,-0.094575,-0.031043,-0.302369,-0.135672,-0.032676,-0.07738,-0.236518,,-0.022385,-0.380742,-0.509578,-0.246198,,-0.083652,-0.00957,-0.006186,-0.034974,-0.010496,-0.080347,,,-0.233142,-0.317366,-0.027186,-0.547415,-0.065005,-0.040324,-0.028479,3.135208,-0.010211,-0.01351,-0.197118,0


In [8]:
# drop constant / near-constant features: keep only those with variance above 0.5
dataset_0_200 = variance_filter(data=dataset_0_200, threshold=0.5)


In [9]:
# correlation of each remaining feature with the target, highest first
corr_0_200 = check_corr(data=dataset_0_200)
corr_0_200

markers_40_1_cnt     0.014241
markers_32_1_cnt     0.010720
markers_103_1_cnt    0.009781
markers_2_1_cnt      0.009140
markers_171_1_cnt    0.008179
                       ...   
markers_65_1_cnt    -0.005823
markers_199_1_cnt   -0.006428
markers_72_1_cnt    -0.008867
markers_185_1_cnt   -0.009140
markers_122_1_cnt   -0.011679
Length: 197, dtype: float64

Признаков с заметной линейной связью с таргетом нет: максимальная по модулю корреляция Пирсона — около 0.014.

### Столбцы с 200 по 399

In [10]:
# load the features at positions 200-399 of the feature list, plus the target
dataset_200_400 = read_data(path=dataset_train_path, left=200, right=400)
dataset_200_400.head()


Unnamed: 0,markers_208_1_cnt,markers_209_1_cnt,markers_210_1_cnt,markers_211_1_cnt,markers_212_1_cnt,markers_213_1_cnt,markers_215_1_cnt,markers_216_1_cnt,markers_217_1_cnt,markers_218_1_cnt,markers_219_1_cnt,markers_220_1_cnt,markers_221_1_cnt,markers_222_1_cnt,markers_223_1_cnt,markers_224_1_cnt,markers_225_1_cnt,markers_226_1_cnt,markers_227_1_cnt,markers_228_1_cnt,markers_229_1_cnt,markers_230_1_cnt,markers_231_1_cnt,markers_232_1_cnt,markers_233_1_cnt,markers_234_1_cnt,markers_235_1_cnt,markers_236_1_cnt,markers_237_1_cnt,markers_238_1_cnt,markers_239_1_cnt,markers_240_1_cnt,markers_241_1_cnt,markers_242_1_cnt,markers_243_1_cnt,markers_244_1_cnt,markers_246_1_cnt,markers_247_1_cnt,markers_248_1_cnt,markers_249_1_cnt,markers_251_1_cnt,markers_252_1_cnt,markers_253_1_cnt,markers_254_1_cnt,markers_255_1_cnt,markers_256_1_cnt,markers_257_1_cnt,markers_258_1_cnt,markers_259_1_cnt,markers_260_1_cnt,markers_261_1_cnt,markers_262_1_cnt,markers_263_1_cnt,markers_264_1_cnt,markers_265_1_cnt,markers_266_1_cnt,markers_267_1_cnt,markers_268_1_cnt,markers_269_1_cnt,markers_270_1_cnt,markers_271_1_cnt,markers_272_1_cnt,markers_273_1_cnt,markers_274_1_cnt,markers_275_1_cnt,markers_276_1_cnt,markers_277_1_cnt,markers_279_1_cnt,markers_280_1_cnt,markers_281_1_cnt,markers_282_1_cnt,markers_283_1_cnt,markers_284_1_cnt,markers_285_1_cnt,markers_286_1_cnt,markers_287_1_cnt,markers_288_1_cnt,markers_289_1_cnt,markers_290_1_cnt,markers_291_1_cnt,markers_292_1_cnt,markers_293_1_cnt,markers_294_1_cnt,markers_295_1_cnt,markers_296_1_cnt,markers_297_1_cnt,markers_298_1_cnt,markers_299_1_cnt,markers_300_1_cnt,markers_301_1_cnt,markers_302_1_cnt,markers_303_1_cnt,markers_304_1_cnt,markers_305_1_cnt,markers_306_1_cnt,markers_307_1_cnt,markers_308_1_cnt,markers_309_1_cnt,markers_310_1_cnt,markers_312_1_cnt,markers_313_1_cnt,markers_314_1_cnt,markers_315_1_cnt,markers_316_1_cnt,markers_317_1_cnt,markers_318_1_cnt,markers_319_1_cnt,markers_320_1_cnt,markers_321_1_cnt,markers_322_1_cnt,markers_3
23_1_cnt,markers_324_1_cnt,markers_325_1_cnt,markers_326_1_cnt,markers_327_1_cnt,markers_328_1_cnt,markers_329_1_cnt,markers_330_1_cnt,markers_331_1_cnt,markers_332_1_cnt,markers_333_1_cnt,markers_334_1_cnt,markers_335_1_cnt,markers_336_1_cnt,markers_337_1_cnt,markers_338_1_cnt,markers_339_1_cnt,markers_340_1_cnt,markers_341_1_cnt,markers_342_1_cnt,markers_343_1_cnt,markers_344_1_cnt,markers_345_1_cnt,markers_346_1_cnt,markers_347_1_cnt,markers_348_1_cnt,markers_349_1_cnt,markers_350_1_cnt,markers_351_1_cnt,markers_352_1_cnt,markers_353_1_cnt,markers_354_1_cnt,markers_355_1_cnt,markers_356_1_cnt,markers_357_1_cnt,markers_358_1_cnt,markers_359_1_cnt,markers_360_1_cnt,markers_361_1_cnt,markers_362_1_cnt,markers_363_1_cnt,markers_364_1_cnt,markers_365_1_cnt,markers_366_1_cnt,markers_367_1_cnt,markers_368_1_cnt,markers_369_1_cnt,markers_370_1_cnt,markers_371_1_cnt,markers_372_1_cnt,markers_373_1_cnt,markers_374_1_cnt,markers_375_1_cnt,markers_376_1_cnt,markers_377_1_cnt,markers_378_1_cnt,markers_379_1_cnt,markers_380_1_cnt,markers_381_1_cnt,markers_382_1_cnt,markers_383_1_cnt,markers_384_1_cnt,markers_385_1_cnt,markers_386_1_cnt,markers_387_1_cnt,markers_388_1_cnt,markers_389_1_cnt,markers_390_1_cnt,markers_391_1_cnt,markers_392_1_cnt,markers_393_1_cnt,markers_394_1_cnt,markers_395_1_cnt,markers_396_1_cnt,markers_397_1_cnt,markers_398_1_cnt,markers_399_1_cnt,markers_400_1_cnt,markers_401_1_cnt,markers_402_1_cnt,markers_403_1_cnt,markers_404_1_cnt,markers_405_1_cnt,markers_406_1_cnt,markers_407_1_cnt,markers_408_1_cnt,markers_409_1_cnt,markers_410_1_cnt,markers_411_1_cnt,markers_412_1_cnt,target
0,-0.031107,-0.104697,-0.025089,-0.181242,-0.068119,-0.083954,-0.226446,-0.069071,-0.073598,-0.077716,-0.039401,-0.024738,-0.044995,-0.078226,-0.052385,,-0.339619,-0.036073,-0.163505,-0.023135,-0.196457,-0.042695,-0.108565,-0.889347,,-0.113154,,-0.171718,-0.18704,-0.702268,-0.04025,-0.025074,,-0.762698,-0.010933,-0.017246,-0.062057,-0.079599,-0.027791,-0.177535,-0.066134,-0.029447,,-0.015493,-0.013267,-0.046825,,,,-0.035892,-0.482181,-0.133751,-0.027934,-0.040349,-0.043747,-0.118017,-0.00798,-0.235713,-0.323606,-0.033686,-0.20979,,-0.097387,-0.155045,-0.033115,-0.068745,-0.038787,-0.025502,-0.072504,-0.210132,-0.122785,-0.297036,-0.083034,,-0.059451,-0.040893,-0.050708,-0.009665,-0.011312,-0.310166,-0.020705,,-0.215788,-0.024511,-0.051416,,-0.31395,-0.246508,-0.015367,-0.01159,-0.374286,-0.115366,-0.019642,-0.01491,-0.364783,,-0.059096,-0.022878,-0.726092,-0.126246,,,,-0.084372,-0.017879,-0.813515,-0.191185,-0.039766,-0.007581,-0.237834,-0.533167,-0.156313,-0.3205,-0.084362,-0.176528,-0.351619,-0.023757,-0.916329,-0.146687,-0.040484,-0.712084,-0.521498,-0.028226,-0.026531,-0.029163,,-0.088223,-0.26639,-0.033651,-0.029802,-0.056229,-0.019045,-0.064245,0.435087,-0.422013,-1.741738,-1.64269,-0.009412,-0.048192,-0.117481,-0.195268,-0.116534,-0.084806,-0.094771,-0.089401,-0.009505,-0.037178,-0.027517,-0.125197,-0.052211,-0.085236,-0.426208,-0.049087,-0.01233,-0.224983,-0.008289,0.0,-0.016475,-0.023542,-0.027914,-0.034629,-0.015795,,-0.092385,-0.14511,-0.417204,-0.085825,-0.655052,-0.048717,-0.225042,-0.108254,-0.210142,-0.132688,,-0.608426,-0.016595,-0.293084,-0.221544,-0.054876,-0.030093,-0.102983,-0.055153,-0.026133,-0.066554,-0.111961,-0.227806,-0.032447,-0.090546,-0.259924,,-0.21341,-0.106643,-0.279446,-0.113878,-0.225794,-0.014865,-0.257516,-0.138496,-0.142896,-0.131761,0
1,-0.031107,-0.104697,-0.025089,-0.181242,-0.068119,-0.083954,-0.226446,-0.069071,-0.073598,-0.077716,-0.039401,-0.024738,-0.044995,-0.078226,-0.052385,,-0.339619,-0.036073,-0.163505,-0.023135,-0.196457,-0.042695,-0.108565,-0.889347,-0.015364,-0.113154,,-0.171718,-0.18704,-0.702268,-0.04025,-0.025074,,-0.762698,-0.010933,-0.017246,-0.062057,-0.079599,-0.027791,-0.177535,-0.066134,-0.029447,-0.01045,-0.015493,-0.013267,-0.046825,-0.033169,,-0.011816,-0.035892,-0.482181,-0.133751,-0.027934,-0.040349,-0.043747,-0.118017,-0.00798,-0.235713,-0.323606,-0.033686,-0.20979,-0.069916,-0.097387,-0.155045,-0.033115,-0.068745,-0.038787,-0.025502,-0.072504,-0.210132,-0.122785,-0.297036,-0.083034,-0.013289,-0.059451,-0.040893,-0.050708,-0.009665,-0.011312,-0.310166,-0.020705,-0.487459,-0.215788,-0.024511,-0.051416,,-0.31395,-0.246508,-0.015367,-0.01159,-0.374286,-0.115366,-0.019642,-0.01491,-0.364783,,-0.059096,-0.022878,-0.726092,-0.126246,,,-0.02867,-0.084372,-0.017879,-0.813515,-0.191185,-0.039766,-0.007581,-0.237834,-0.533167,-1.133177,-0.3205,-0.084362,-0.176528,-0.351619,-0.023757,-1.427334,-0.146687,-0.040484,-1.039032,-0.381114,-0.028226,-0.026531,-0.029163,-0.130332,-0.088223,-0.26639,-0.033651,-0.029802,-0.056229,-0.019045,6.637127,-0.240076,-0.422013,-0.967099,-0.884461,-0.009412,-0.048192,-0.117481,-0.195268,-0.116534,-0.084806,-0.094771,-0.089401,-0.009505,-0.037178,-0.027517,-0.125197,-0.052211,-0.085236,-0.426208,-0.049087,-0.01233,-0.224983,-0.008289,0.0,-0.016475,-0.023542,-0.027914,-0.034629,-0.015795,-0.034767,-0.092385,-0.14511,-0.417204,-0.085825,-0.655052,-0.048717,-0.225042,-0.108254,-0.210142,-0.132688,,-0.608426,-0.016595,-0.293084,-0.221544,-0.054876,-0.030093,-0.102983,-0.055153,-0.026133,-0.066554,-0.111961,-0.227806,-0.032447,-0.090546,-0.259924,,-0.21341,-0.106643,-0.279446,-0.113878,-0.225794,-0.014865,-0.257516,-0.138496,-0.142896,-0.131761,0
2,-0.031107,-0.104697,-0.025089,-0.181242,-0.068119,-0.083954,-0.226446,-0.069071,-0.073598,-0.077716,-0.039401,-0.024738,-0.044995,-0.078226,-0.052385,,-0.339619,-0.036073,-0.163505,-0.023135,-0.196457,-0.042695,-0.108565,-0.26935,-0.015364,-0.113154,,-0.171718,-0.18704,-0.165392,-0.04025,-0.025074,,1.015372,-0.010933,-0.017246,-0.062057,-0.079599,-0.027791,-0.177535,-0.066134,-0.029447,-0.01045,-0.015493,-0.013267,-0.046825,-0.033169,,-0.011816,-0.035892,0.965944,-0.133751,-0.027934,-0.040349,-0.043747,-0.118017,-0.00798,-0.235713,-0.323606,-0.033686,-0.20979,0.793732,-0.097387,-0.155045,-0.033115,-0.068745,-0.038787,-0.025502,-0.072504,-0.210132,-0.122785,-0.297036,-0.083034,-0.013289,-0.059451,-0.040893,-0.050708,-0.009665,-0.011312,0.177595,-0.020705,2.057844,-0.215788,-0.024511,-0.051416,,-0.31395,-0.246508,-0.015367,-0.01159,-0.374286,-0.115366,-0.019642,-0.01491,-0.364783,,-0.059096,-0.022878,1.428355,-0.126246,,,-0.02867,-0.084372,-0.017879,-0.704715,-0.191185,-0.039766,-0.007581,-0.237834,-0.256655,0.332118,-0.3205,-0.084362,-0.176528,-0.351619,-0.023757,1.025491,-0.146687,-0.040484,-0.603102,-0.100347,-0.028226,-0.026531,-0.029163,-0.130332,-0.088223,5.217148,-0.033651,-0.029802,-0.056229,-0.019045,-0.064245,1.222778,-0.422013,-0.192459,0.916332,-0.009412,-0.048192,-0.117481,-0.195268,-0.116534,-0.084806,-0.094771,-0.089401,-0.009505,-0.037178,-0.027517,-0.125197,-0.052211,-0.085236,-0.426208,-0.049087,-0.01233,-0.224983,-0.008289,0.0,-0.016475,-0.023542,-0.027914,-0.034629,-0.015795,-0.034767,-0.092385,7.045282,-0.417204,-0.085825,0.136114,-0.048717,-0.225042,-0.108254,-0.210142,-0.132688,,-0.608426,-0.016595,-0.293084,-0.221544,-0.054876,-0.030093,-0.102983,-0.055153,-0.026133,-0.066554,-0.111961,-0.227806,-0.032447,-0.090546,5.277227,,-0.21341,-0.106643,-0.279446,-0.113878,-0.225794,-0.014865,-0.257516,-0.138496,-0.142896,-0.131761,1
3,-0.031107,-0.104697,-0.025089,0.613308,-0.068119,-0.083954,-0.226446,-0.069071,-0.073598,-0.077716,-0.039401,-0.024738,-0.044995,-0.078226,-0.052385,,0.143692,-0.036073,-0.163505,-0.023135,-0.196457,-0.042695,-0.108565,1.342641,-0.015364,-0.113154,-0.014469,-0.171718,-0.18704,0.371485,-0.04025,-0.025074,,2.460053,-0.010933,-0.017246,-0.062057,-0.079599,-0.027791,-0.177535,-0.066134,-0.029447,-0.01045,-0.015493,-0.013267,-0.046825,-0.033169,-0.037258,-0.011816,-0.035892,3.862195,-0.133751,-0.027934,-0.040349,-0.043747,-0.118017,-0.00798,2.632918,6.614226,-0.033686,2.048073,-0.069916,-0.097387,-0.155045,-0.033115,-0.068745,-0.038787,-0.025502,-0.072504,-0.210132,-0.122785,-0.098583,-0.083034,-0.013289,-0.059451,-0.040893,-0.050708,-0.009665,-0.011312,-0.310166,-0.020705,-0.063242,0.888765,-0.024511,-0.051416,-0.080755,-0.31395,0.625051,-0.015367,-0.01159,0.36309,-0.115366,-0.019642,-0.01491,-0.001325,-0.025893,-0.059096,-0.022878,-0.726092,-0.126246,-0.034705,-0.103572,-0.02867,-0.084372,-0.017879,1.58008,0.774115,-0.039766,-0.007581,4.383807,-0.533167,1.015922,4.854921,-0.084362,-0.176528,1.361742,-0.023757,1.332094,-0.146687,-0.040484,0.704688,-0.521498,-0.028226,-0.026531,-0.029163,-0.130332,-0.088223,1.927025,-0.033651,-0.029802,-0.056229,-0.019045,15.572289,1.110251,0.255345,1.024831,1.01111,-0.009412,-0.048192,-0.117481,-0.195268,-0.116534,-0.084806,-0.094771,-0.089401,-0.009505,-0.037178,-0.027517,-0.125197,-0.052211,-0.085236,-0.426208,-0.049087,-0.01233,-0.224983,-0.008289,0.0,-0.016475,-0.023542,-0.027914,-0.034629,-0.015795,-0.034767,-0.092385,-0.14511,-0.417204,-0.085825,1.520653,4.555677,0.576206,-0.108254,-0.210142,-0.132688,,0.589922,-0.016595,4.796928,-0.221544,-0.054876,-0.030093,-0.102983,-0.055153,23.993859,-0.066554,-0.111961,-0.227806,-0.032447,-0.090546,2.970081,,6.072515,-0.106643,0.765318,-0.113878,-0.225794,-0.014865,-0.257516,-0.138496,-0.142896,-0.131761,0
4,-0.031107,-0.104697,-0.025089,-0.181242,-0.068119,-0.083954,-0.226446,-0.069071,-0.073598,-0.077716,-0.039401,-0.024738,-0.044995,-0.078226,-0.052385,,-0.339619,-0.036073,-0.163505,-0.023135,-0.196457,-0.042695,-0.108565,-0.765348,-0.015364,-0.113154,-0.014469,-0.171718,-0.18704,1.311019,-0.04025,-0.025074,,-0.762698,-0.010933,-0.017246,-0.062057,-0.079599,-0.027791,-0.177535,-0.066134,-0.029447,-0.01045,-0.015493,-0.013267,-0.046825,-0.033169,,-0.011816,-0.035892,-0.482181,-0.133751,-0.027934,-0.040349,-0.043747,-0.118017,-0.00798,-0.235713,-0.323606,-0.033686,-0.20979,-0.069916,-0.097387,-0.155045,-0.033115,-0.068745,-0.038787,-0.025502,-0.072504,-0.210132,-0.122785,5.061192,-0.083034,-0.013289,-0.059451,-0.040893,-0.050708,-0.009665,-0.011312,-0.310166,-0.020705,-0.487459,-0.215788,-0.024511,-0.051416,,-0.31395,0.189271,-0.015367,-0.01159,-0.374286,-0.115366,-0.019642,-0.01491,-0.364783,,-0.059096,-0.022878,-0.726092,-0.126246,,,-0.02867,-0.084372,-0.017879,-0.051916,-0.191185,-0.039766,-0.007581,-0.237834,3.614511,1.308981,-0.3205,-0.084362,-0.176528,-0.351619,-0.023757,1.332094,-0.146687,-0.040484,1.794513,-0.521498,-0.028226,-0.026531,-0.029163,-0.130332,-0.088223,0.830317,-0.033651,-0.029802,-0.056229,-0.019045,-0.064245,0.660142,-0.422013,-0.081796,-0.410568,-0.009412,-0.048192,-0.117481,-0.195268,-0.116534,-0.084806,-0.094771,-0.089401,-0.009505,-0.037178,-0.027517,-0.125197,-0.052211,-0.085236,4.06763,-0.049087,-0.01233,0.447723,-0.008289,0.0,-0.016475,-0.023542,-0.027914,-0.034629,-0.015795,-0.034767,-0.092385,-0.14511,-0.417204,-0.085825,0.531697,-0.048717,-0.225042,-0.108254,-0.210142,-0.132688,,-0.608426,-0.016595,2.839231,-0.221544,-0.054876,-0.030093,-0.102983,-0.055153,-0.026133,-0.066554,-0.111961,-0.227806,-0.032447,-0.090546,-0.259924,,-0.21341,-0.106643,-0.279446,-0.113878,-0.225794,-0.014865,-0.257516,-0.138496,0.267987,-0.131761,0


In [11]:
# drop constant / near-constant features: keep only those with variance above 0.5
dataset_200_400 = variance_filter(data=dataset_200_400, threshold=0.5)


In [12]:
# correlation of each remaining feature with the target, highest first
corr_200_400 = check_corr(data=dataset_200_400)
corr_200_400


markers_346_1_cnt    0.009750
markers_349_1_cnt    0.008966
markers_324_1_cnt    0.008129
markers_237_1_cnt    0.006892
markers_318_1_cnt    0.006859
                       ...   
markers_334_1_cnt   -0.005656
markers_376_1_cnt   -0.005760
markers_387_1_cnt   -0.006248
markers_249_1_cnt   -0.006698
markers_306_1_cnt   -0.006916
Length: 194, dtype: float64

### Столбцы с 400 по 599

In [13]:
# Read the required slice of columns from the training dataset.
# NOTE(review): 400 and 600 are presumably positional column bounds with an exclusive end
# (the section heading says columns 400-599); they do not match the marker numbers in the
# column names shown below — confirm in read_data.
dataset_400_600 = read_data(dataset_train_path, 400, 600)
dataset_400_600.head()

Unnamed: 0,markers_413_1_cnt,markers_414_1_cnt,markers_415_1_cnt,markers_416_1_cnt,markers_417_1_cnt,markers_418_1_cnt,markers_419_1_cnt,markers_420_1_cnt,markers_421_1_cnt,markers_422_1_cnt,markers_423_1_cnt,markers_424_1_cnt,markers_425_1_cnt,markers_426_1_cnt,markers_427_1_cnt,markers_428_1_cnt,markers_429_1_cnt,markers_430_1_cnt,markers_431_1_cnt,markers_432_1_cnt,markers_433_1_cnt,markers_434_1_cnt,markers_435_1_cnt,markers_436_1_cnt,markers_437_1_cnt,markers_438_1_cnt,markers_439_1_cnt,markers_440_1_cnt,markers_441_1_cnt,markers_442_1_cnt,markers_443_1_cnt,markers_444_1_cnt,markers_445_1_cnt,markers_446_1_cnt,markers_447_1_cnt,markers_449_1_cnt,markers_450_1_cnt,markers_451_1_cnt,markers_452_1_cnt,markers_453_1_cnt,markers_454_1_cnt,markers_455_1_cnt,markers_456_1_cnt,markers_457_1_cnt,markers_458_1_cnt,markers_459_1_cnt,markers_460_1_cnt,markers_461_1_cnt,markers_462_1_cnt,markers_463_1_cnt,markers_465_1_cnt,markers_466_1_cnt,markers_467_1_cnt,markers_468_1_cnt,markers_469_1_cnt,markers_470_1_cnt,markers_471_1_cnt,markers_472_1_cnt,markers_473_1_cnt,markers_474_1_cnt,markers_475_1_cnt,markers_476_1_cnt,markers_477_1_cnt,markers_478_1_cnt,markers_479_1_cnt,markers_480_1_cnt,markers_481_1_cnt,markers_482_1_cnt,markers_483_1_cnt,markers_484_1_cnt,markers_485_1_cnt,markers_486_1_cnt,markers_487_1_cnt,markers_488_1_cnt,markers_489_1_cnt,markers_490_1_cnt,markers_491_1_cnt,markers_493_1_cnt,markers_494_1_cnt,markers_495_1_cnt,markers_496_1_cnt,markers_497_1_cnt,markers_498_1_cnt,markers_499_1_cnt,markers_500_1_cnt,markers_501_1_cnt,markers_502_1_cnt,markers_503_1_cnt,markers_504_1_cnt,markers_505_1_cnt,markers_506_1_cnt,markers_507_1_cnt,markers_508_1_cnt,markers_509_1_cnt,markers_510_1_cnt,markers_511_1_cnt,markers_512_1_cnt,markers_513_1_cnt,markers_514_1_cnt,markers_515_1_cnt,markers_516_1_cnt,markers_517_1_cnt,markers_518_1_cnt,markers_519_1_cnt,markers_520_1_cnt,markers_521_1_cnt,markers_522_1_cnt,markers_523_1_cnt,markers_524_1_cnt,markers_525_1_cnt,markers_5
26_1_cnt,markers_527_1_cnt,markers_528_1_cnt,markers_529_1_cnt,markers_530_1_cnt,markers_531_1_cnt,markers_532_1_cnt,markers_533_1_cnt,markers_534_1_cnt,markers_535_1_cnt,markers_536_1_cnt,markers_537_1_cnt,markers_538_1_cnt,markers_539_1_cnt,markers_540_1_cnt,markers_541_1_cnt,markers_542_1_cnt,markers_543_1_cnt,markers_544_1_cnt,markers_545_1_cnt,markers_546_1_cnt,markers_547_1_cnt,markers_548_1_cnt,markers_549_1_cnt,markers_550_1_cnt,markers_551_1_cnt,markers_552_1_cnt,markers_553_1_cnt,markers_554_1_cnt,markers_555_1_cnt,markers_556_1_cnt,markers_557_1_cnt,markers_558_1_cnt,markers_559_1_cnt,markers_560_1_cnt,markers_561_1_cnt,markers_562_1_cnt,markers_563_1_cnt,markers_564_1_cnt,markers_565_1_cnt,markers_566_1_cnt,markers_567_1_cnt,markers_568_1_cnt,markers_569_1_cnt,markers_570_1_cnt,markers_571_1_cnt,markers_572_1_cnt,markers_573_1_cnt,markers_574_1_cnt,markers_575_1_cnt,markers_576_1_cnt,markers_577_1_cnt,markers_578_1_cnt,markers_579_1_cnt,markers_580_1_cnt,markers_581_1_cnt,markers_582_1_cnt,markers_583_1_cnt,markers_584_1_cnt,markers_585_1_cnt,markers_586_1_cnt,markers_587_1_cnt,markers_588_1_cnt,markers_589_1_cnt,markers_590_1_cnt,markers_591_1_cnt,markers_592_1_cnt,markers_593_1_cnt,markers_594_1_cnt,markers_595_1_cnt,markers_596_1_cnt,markers_597_1_cnt,markers_598_1_cnt,markers_599_1_cnt,markers_600_1_cnt,markers_601_1_cnt,markers_602_1_cnt,markers_603_1_cnt,markers_604_1_cnt,markers_605_1_cnt,markers_606_1_cnt,markers_607_1_cnt,markers_608_1_cnt,markers_609_1_cnt,markers_610_1_cnt,markers_611_1_cnt,markers_612_1_cnt,markers_613_1_cnt,markers_614_1_cnt,markers_615_1_cnt,target
0,-0.094348,-0.114231,-0.103785,-0.014724,-0.106822,-0.016147,-0.204772,-0.018454,-0.12654,-0.087154,-0.08556,-0.668222,-0.126213,-0.25776,-0.011725,-0.160691,-0.023111,-0.109159,-0.333867,-0.028059,-0.049576,-0.402016,-0.043134,-0.135613,-0.037106,-0.628057,-0.139858,-0.108334,-0.081015,-0.346151,-0.139633,-0.332625,-0.170424,-0.104848,-0.059663,-0.016757,-0.037248,-0.1055,-0.139314,-0.139298,-0.009067,-0.04067,-0.027545,-0.037021,-0.0707,-0.14154,-0.277276,-0.064306,-0.044453,-0.02381,-0.204037,-0.222391,-0.129192,-0.327212,-0.193407,-0.32745,-0.033659,-0.076791,-0.075192,-0.021225,-0.018161,-0.764699,-0.113743,-0.118718,-0.086824,-0.420253,,-0.185849,-0.150162,-0.003255,-0.039487,-0.055295,,-0.012105,-0.051669,-0.104019,-0.055239,-0.172291,-0.079972,-0.139841,-0.010306,-0.031611,-0.094306,-0.161854,-0.045909,-0.39565,-0.356033,-0.026626,-0.093756,-0.009562,-0.390416,-0.113771,0.990308,-0.13648,-0.025319,-0.161761,-0.049115,,-0.174958,-0.010386,-0.060755,-0.106234,-0.082922,-0.243493,-0.10661,-0.160228,-0.159016,-0.032302,-0.031345,-0.013822,-0.084879,-0.143344,-0.011156,-0.083002,-0.161573,-0.043591,-0.271777,-1.025351,-0.543963,-1.005866,1.025446,-0.730316,-0.029878,-0.110048,-0.119978,-0.210989,0.008344,-0.06042,-0.046641,-0.100605,-0.113869,-0.075577,-0.015607,-0.034906,-0.072104,-0.031197,-0.012524,-0.040826,-0.077188,-0.035819,-0.041826,-0.020817,-0.03591,-0.032743,-0.023932,-0.342972,-0.269591,-0.017916,-0.26714,-0.037873,-0.107424,-0.171404,-0.073332,-0.267721,-0.460965,-0.096004,-0.033045,-0.070553,-0.119537,-0.193483,-0.596546,-0.036932,-0.102457,-0.05176,-0.153059,-0.048758,-0.093987,-0.014545,-0.033104,-0.165655,-0.018444,-0.059063,-0.13098,-0.363023,-0.037602,-0.016419,-0.031796,-0.020199,-0.017595,-0.041304,-0.008156,-0.089443,-0.074701,-0.025582,-0.011009,-0.118748,-0.015075,-0.120692,-0.241715,-0.008218,-0.18051,-0.023455,-0.041364,-0.012338,-0.084865,-0.254291,-0.261543,-0.39872,-0.017229,-0.070645,0
1,-0.094348,-0.114231,-0.103785,-0.014724,-0.106822,-0.016147,-0.204772,-0.018454,-0.12654,-0.087154,-0.08556,-0.568285,-0.126213,-0.25776,-0.011725,-0.160691,-0.023111,-0.109159,-0.333867,-0.028059,-0.049576,-0.402016,-0.043134,-0.135613,-0.037106,-0.628057,-0.139858,-0.108334,-0.081015,-0.346151,-0.139633,-0.332625,-0.170424,-0.104848,-0.059663,-0.016757,-0.037248,-0.1055,-0.139314,-0.139298,-0.009067,-0.04067,-0.027545,-0.037021,-0.0707,-0.14154,5.440061,-0.064306,-0.044453,-0.02381,-0.204037,-0.222391,-0.129192,-0.327212,-0.193407,-0.32745,-0.033659,-0.076791,-0.075192,-0.021225,-0.018161,-0.091028,-0.113743,-0.118718,-0.086824,-0.420253,,-0.185849,-0.150162,-0.003255,-0.039487,-0.055295,,-0.012105,-0.051669,-0.104019,-0.055239,-0.172291,-0.079972,-0.139841,-0.010306,-0.031611,-0.094306,-0.161854,-0.045909,-0.39565,-0.356033,-0.026626,-0.093756,-0.009562,-0.390416,-0.113771,-0.53191,-0.13648,-0.025319,-0.161761,-0.049115,,-0.174958,-0.010386,-0.060755,-0.106234,-0.082922,-0.243493,-0.10661,-0.160228,-0.159016,-0.032302,-0.031345,-0.013822,-0.084879,-0.143344,-0.011156,-0.083002,-0.161573,-0.043591,-0.271777,-1.025351,-0.543963,-1.005866,-0.070698,-0.730316,-0.029878,-0.110048,-0.119978,-0.210989,-0.440288,-0.06042,-0.046641,-0.100605,-0.113869,-0.075577,-0.015607,-0.034906,-0.072104,-0.031197,-0.012524,-0.040826,-0.077188,-0.035819,-0.041826,-0.020817,-0.03591,-0.032743,-0.023932,-0.342972,-0.269591,-0.017916,-0.26714,-0.037873,-0.107424,-0.171404,-0.073332,-0.267721,-0.460965,-0.096004,-0.033045,-0.070553,-0.119537,-0.193483,-0.2213,-0.036932,-0.102457,-0.05176,-0.153059,-0.048758,-0.093987,-0.014545,-0.033104,-0.165655,-0.018444,-0.059063,-0.13098,-0.363023,-0.037602,-0.016419,-0.031796,-0.020199,-0.017595,-0.041304,-0.008156,-0.089443,-0.074701,-0.025582,-0.011009,-0.118748,-0.015075,-0.120692,-0.241715,-0.008218,-0.18051,-0.023455,-0.041364,-0.012338,-0.084865,-0.254291,-0.261543,-0.39872,-0.017229,-0.070645,0
2,-0.094348,-0.114231,-0.103785,-0.014724,-0.106822,-0.016147,-0.204772,-0.018454,-0.12654,-0.087154,-0.08556,-0.068602,-0.126213,-0.25776,-0.011725,-0.160691,-0.023111,-0.109159,0.247093,-0.028059,-0.049576,-0.402016,-0.043134,-0.135613,-0.037106,-0.628057,-0.139858,-0.108334,-0.081015,-0.346151,-0.139633,-0.332625,-0.170424,-0.104848,-0.059663,-0.016757,-0.037248,-0.1055,-0.139314,-0.139298,-0.009067,-0.04067,-0.027545,-0.037021,-0.0707,-0.14154,0.314173,-0.064306,-0.044453,-0.02381,-0.204037,-0.222391,-0.129192,-0.327212,-0.193407,-0.32745,-0.033659,-0.076791,-0.075192,-0.021225,-0.018161,-0.764699,-0.113743,-0.118718,-0.086824,-0.420253,,-0.185849,-0.150162,-0.003255,-0.039487,-0.055295,,-0.012105,-0.051669,-0.104019,-0.055239,-0.172291,-0.079972,-0.139841,-0.010306,-0.031611,-0.094306,-0.161854,-0.045909,-0.39565,-0.356033,-0.026626,-0.093756,-0.009562,0.114475,-0.113771,-0.53191,-0.13648,-0.025319,-0.161761,-0.049115,,-0.174958,-0.010386,-0.060755,-0.106234,-0.082922,-0.243493,-0.10661,-0.160228,-0.159016,-0.032302,-0.031345,-0.013822,1.119598,-0.143344,-0.011156,-0.083002,-0.161573,-0.043591,-0.271777,0.906383,-0.543963,0.987859,-0.070698,0.745062,-0.029878,-0.110048,-0.119978,-0.210989,-0.88892,-0.06042,-0.046641,-0.100605,-0.113869,-0.075577,-0.015607,-0.034906,-0.072104,-0.031197,-0.012524,-0.040826,-0.077188,-0.035819,-0.041826,-0.020817,-0.03591,-0.032743,-0.023932,-0.342972,-0.269591,-0.017916,-0.26714,-0.037873,-0.107424,-0.171404,-0.073332,-0.267721,-0.460965,-0.096004,-0.033045,-0.070553,-0.119537,2.851535,-0.596546,-0.036932,-0.102457,-0.05176,-0.153059,-0.048758,-0.093987,-0.014545,-0.033104,-0.165655,-0.018444,-0.059063,-0.13098,-0.363023,-0.037602,-0.016419,-0.031796,-0.020199,-0.017595,-0.041304,-0.008156,-0.089443,-0.074701,-0.025582,-0.011009,-0.118748,-0.015075,-0.120692,-0.241715,-0.008218,-0.18051,-0.023455,-0.041364,-0.012338,-0.084865,-0.254291,-0.261543,-0.39872,-0.017229,-0.070645,1
3,-0.094348,-0.114231,-0.103785,-0.014724,7.864049,-0.016147,-0.204772,-0.018454,-0.12654,-0.087154,-0.08556,2.229942,-0.126213,2.302074,-0.011725,-0.160691,-0.023111,-0.109159,-0.333867,-0.028059,-0.049576,-0.402016,-0.043134,-0.135613,-0.037106,1.206279,-0.139858,-0.108334,-0.081015,-0.346151,-0.139633,2.143173,0.504151,-0.104848,-0.059663,-0.016757,-0.037248,4.533502,-0.139314,-0.139298,-0.009067,-0.04067,-0.027545,-0.037021,-0.0707,-0.14154,-0.277276,-0.064306,-0.044453,-0.02381,-0.204037,4.399352,-0.129192,-0.327212,-0.193407,-0.32745,-0.033659,-0.076791,-0.075192,-0.021225,-0.018161,0.87136,-0.113743,-0.118718,-0.086824,4.076376,,-0.185849,3.96688,-0.003255,-0.039487,-0.055295,-0.12244,-0.012105,-0.051669,-0.104019,-0.055239,-0.172291,-0.079972,-0.139841,-0.010306,-0.031611,-0.094306,-0.161854,-0.045909,1.971841,0.586457,-0.026626,-0.093756,-0.009562,-0.390416,-0.113771,-0.53191,-0.13648,-0.025319,-0.161761,-0.049115,,-0.174958,-0.010386,-0.060755,-0.106234,7.854456,-0.243493,-0.10661,-0.160228,2.680317,-0.032302,-0.031345,-0.013822,-0.084879,-0.143344,-0.011156,-0.083002,-0.161573,-0.043591,1.412946,1.87225,-0.543963,1.984722,-0.070698,2.367979,-0.029878,-0.110048,-0.119978,-0.210989,1.204696,-0.06042,-0.046641,-0.100605,-0.113869,-0.075577,-0.015607,-0.034906,-0.072104,-0.031197,-0.012524,-0.040826,-0.077188,-0.035819,-0.041826,-0.020817,-0.03591,-0.032743,-0.023932,-0.342972,2.684357,-0.017916,-0.26714,-0.037873,-0.107424,-0.171404,-0.073332,-0.267721,0.668801,-0.096004,-0.033045,-0.070553,-0.119537,-0.193483,-0.596546,-0.036932,-0.102457,-0.05176,2.083583,-0.048758,-0.093987,-0.014545,-0.033104,-0.165655,-0.018444,-0.059063,-0.13098,-0.363023,-0.037602,-0.016419,-0.031796,-0.020199,-0.017595,-0.041304,-0.008156,-0.089443,-0.074701,-0.025582,-0.011009,-0.118748,-0.015075,-0.120692,-0.241715,-0.008218,-0.18051,-0.023455,-0.041364,-0.012338,-0.084865,0.274381,0.333039,-0.39872,-0.017229,-0.070645,0
4,-0.094348,-0.114231,-0.103785,-0.014724,-0.106822,-0.016147,-0.204772,-0.018454,-0.12654,-0.087154,-0.08556,1.630322,-0.126213,-0.25776,-0.011725,-0.160691,-0.023111,-0.109159,-0.333867,-0.028059,-0.049576,-0.109618,-0.043134,-0.135613,-0.037106,2.123447,-0.139858,-0.108334,-0.081015,-0.346151,-0.139633,-0.332625,-0.170424,-0.104848,-0.059663,-0.016757,-0.037248,-0.1055,-0.139314,-0.139298,-0.009067,-0.04067,-0.027545,-0.037021,-0.0707,-0.14154,-0.277276,-0.064306,-0.044453,-0.02381,-0.204037,-0.222391,-0.129192,-0.068769,-0.193407,-0.32745,-0.033659,-0.076791,-0.075192,-0.021225,-0.018161,-0.091028,-0.113743,-0.118718,-0.086824,-0.110141,,-0.185849,-0.150162,-0.003255,-0.039487,-0.055295,,-0.012105,-0.051669,-0.104019,-0.055239,-0.172291,-0.079972,-0.139841,-0.010306,-0.031611,-0.094306,-0.161854,-0.045909,0.393514,3.413928,-0.026626,-0.093756,-0.009562,-0.390416,-0.113771,0.990308,-0.13648,-0.025319,-0.161761,-0.049115,,-0.174958,-0.010386,-0.060755,-0.106234,-0.082922,-0.243493,-0.10661,-0.160228,-0.159016,-0.032302,-0.031345,-0.013822,-0.084879,-0.143344,-0.011156,-0.083002,-0.161573,-0.043591,-0.271777,-0.663151,1.794584,-0.632043,-0.070698,-0.730316,-0.029878,-0.110048,-0.119978,1.395157,0.456976,-0.06042,-0.046641,-0.100605,-0.113869,-0.075577,-0.015607,-0.034906,-0.072104,-0.031197,-0.012524,-0.040826,-0.077188,-0.035819,-0.041826,-0.020817,-0.03591,-0.032743,-0.023932,-0.342972,-0.269591,-0.017916,-0.26714,-0.037873,-0.107424,-0.171404,-0.073332,-0.267721,1.924096,-0.096004,-0.033045,-0.070553,-0.119537,-0.193483,-0.596546,-0.036932,-0.102457,-0.05176,-0.153059,-0.048758,-0.093987,-0.014545,-0.033104,-0.165655,-0.018444,-0.059063,-0.13098,0.277352,-0.037602,-0.016419,-0.031796,-0.020199,-0.017595,-0.041304,-0.008156,-0.089443,-0.074701,-0.025582,-0.011009,1.082157,-0.015075,-0.120692,-0.241715,-0.008218,-0.18051,-0.023455,-0.041364,-0.012338,-0.084865,-0.254291,-0.261543,-0.39872,-0.017229,15.878591,0


In [14]:
# Drop constant features from this column slice.
# NOTE(review): the second argument (0.5) is presumably a variance threshold, which would
# also drop low-variance (not only constant) columns — confirm in variance_filter.
# NOTE(review): the result is bound back to the same name, so re-running this cell
# applies the filter to the already-filtered frame.
dataset_400_600 = variance_filter(dataset_400_600, 0.5)


In [15]:
# Compute the correlation of each remaining feature with the target.
# The displayed result is a float64 Series indexed by feature name, shown in descending order.
# NOTE(review): the correlation method is defined in check_corr (not visible in this cell).
corr_400_600 = check_corr(dataset_400_600)
corr_400_600

markers_476_1_cnt    0.012434
markers_434_1_cnt    0.011912
markers_567_1_cnt    0.011398
markers_506_1_cnt    0.011047
markers_508_1_cnt    0.010404
                       ...   
markers_465_1_cnt   -0.004971
markers_530_1_cnt   -0.005058
markers_611_1_cnt   -0.005200
markers_446_1_cnt   -0.005285
markers_444_1_cnt   -0.006422
Length: 195, dtype: float64

### Столбцы с 600 по 799

In [16]:
# Read the required slice of columns from the training dataset.
# NOTE(review): 600 and 800 are presumably positional column bounds with an exclusive end
# (the section heading says columns 600-799) — confirm in read_data.
dataset_600_800 = read_data(dataset_train_path, 600, 800)
dataset_600_800.head()

Unnamed: 0,markers_616_1_cnt,markers_617_1_cnt,markers_618_1_cnt,markers_619_1_cnt,markers_620_1_cnt,markers_621_1_cnt,markers_622_1_cnt,markers_623_1_cnt,markers_624_1_cnt,markers_626_1_cnt,markers_628_1_cnt,markers_629_1_cnt,markers_630_1_cnt,markers_631_1_cnt,markers_632_1_cnt,markers_633_1_cnt,markers_634_1_cnt,markers_635_1_cnt,markers_636_1_cnt,markers_637_1_cnt,markers_638_1_cnt,markers_639_1_cnt,markers_640_1_cnt,markers_641_1_cnt,markers_642_1_cnt,markers_643_1_cnt,markers_644_1_cnt,markers_645_1_cnt,markers_646_1_cnt,markers_647_1_cnt,markers_648_1_cnt,markers_649_1_cnt,markers_650_1_cnt,markers_651_1_cnt,markers_652_1_cnt,markers_653_1_cnt,markers_654_1_cnt,markers_655_1_cnt,markers_656_1_cnt,markers_657_1_cnt,markers_658_1_cnt,markers_659_1_cnt,markers_660_1_cnt,markers_661_1_cnt,markers_662_1_cnt,markers_663_1_cnt,markers_664_1_cnt,markers_665_1_cnt,markers_666_1_cnt,markers_667_1_cnt,markers_668_1_cnt,markers_669_1_cnt,markers_670_1_cnt,markers_671_1_cnt,markers_672_1_cnt,markers_673_1_cnt,markers_674_1_cnt,markers_675_1_cnt,markers_677_1_cnt,markers_678_1_cnt,markers_679_1_cnt,markers_680_1_cnt,markers_681_1_cnt,markers_682_1_cnt,markers_683_1_cnt,markers_684_1_cnt,markers_686_1_cnt,markers_687_1_cnt,markers_688_1_cnt,markers_689_1_cnt,markers_690_1_cnt,markers_691_1_cnt,markers_692_1_cnt,markers_693_1_cnt,markers_696_1_cnt,markers_697_1_cnt,markers_698_1_cnt,markers_699_1_cnt,markers_700_1_cnt,markers_701_1_cnt,markers_702_1_cnt,markers_703_1_cnt,markers_704_1_cnt,markers_705_1_cnt,markers_706_1_cnt,markers_707_1_cnt,markers_708_1_cnt,markers_709_1_cnt,markers_710_1_cnt,markers_711_1_cnt,markers_712_1_cnt,markers_713_1_cnt,markers_714_1_cnt,markers_715_1_cnt,markers_716_1_cnt,markers_717_1_cnt,markers_718_1_cnt,markers_719_1_cnt,markers_720_1_cnt,markers_721_1_cnt,markers_722_1_cnt,markers_723_1_cnt,markers_724_1_cnt,markers_725_1_cnt,markers_726_1_cnt,markers_727_1_cnt,markers_728_1_cnt,markers_729_1_cnt,markers_730_1_cnt,markers_731_1_cnt,markers_7
32_1_cnt,markers_733_1_cnt,markers_734_1_cnt,markers_735_1_cnt,markers_736_1_cnt,markers_737_1_cnt,markers_738_1_cnt,markers_739_1_cnt,markers_740_1_cnt,markers_741_1_cnt,markers_742_1_cnt,markers_743_1_cnt,markers_744_1_cnt,markers_745_1_cnt,markers_746_1_cnt,markers_747_1_cnt,markers_748_1_cnt,markers_749_1_cnt,markers_750_1_cnt,markers_751_1_cnt,markers_752_1_cnt,markers_753_1_cnt,markers_754_1_cnt,markers_755_1_cnt,markers_756_1_cnt,markers_757_1_cnt,markers_758_1_cnt,markers_759_1_cnt,markers_760_1_cnt,markers_761_1_cnt,markers_762_1_cnt,markers_763_1_cnt,markers_764_1_cnt,markers_765_1_cnt,markers_766_1_cnt,markers_767_1_cnt,markers_768_1_cnt,markers_769_1_cnt,markers_770_1_cnt,markers_771_1_cnt,markers_772_1_cnt,markers_773_1_cnt,markers_774_1_cnt,markers_775_1_cnt,markers_776_1_cnt,markers_777_1_cnt,markers_778_1_cnt,markers_779_1_cnt,markers_780_1_cnt,markers_781_1_cnt,markers_782_1_cnt,markers_783_1_cnt,markers_784_1_cnt,markers_785_1_cnt,markers_786_1_cnt,markers_787_1_cnt,markers_788_1_cnt,markers_789_1_cnt,markers_790_1_cnt,markers_791_1_cnt,markers_792_1_cnt,markers_793_1_cnt,markers_794_1_cnt,markers_795_1_cnt,markers_796_1_cnt,markers_797_1_cnt,markers_798_1_cnt,markers_799_1_cnt,markers_800_1_cnt,markers_801_1_cnt,markers_802_1_cnt,markers_803_1_cnt,markers_804_1_cnt,markers_805_1_cnt,spas_symptoms_agr_0_1_std,spas_symptoms_agr_1_1_sum,spas_symptoms_agr_2_3_avg,spas_symptoms_agr_3_3_std,spas_symptoms_agr_4_3_sum,spas_symptoms_agr_5_6_avg,spas_symptoms_agr_6_6_std,spas_symptoms_agr_7_6_sum,spas_symptoms_agr_8_12_avg,spas_symptoms_agr_9_12_std,spas_symptoms_agr_10_12_sum,spas_symptoms_agr_11_1_avg,spas_symptoms_agr_12_1_std,spas_symptoms_agr_13_1_sum,spas_symptoms_agr_14_3_avg,spas_symptoms_agr_15_3_std,target
0,-0.179294,-0.065754,-0.238323,-0.104036,-0.078483,-0.116726,-0.178096,-0.215031,,-0.092402,,-0.15854,-0.111964,-0.024089,-0.019059,-0.015788,-0.045439,-0.011497,,-0.054198,-0.087955,-0.12999,-0.11044,-0.055536,-0.03872,-0.086761,-0.051684,-0.011923,-0.012835,-0.013986,-0.021677,-0.07585,-0.0347,-0.100478,-0.009172,-0.031379,-0.057205,-0.353825,-0.024702,-0.11857,-0.133784,-0.147802,,-0.013262,-0.073694,-0.197899,-0.105715,-0.024919,-0.013528,-0.003289,-0.060935,-0.008311,-0.157523,-0.059689,-0.095306,-0.103032,-0.103366,-0.027662,-0.048216,-0.020925,-0.134269,-0.017703,-0.080102,-0.031835,-0.072186,-0.02564,-0.133705,-0.053682,-0.07728,-0.061905,-0.053008,,-0.012107,-0.174107,-0.07607,-0.014058,,-0.046566,,-0.466228,,-0.004508,-0.039244,-0.043952,-0.357336,-0.037361,-0.007714,-0.076255,-0.007145,,-0.086011,-0.048232,-0.189618,-0.014522,-0.034652,-0.170406,,,-0.148037,-0.193187,,-0.018164,-0.077377,-0.046868,-0.31766,-0.05416,-0.054508,-0.032307,-0.188967,-0.089113,,-0.117366,-0.280807,-0.022383,-0.036359,-0.073409,-0.139922,-0.145881,-0.26321,-0.033085,-0.287868,-0.055048,-0.031978,-0.355535,-0.017009,-0.005439,-0.063678,-0.046774,-0.326752,-0.041658,-0.181834,-0.473676,-0.06563,-0.130298,-0.345945,-0.040255,-0.031672,-0.061696,-0.418728,-0.19764,-0.095903,-0.108177,-0.035217,-0.213517,-0.030448,-0.185723,-0.029924,-0.15811,-0.218615,-0.076133,-0.742936,-0.014784,-0.05387,-0.01514,-0.139232,-0.107663,-0.190703,-0.007915,-0.026327,-0.047214,-0.024438,-0.034565,-0.112963,-0.038788,-0.116006,-0.06547,-0.206636,-0.212451,-0.009755,-0.111689,-0.357055,-0.117005,-0.093901,-0.033756,-0.187771,-0.0051,-0.00963,-0.007384,-0.011695,-0.065081,-0.009588,-0.020736,-0.149392,-0.3601,-0.42227,0.469209,0.417891,-0.510571,0.458947,0.417272,-0.548222,0.475338,-0.417072,-0.570005,-0.406802,-0.413013,-0.458774,-0.41186,-0.416452,-0.542811,0
1,-0.179294,-0.065754,-0.238323,-0.104036,-0.078483,-0.116726,-0.178096,-0.215031,,-0.092402,-0.082871,-0.15854,-0.111964,-0.024089,-0.019059,-0.015788,-0.045439,-0.011497,-0.008705,-0.054198,-0.087955,-0.12999,-0.11044,-0.055536,-0.03872,-0.086761,-0.051684,-0.011923,-0.012835,-0.013986,-0.021677,-0.07585,-0.0347,-0.100478,-0.009172,-0.031379,-0.057205,-0.353825,-0.024702,-0.11857,-0.133784,-0.147802,,-0.013262,-0.073694,-0.197899,-0.105715,-0.024919,-0.013528,-0.003289,-0.060935,-0.008311,-0.157523,-0.059689,-0.095306,-0.103032,-0.103366,-0.027662,-0.048216,-0.020925,-0.134269,-0.017703,-0.080102,-0.031835,-0.072186,-0.02564,-0.133705,-0.053682,-0.07728,-0.061905,-0.053008,-0.047451,-0.012107,-0.174107,-0.07607,-0.014058,-0.052467,-0.046566,-0.066715,-0.466228,,-0.004508,-0.039244,-0.043952,-0.357336,-0.037361,-0.007714,-0.076255,-0.007145,-0.017078,-0.086011,-0.048232,-0.189618,-0.014522,-0.034652,-0.170406,,,-0.148037,-0.193187,,-0.018164,-0.077377,-0.046868,-0.31766,-0.05416,-0.054508,-0.032307,-0.188967,-0.089113,,-0.117366,-0.280807,-0.022383,-0.036359,-0.073409,-0.139922,-0.145881,-0.26321,-0.033085,1.335937,-0.055048,-0.031978,-0.355535,-0.017009,-0.005439,-0.063678,-0.046774,-0.326752,-0.041658,-0.181834,-0.473676,-0.06563,-0.130298,-0.345945,-0.040255,-0.031672,-0.061696,-0.418728,-0.19764,-0.095903,-0.108177,-0.035217,-0.213517,-0.030448,-0.185723,-0.029924,-0.15811,-0.218615,-0.076133,-0.582034,-0.014784,-0.05387,-0.01514,-0.139232,-0.107663,-0.190703,-0.007915,-0.026327,-0.047214,-0.024438,-0.034565,-0.112963,-0.038788,-0.116006,-0.06547,-0.206636,-0.212451,-0.009755,-0.111689,-0.357055,-0.117005,-0.093901,-0.033756,-0.187771,-0.0051,-0.00963,-0.007384,-0.011695,-0.065081,-0.009588,-0.020736,-0.149392,-0.3601,-0.42227,0.469209,0.417891,-0.510571,0.458947,0.417272,-0.548222,0.475338,-0.417072,-0.570005,-0.406802,-0.413013,-0.458774,-0.41186,-0.416452,-0.542811,0
2,-0.179294,-0.065754,-0.238323,-0.104036,-0.078483,-0.116726,-0.178096,-0.215031,,-0.092402,-0.082871,-0.15854,-0.111964,-0.024089,-0.019059,-0.015788,-0.045439,-0.011497,-0.008705,-0.054198,-0.087955,-0.12999,-0.11044,-0.055536,-0.03872,-0.086761,-0.051684,-0.011923,-0.012835,-0.013986,-0.021677,-0.07585,-0.0347,-0.100478,-0.009172,-0.031379,-0.057205,-0.353825,-0.024702,-0.11857,-0.133784,-0.147802,,-0.013262,-0.073694,-0.197899,-0.105715,-0.024919,-0.013528,-0.003289,-0.060935,-0.008311,-0.157523,-0.059689,-0.095306,-0.103032,-0.103366,-0.027662,-0.048216,-0.020925,-0.134269,-0.017703,-0.080102,-0.031835,-0.072186,-0.02564,-0.133705,-0.053682,-0.07728,-0.061905,-0.053008,-0.047451,-0.012107,-0.174107,-0.07607,-0.014058,-0.052467,-0.046566,-0.066715,-0.466228,,-0.004508,-0.039244,-0.043952,-0.357336,-0.037361,-0.007714,-0.076255,-0.007145,-0.017078,-0.086011,-0.048232,-0.189618,-0.014522,-0.034652,-0.170406,,,-0.148037,-0.193187,,-0.018164,-0.077377,-0.046868,-0.31766,-0.05416,-0.054508,-0.032307,-0.188967,-0.089113,,-0.117366,-0.280807,-0.022383,-0.036359,-0.073409,-0.139922,-0.145881,-0.26321,-0.033085,-0.287868,-0.055048,-0.031978,-0.355535,-0.017009,-0.005439,-0.063678,-0.046774,-0.326752,-0.041658,-0.181834,-0.219343,-0.06563,-0.130298,-0.345945,-0.040255,-0.031672,-0.061696,-0.224375,-0.19764,-0.095903,-0.108177,-0.035217,-0.213517,-0.030448,-0.185723,-0.029924,-0.15811,-0.218615,-0.076133,0.383377,-0.014784,-0.05387,-0.01514,-0.139232,-0.107663,-0.190703,-0.007915,-0.026327,-0.047214,-0.024438,-0.034565,-0.112963,-0.038788,-0.116006,-0.06547,-0.206636,-0.212451,-0.009755,-0.111689,-0.357055,3.52422,-0.093901,-0.033756,-0.187771,-0.0051,-0.00963,-0.007384,-0.011695,-0.065081,-0.009588,-0.020736,-0.149392,-0.3601,-0.42227,0.469209,0.417891,-0.510571,0.458947,0.417272,-0.548222,0.475338,-0.417072,-0.570005,-0.406802,-0.413013,-0.458774,-0.41186,-0.416452,-0.542811,1
3,-0.179294,-0.065754,-0.238323,-0.104036,-0.078483,-0.116726,-0.178096,0.591789,-0.01796,-0.092402,-0.082871,-0.15854,-0.111964,-0.024089,-0.019059,-0.015788,-0.045439,-0.011497,-0.008705,-0.054198,-0.087955,-0.12999,1.866255,-0.055536,-0.03872,-0.086761,-0.051684,-0.011923,-0.012835,-0.013986,-0.021677,-0.07585,-0.0347,-0.100478,-0.009172,-0.031379,-0.057205,0.438717,-0.024702,-0.11857,-0.133784,-0.147802,-0.052587,-0.013262,-0.073694,-0.197899,-0.105715,-0.024919,-0.013528,-0.003289,-0.060935,-0.008311,-0.157523,-0.059689,-0.095306,-0.103032,-0.103366,-0.027662,-0.048216,-0.020925,-0.134269,-0.017703,-0.080102,-0.031835,-0.072186,-0.02564,-0.133705,-0.053682,-0.07728,-0.061905,-0.053008,-0.047451,-0.012107,-0.174107,-0.07607,-0.014058,-0.052467,-0.046566,-0.066715,0.701994,4.169424,-0.004508,-0.039244,-0.043952,-0.357336,-0.037361,-0.007714,6.87258,-0.007145,-0.017078,-0.086011,-0.048232,-0.189618,-0.014522,-0.034652,2.677133,-0.014843,-0.010123,1.224995,-0.193187,-0.027031,-0.018164,-0.077377,-0.046868,0.106595,-0.05416,-0.054508,-0.032307,-0.188967,-0.089113,1.836894,-0.117366,0.939659,-0.022383,-0.036359,-0.073409,-0.139922,-0.145881,-0.26321,-0.033085,3.50101,-0.055048,-0.031978,-0.355535,-0.017009,-0.005439,-0.063678,-0.046774,3.584053,-0.041658,-0.181834,2.832657,-0.06563,-0.130298,1.70492,-0.040255,-0.031672,-0.061696,-0.030022,-0.19764,2.797771,-0.108177,-0.035217,1.370751,-0.030448,-0.185723,-0.029924,-0.15811,0.705501,-0.076133,2.475102,-0.014784,-0.05387,-0.01514,-0.139232,-0.107663,1.388155,-0.007915,-0.026327,-0.047214,-0.024438,-0.034565,-0.112963,-0.038788,-0.116006,-0.06547,-0.206636,-0.212451,-0.009755,-0.111689,0.507367,-0.117005,-0.093901,-0.033756,-0.187771,-0.0051,-0.00963,-0.007384,-0.011695,-0.065081,-0.009588,-0.020736,-0.149392,-0.3601,-0.42227,0.469209,0.417891,-0.510571,0.458947,0.417272,-0.548222,0.475338,-0.387662,0.088119,-0.376064,-0.413013,-0.458774,-0.41186,-0.416452,-0.542811,0
4,-0.179294,-0.065754,-0.238323,-0.104036,-0.078483,-0.116726,-0.178096,-0.215031,-0.01796,-0.092402,-0.082871,-0.15854,-0.111964,-0.024089,-0.019059,-0.015788,-0.045439,-0.011497,-0.008705,-0.054198,-0.087955,-0.12999,-0.11044,-0.055536,-0.03872,-0.086761,-0.051684,-0.011923,-0.012835,-0.013986,-0.021677,-0.07585,-0.0347,-0.100478,-0.009172,-0.031379,-0.057205,-0.353825,-0.024702,-0.11857,-0.133784,-0.147802,-0.052587,-0.013262,-0.073694,-0.197899,-0.105715,-0.024919,-0.013528,-0.003289,-0.060935,-0.008311,-0.157523,-0.059689,-0.095306,-0.103032,-0.103366,-0.027662,-0.048216,-0.020925,-0.134269,-0.017703,-0.080102,-0.031835,-0.072186,-0.02564,-0.133705,-0.053682,-0.07728,-0.061905,-0.053008,-0.047451,-0.012107,-0.174107,-0.07607,-0.014058,-0.052467,-0.046566,-0.066715,0.117883,,-0.004508,-0.039244,-0.043952,-0.357336,-0.037361,-0.007714,-0.076255,-0.007145,-0.017078,-0.086011,-0.048232,-0.189618,-0.014522,-0.034652,-0.170406,-0.014843,-0.010123,-0.148037,-0.193187,-0.027031,-0.018164,-0.077377,-0.046868,-0.31766,-0.05416,-0.054508,-0.032307,-0.188967,-0.089113,,3.03766,-0.280807,-0.022383,-0.036359,-0.073409,-0.139922,-0.145881,-0.26321,-0.033085,-0.287868,-0.055048,-0.031978,-0.355535,-0.017009,-0.005439,-0.063678,-0.046774,-0.326752,-0.041658,-0.181834,2.578324,-0.06563,-0.130298,-0.345945,-0.040255,-0.031672,-0.061696,-0.418728,0.468251,-0.095903,-0.108177,-0.035217,-0.213517,-0.030448,-0.185723,-0.029924,-0.15811,-0.218615,-0.076133,-0.099328,-0.014784,-0.05387,-0.01514,1.011661,-0.107663,-0.190703,-0.007915,-0.026327,-0.047214,-0.024438,-0.034565,-0.112963,-0.038788,-0.116006,-0.06547,-0.206636,-0.212451,-0.009755,-0.111689,-0.357055,-0.117005,-0.093901,-0.033756,-0.187771,-0.0051,-0.00963,-0.007384,-0.011695,-0.065081,-0.009588,-0.020736,-0.149392,-0.3601,-0.42227,0.365662,0.417891,-0.510571,0.423986,0.417272,-0.548222,0.4577,-0.417072,-0.570005,-0.406802,-0.413013,-0.458774,-0.41186,-0.416452,-0.542811,0


In [17]:
# Drop constant features from this column slice.
# NOTE(review): the second argument (0.5) is presumably a variance threshold, which would
# also drop low-variance (not only constant) columns — confirm in variance_filter.
# NOTE(review): the result is bound back to the same name, so re-running this cell
# applies the filter to the already-filtered frame.
dataset_600_800 = variance_filter(dataset_600_800, 0.5)


In [18]:
# Compute the correlation of each remaining feature with the target.
# The displayed result is a float64 Series indexed by feature name, shown in descending order.
# NOTE(review): the correlation method is defined in check_corr (not visible in this cell).
corr_600_800 = check_corr(dataset_600_800)
corr_600_800

spas_symptoms_agr_10_12_sum    0.022697
spas_symptoms_agr_14_3_avg     0.022478
spas_symptoms_agr_13_1_sum     0.022373
spas_symptoms_agr_11_1_avg     0.022152
spas_symptoms_agr_8_12_avg     0.021893
                                 ...   
spas_symptoms_agr_7_6_sum     -0.020821
spas_symptoms_agr_4_3_sum     -0.021741
spas_symptoms_agr_1_1_sum     -0.022496
spas_symptoms_agr_2_3_avg     -0.023163
spas_symptoms_agr_5_6_avg     -0.023248
Length: 198, dtype: float64

### Столбцы с 800 по 930

In [19]:
# Read the final slice of columns from the training dataset.
# NOTE(review): the end bound is 931 while the section heading says columns 800-930,
# so the end index is presumably exclusive — confirm in read_data.
dataset_800_930 = read_data(dataset_train_path, 800, 931)
dataset_800_930.head()

Unnamed: 0,spas_symptoms_agr_16_3_sum,spas_symptoms_agr_17_6_avg,spas_symptoms_agr_18_6_std,spas_symptoms_agr_19_6_sum,materials_details_0_1_ctg,communication_availability_0_1_flg,payments_details_0_1_cnt,payments_details_1_3_cnt,payments_details_2_6_cnt,payments_details_3_1_cnt,payments_details_4_3_cnt,payments_details_5_6_cnt,payments_details_6_1_cnt,payments_details_7_3_cnt,payments_details_8_6_cnt,payments_details_9_1_cnt,payments_details_10_3_cnt,payments_details_11_6_cnt,payments_details_12_1_cnt,payments_details_13_3_cnt,payments_details_14_6_cnt,payments_details_15_1_cnt,payments_details_16_1d3_avg,payments_details_17_1d3_cnt,payments_details_18_1d3_sum,payments_details_19_1d6_avg,payments_details_20_1d6_cnt,payments_details_21_1d6_sum,payments_details_22_3_cnt,payments_details_23_3d6_avg,payments_details_24_3d6_cnt,payments_details_25_3d6_sum,payments_details_26_6_cnt,payments_details_27_1_sumpct,payments_details_28_3_sumpct,payments_details_29_6_sumpct,user_devices_0_1_cnt,communication_availability_1_1_ctg,user_devices_1_1_cnt,communication_availability_2_1_flg,materials_details_1_1_ctg,user_lifetime_0_1_ctg,user_lifetime_1_1_flg,materials_details_2_1_cnt,communication_availability_3_1_flg,materials_details_3_1_dt,materials_details_4_1_dt,materials_details_5_1_flg,materials_details_6_1_num,materials_details_7_1_flg,materials_details_8_1_flg,user_devices_2_1_cnt,arpu_0_1_sum,arpu_1_3_avg,arpu_2_6_avg,balance_details_0_1_num,charges_details_0_1_sum,charges_details_1_3_avg,charges_details_2_6_avg,charges_details_3_1_sum,charges_details_4_3_avg,charges_details_5_6_avg,charges_details_6_1_sum,charges_details_7_3_avg,charges_details_8_6_avg,charges_details_9_1_sum,charges_details_10_3_avg,charges_details_11_6_avg,tariff_plans_0_1_num,tariff_plans_1_1_num,charges_details_12_1_sum,charges_details_13_3_avg,charges_details_14_6_avg,tariff_plans_2_1_num,tariff_plans_3_1_num,charges_details_15_1_sum,charges_details_16_3_avg,charges_details_17_6_avg,tariff_plans_4_1_n
um,tariff_plans_5_1_num,charges_details_18_1_sum,charges_details_19_3_avg,charges_details_20_6_avg,tariff_plans_6_1_num,tariff_plans_7_1_num,charges_details_21_1_sum,charges_details_22_3_avg,charges_details_23_6_avg,charges_details_24_1_sum,charges_details_25_3_avg,charges_details_26_6_avg,charges_details_27_1_sum,charges_details_28_3_avg,charges_details_29_6_avg,payments_details_30_1_sum,payments_details_31_3_sum,payments_details_32_6_sum,payments_details_33_1_sum,payments_details_34_3_sum,payments_details_35_6_sum,payments_details_36_1_sum,payments_details_37_3_sum,payments_details_38_6_sum,payments_details_39_1_sum,payments_details_40_3_sum,payments_details_41_6_sum,payments_details_42_1_sum,payments_details_43_3_sum,payments_details_44_6_sum,payments_details_45_1_avg,payments_details_46_1_sum,payments_details_47_3_avg,payments_details_48_3_sum,payments_details_49_6_avg,payments_details_50_6_sum,plan_costs_0_1_sum,plan_costs_1_3_avg,plan_costs_2_6_avg,plan_costs_3_1_sum,plan_costs_4_3_avg,plan_costs_5_6_avg,plan_costs_6_1_sum,plan_costs_7_3_avg,plan_costs_8_6_avg,charges_details_30_1_sum,charges_details_31_3_avg,charges_details_32_6_avg,tariff_plans_8_1_num,tariff_plans_9_1_num,vas_details_0_1_sum,vas_details_1_3_sum,target
0,-0.413926,-0.415941,-0.56747,-0.410849,1,1,-0.484809,-0.582865,-0.621387,0.277911,0.359126,0.404608,0.0,0.0,0.0,-0.00409,-0.007544,-0.009613,0.0,0.0,0.0,-0.004218,0.358846,0.009302,0.043081,0.3887,-0.080449,-0.015497,0.015892,0.172597,-0.107577,-0.022152,0.044289,-0.316965,-0.623833,-0.722535,-0.310186,5,-0.03573,1,1,0,-1,,1,,,-1,,-1,-1,-0.039662,-1.340015,-1.388009,-1.418177,-0.06716,-1.482161,-1.511285,-1.537175,-1.483398,-1.512404,-1.538253,0.125476,0.104903,0.112379,0.125454,0.104868,0.112337,,,-0.850738,-0.8548,-0.857056,,,-1.92945,-1.986848,-2.050207,-2.232055,-2.065076,1.375516,1.396115,1.426084,-0.301287,-1.479226,0.083358,0.059508,0.082743,0.083324,0.059462,0.08269,0.0,0.0,0.0,-0.396153,-0.533037,-0.593226,-0.359508,-0.481712,-0.509249,0.0,0.0,0.0,-0.00359,-0.00553,-0.005738,0.0,0.0,0.0,-0.650403,-0.66455,-0.790306,-1.139354,-0.790365,-1.389611,-1.810725,-1.809937,-1.840097,-1.84736,-1.843824,-1.857884,0.008774,-0.000272,-0.05821,-0.199433,-0.202053,-0.204415,,,,,0
1,-0.413926,-0.415941,-0.56747,-0.410849,1,1,-0.484809,-0.582865,-0.621387,-0.912766,-0.112825,0.160109,0.0,0.0,0.0,-0.00409,-0.007544,-0.009613,0.0,0.0,0.0,-1.27075,-2.358446,-1.875909,-1.818139,-2.182698,-1.382705,-1.328762,-0.526383,-0.006716,-0.775059,-0.795671,-0.241548,-1.659485,-1.401898,-1.354314,-0.310186,5,-0.03573,1,1,0,-1,,1,,,-1,,-1,-1,-0.039662,-1.374242,-1.388009,-1.366894,-0.490323,-1.521237,-1.511285,-1.479236,-1.522538,-1.512404,-1.480228,0.314948,0.320356,0.329689,0.31493,0.320324,0.32965,,,-0.850738,-0.8548,-0.857056,,,-2.104368,-2.105466,-2.089479,-2.232055,-2.065076,1.546938,1.569349,1.609937,-0.301287,-1.479226,0.301367,0.308559,0.315342,0.301338,0.308517,0.315293,0.0,0.0,0.0,-0.396153,-0.533037,-0.593226,-0.99362,-0.768797,-0.630958,0.0,0.0,0.0,-0.00359,-0.00553,-0.005738,0.0,0.0,0.0,-1.407503,-1.282044,-0.790306,-1.486744,-0.767356,-1.555083,-1.810725,-1.809937,-1.802796,-1.750745,-1.746562,-1.736976,-0.263282,-0.271995,-0.284361,-0.199433,-0.202053,-0.204415,,,,,0
2,-0.413926,-0.415941,-0.56747,-0.410849,1,1,-0.484809,-0.582865,-0.621387,0.277911,0.359126,0.404608,0.0,0.0,0.0,-0.00409,-0.007544,-0.009613,0.0,0.0,0.0,-0.004218,0.24338,0.009302,-0.036008,0.182769,-0.080449,-0.120681,0.015892,-0.006716,-0.107577,-0.153412,0.044289,1.140671,1.316906,1.576405,-0.310186,5,-0.03573,1,1,0,-1,,1,,,-1,,-1,-1,-0.039662,-0.518562,-0.508885,-0.472417,-1.184311,-0.544341,-0.51778,-0.468673,-0.544018,-0.517362,-0.468168,0.165476,0.167385,0.174364,0.165454,0.16735,0.174322,,,-0.850738,-0.8548,-0.857056,,,-0.530101,-0.504125,-0.458158,-0.866785,-0.730859,1.375516,1.396115,1.433438,-0.301287,-1.479226,0.107581,0.109318,0.112347,0.107548,0.109273,0.112294,0.0,0.0,0.0,-0.396153,-0.533037,-0.593226,0.029413,0.108191,0.182613,0.0,0.0,0.0,-0.00359,-0.00553,-0.005738,0.0,0.0,0.0,-0.186048,-0.285819,-0.343081,-0.425539,-0.365943,-0.448974,-0.719848,-0.712037,-0.683748,-0.736289,-0.72531,-0.693525,0.008774,-0.000272,-0.01298,-0.199433,-0.202053,-0.204415,,,,,1
3,-0.413926,-0.415941,-0.56747,-0.410849,1,1,1.563276,2.604134,2.693315,0.277911,0.359126,0.160109,0.0,0.0,0.0,-0.00409,-0.007544,-0.009613,0.0,0.0,0.0,1.262314,0.677895,-0.260014,-0.035513,0.013202,-0.180623,-0.293547,2.184989,-0.904191,0.149148,-0.613685,2.045146,0.941546,1.234413,1.774941,-0.310186,4,-0.03573,1,1,0,-1,,1,,,-1,,-1,-1,-0.039662,-0.552789,-0.54405,-0.508197,0.192764,-0.583417,-0.55752,-0.509095,-0.583159,-0.557164,-0.508651,0.302316,0.307428,0.316563,0.302298,0.307397,0.316524,,,-0.850738,-0.8548,-0.857056,,,-0.588407,-0.563434,-0.518577,0.201687,-0.579929,1.375516,1.396115,1.433438,-0.642318,-1.479226,-0.074093,-0.07747,-0.077962,-0.07413,-0.077518,-0.078017,0.0,0.0,0.0,0.773879,1.036088,1.46569,-0.9412,-1.269821,-1.359464,0.0,0.0,0.0,-0.00359,-0.00553,-0.005738,0.0,0.0,0.0,-0.80687,-0.302286,-0.979679,-0.455043,-0.830346,-0.125741,0.062738,0.075586,0.119047,-0.591367,-0.579417,-0.54446,1.845151,1.833862,1.81884,-0.199433,-0.202053,-0.204415,,,,,0
4,-0.413926,-0.415941,-0.56747,-0.410849,1,1,-0.484809,-0.582865,-0.621387,1.468589,0.359126,0.649107,0.0,0.0,0.0,-0.00409,-0.007544,-0.009613,0.0,0.0,0.0,1.262314,0.286385,1.894513,1.805035,0.221868,0.849733,0.776465,0.015892,-0.006716,-0.58435,-0.612168,0.330126,1.785954,1.154953,1.766258,-0.310186,5,-0.03573,1,1,0,-1,,1,,,-1,,-1,-1,-0.039662,-0.621244,-0.61438,-0.579755,0.169811,-0.661569,-0.637,-0.58994,-0.661441,-0.636767,-0.589615,0.165476,0.167385,0.174364,0.165454,0.16735,0.174322,,,-0.850738,-0.8548,-0.857056,,,-0.70502,-0.682052,-0.639416,-1.044864,-0.911974,1.375516,1.396115,1.433438,-0.642318,-1.479226,0.143916,0.146676,0.150408,0.143884,0.146631,0.150356,0.0,0.0,0.0,-0.396153,-0.533037,-0.593226,0.984808,0.037402,0.359287,0.0,0.0,0.0,-0.00359,-0.00553,-0.005738,0.0,0.0,0.0,-0.226427,0.64454,-0.396748,-0.511197,-0.414112,-0.208773,-0.933281,-0.926843,-0.902692,-0.881211,-0.871203,-0.842589,-0.195268,-0.204064,-0.216516,-0.199433,-0.202053,-0.204415,,,,,0


In [20]:
# Drop constant features from this slice of columns.
# NOTE(review): the second argument is 0.5 rather than 0 — presumably a
# variance threshold, which would also remove low-variance (not only strictly
# constant) columns; confirm against the definition of variance_filter.
# The name dataset_800_930 is rebound to the helper's return value.
dataset_800_930 = variance_filter(dataset_800_930, 0.5)


In [21]:
# Compute the correlation with the target for this slice of columns.
# check_corr is defined elsewhere in the notebook; judging by the displayed
# output it presumably returns a Series of per-feature values sorted in
# descending order — verify against its definition.
corr_800_930 = check_corr(dataset_800_930)
# Bare last expression so the notebook renders the result.
corr_800_930

materials_details_4_1_dt     0.063585
materials_details_3_1_dt     0.059938
materials_details_6_1_num    0.059930
charges_details_13_3_avg     0.031386
charges_details_14_6_avg     0.031338
                               ...   
charges_details_10_3_avg    -0.023347
charges_details_7_3_avg     -0.023352
charges_details_11_6_avg    -0.023568
charges_details_8_6_avg     -0.023573
materials_details_2_1_cnt   -0.041085
Length: 110, dtype: float64