In [44]:
import os
import pandas as pd
import numpy as np
import pickle
from sklearn import set_config
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from joblib import dump, load

In [45]:
clinical_attributes = pd.read_csv('step_03/clinical_attributes.csv')
mutation = pd.read_csv('step_03/mutation.csv')
z_score = pd.read_csv('step_03/z_score.csv')
response = pd.read_csv('step_03/response.csv')

In [46]:
ca_columns = clinical_attributes.columns[1:]
mutation_columns = mutation.columns[1:]
z_score_columns = z_score.columns[1:]
ordinal_columns = ["cellularity", "integrative_cluster"]
one_hot_columns = ["type_of_breast_surgery", "cancer_type", "cancer_type_detailed", "pam50_+_claudin-low_subtype", "er_status_measured_by_ihc", "er_status", "her2_status_measured_by_snp6", "her2_status", "tumor_other_histologic_subtype", "inferred_menopausal_state", "primary_tumor_laterality", "oncotree_code", "pr_status", "3-gene_classifier_subtype"]

In [47]:
len(one_hot_columns)

14

In [48]:
df = pd.merge(clinical_attributes, z_score, on='patient_id', how='inner')
df = pd.merge(df, mutation, on='patient_id', how='inner')
df.drop(columns=['patient_id'], inplace=True)
df

Unnamed: 0,age_at_diagnosis,type_of_breast_surgery,cancer_type,cancer_type_detailed,cellularity,chemotherapy,pam50_+_claudin-low_subtype,cohort,er_status_measured_by_ihc,er_status,...,ppp2cb_mut,smarcd1_mut,nras_mut,ndfip1_mut,hras_mut,prps2_mut,smarcb1_mut,stmn2_mut,siah1_mut,mut_count
0,43.19,BREAST CONSERVING,Breast Cancer,Breast Invasive Ductal Carcinoma,High,0,LumA,1.0,Positve,Positive,...,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,2
1,48.87,MASTECTOMY,Breast Cancer,Breast Invasive Ductal Carcinoma,High,1,LumB,1.0,Positve,Positive,...,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,2
2,47.68,MASTECTOMY,Breast Cancer,Breast Mixed Ductal and Lobular Carcinoma,Moderate,1,LumB,1.0,Positve,Positive,...,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,1
3,56.45,BREAST CONSERVING,Breast Cancer,Breast Invasive Ductal Carcinoma,Moderate,1,LumB,1.0,Positve,Positive,...,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,4
4,66.91,MASTECTOMY,Breast Cancer,Breast Invasive Ductal Carcinoma,Moderate,0,LumB,1.0,Positve,Positive,...,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1177,52.90,BREAST CONSERVING,Breast Cancer,Breast Invasive Ductal Carcinoma,High,0,LumB,4.0,Positve,Positive,...,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,3
1178,56.90,MASTECTOMY,Breast Cancer,Breast Invasive Ductal Carcinoma,High,0,LumA,4.0,Positve,Positive,...,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,5
1179,43.10,BREAST CONSERVING,Breast Cancer,Breast Invasive Lobular Carcinoma,High,0,LumA,4.0,Positve,Positive,...,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,4
1180,42.88,MASTECTOMY,Breast Cancer,Breast Invasive Ductal Carcinoma,High,0,LumB,4.0,Positve,Positive,...,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,no_mutation,6


## Encoding

Se hace uso de de encoding y estandarizacion para ciertos atributos:
- **Atributos clinicos:** Encoding + Estandarizacion
- **mRNA z-score:** Nada
- **Mutaciones:** One-Hot + Estandarización

Cuando usar:
- **Ordinal encoding:** Los datos tienen un orden con sentido como 'low', 'medium', 'high' o 'primero', 'segundo', 'tercero'
- **One-Hot encoding:** cuando los datos no tienen un orden entre categorias, ejemplo: 'rojo', 'verde', 'azul'

Importante: Dado que los clinical attributes deben ser procesados con un scaler, las mutaciones con ordinal encoder + scaler y hay data sin tocar, entonces se utilizará: Sklearn.compose.ColumnTransformer
https://scikit-learn.org/stable/modules/generated/sklearn.compose.ColumnTransformer.html

¿Por qué escalar?
- En algoritmos basados en árboles (RF, DT, GBM) no es necesario escalar ya que no son suceptibles al este cambio.
- En algoritmos basados en distancias (KNN, SVM, Logistic Regression) se benefician de las métricas de distancias, por ello se recomienda escalar

https://www.csie.ntu.edu.tw/~cjlin/papers/guide/guide.pdf

In [49]:
set_config(display="diagram")

In [50]:
def encoding_scaler_pipeline(df: pd.DataFrame, scaler: str):
    ordinal_columns = ["cellularity", "integrative_cluster"]
    one_hot_columns = ["type_of_breast_surgery", "cancer_type", "cancer_type_detailed", "pam50_+_claudin-low_subtype", "er_status_measured_by_ihc", "er_status", "her2_status_measured_by_snp6", "her2_status", "tumor_other_histologic_subtype", "inferred_menopausal_state", "primary_tumor_laterality", "oncotree_code", "pr_status", "3-gene_classifier_subtype"]
    mut_columns = [col for col in df.columns if "_mut" in col]
    clinical_attributes = ["age_at_diagnosis", "type_of_breast_surgery", "cancer_type", "cancer_type_detailed", "cellularity", "chemotherapy", "pam50_+_claudin-low_subtype", "cohort", "er_status_measured_by_ihc", "er_status", "neoplasm_histologic_grade", "her2_status_measured_by_snp6", "her2_status", "tumor_other_histologic_subtype", "hormone_therapy", "inferred_menopausal_state", "integrative_cluster", "primary_tumor_laterality", "lymph_nodes_examined_positive", "mutation_count", "nottingham_prognostic_index", "oncotree_code", "pr_status", "radio_therapy", "3-gene_classifier_subtype", "tumor_size", "tumor_stage"]
    z_score_cols = [col for col in df.columns if col not in clinical_attributes and "mut" not in col ]
    # Valores que ya son numeros
    encoding_ignore_list = z_score_cols + list(set(clinical_attributes) - (set(one_hot_columns) | set(ordinal_columns)))

    print("Encoding data:")
    print(f"- One_hot (mut):  {len(mut_columns)}")
    print(f"- Ordinal (hand): {len(ordinal_columns)}")
    print(f"- One_hot (hand): {len(one_hot_columns)}")
    print(f"- ignored:        {len(encoding_ignore_list)}")

    encoding_step = ColumnTransformer(
        transformers = [
            ('mutations', OneHotEncoder(handle_unknown='ignore', sparse_output=False), mut_columns),
            ('ordinal', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1, encoded_missing_value=-1), ordinal_columns),
            ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False), one_hot_columns),
            # Todas las que no quiero escalar porque ya son numeros
            # Se encuentran 2:
            # - z_score
            # - Aquellos en clinical_attributes que ya son numeros
            ('passthrough', 'passthrough', encoding_ignore_list)
        ]
    )

    # Para extraer las columnas resultantes
    encoding_step.fit(df)

    scaler_columns = encoding_step.get_feature_names_out()
    # El nombre cambia a passthrough__algo
    scaler_columns = [i for i, name in enumerate(scaler_columns) if name.replace("passthrough__","") not in z_score_cols]
    
    print("Resulting encoding:")
    print(f"Number of Z-score columns: {len(z_score_cols)}")
    print(f"New columns count: {len(scaler_columns)}")
    

    if scaler == "StandardScaler":
        scaler_step = ColumnTransformer(
            transformers = [
                ("scaling", StandardScaler(), scaler_columns)
            ],
            remainder='passthrough'
        )
    elif scaler == "MinMaxScaler":
        scaler_step = ColumnTransformer(
            transformers = [
                ("scaling", MinMaxScaler(), scaler_columns)
            ],
            remainder='passthrough'
        )
    elif scaler == "RobustScaler":
        scaler_step = ColumnTransformer(
            transformers = [
                ("scaling", RobustScaler(), scaler_columns)
            ],
            remainder='passthrough'
        )
    else:
        return None, None

    pipeline = Pipeline(
        steps=[
            ("preprocessor", encoding_step),
            ("scaler", scaler_step)
        ]
    )

    return pipeline, pipeline.fit(X=df), (
        list(encoding_step.named_transformers_['mutations'].get_feature_names_out()) +
        list(encoding_step.named_transformers_['ordinal'].get_feature_names_out()) +
        list(encoding_step.named_transformers_['onehot'].get_feature_names_out()) +
        list(encoding_step.named_transformers_['passthrough'].get_feature_names_out())
    )


In [51]:
pipeline, standard_scaler, columns = encoding_scaler_pipeline(df, "StandardScaler")
std_df = pd.DataFrame(data = standard_scaler.transform(df), columns=columns)
std_df

Encoding data:
- One_hot (mut):  173
- Ordinal (hand): 2
- One_hot (hand): 14
- ignored:        500
Resulting encoding:
Number of Z-score columns: 489
New columns count: 641


Unnamed: 0,pik3ca_mut_deletion,pik3ca_mut_delins,pik3ca_mut_frameshift,pik3ca_mut_insertion,pik3ca_mut_missense,pik3ca_mut_mixed,pik3ca_mut_no_mutation,tp53_mut_deletion,tp53_mut_delins,tp53_mut_frameshift,...,chemotherapy,age_at_diagnosis,neoplasm_histologic_grade,tumor_stage,cohort,tumor_size,lymph_nodes_examined_positive,radio_therapy,hormone_therapy,nottingham_prognostic_index
0,-0.105454,-0.041169,-0.065177,-0.029099,-0.828349,-0.041169,0.862185,-0.101274,-0.029099,-0.252797,...,-0.4412,0.4534,0.4068,0.7634,0.0231,0.9121,-0.9538,-0.2264,0.5398,-0.8920
1,-0.105454,-0.041169,-0.065177,-0.029099,1.207220,-0.041169,-1.159844,-0.101274,-0.029099,-0.252797,...,-0.5381,0.0668,0.8344,1.7227,0.4024,-3.7172,-1.5538,1.3701,-0.1078,0.3655
2,-0.105454,-0.041169,-0.065177,-0.029099,1.207220,-0.041169,-1.159844,-0.101274,-0.029099,-0.252797,...,-0.5630,-0.7078,0.8228,0.6819,-0.1948,-2.3286,-0.9924,-0.3154,0.2320,-0.4828
3,-0.105454,-0.041169,-0.065177,-0.029099,1.207220,-0.041169,-1.159844,-0.101274,-0.029099,-0.252797,...,-1.3151,0.0917,1.6814,2.2495,-1.6522,-3.4775,-0.4892,1.1938,-0.1027,0.4331
4,-0.105454,-0.041169,-0.065177,-0.029099,1.207220,-0.041169,-1.159844,-0.101274,-0.029099,-0.252797,...,-0.6038,3.6764,2.5064,-0.1326,2.4715,-3.3516,-0.5036,1.3470,0.9454,1.9603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1177,-0.105454,-0.041169,-0.065177,-0.029099,-0.828349,-0.041169,0.862185,-0.101274,-0.029099,-0.252797,...,-0.4241,-0.6975,0.8607,0.6122,0.9478,-2.0372,-0.8837,-0.6751,-0.7979,-0.5060
1178,-0.105454,-0.041169,-0.065177,-0.029099,1.207220,-0.041169,-1.159844,-0.101274,-0.029099,-0.252797,...,-0.6217,-0.1064,0.5544,-0.1777,1.4395,-1.7915,-0.2462,-0.2934,0.1854,-0.6448
1179,-0.105454,-0.041169,-0.065177,-0.029099,1.207220,-0.041169,-1.159844,-0.101274,-0.029099,-0.252797,...,0.7859,1.5304,1.1196,0.8116,0.0958,-1.2644,-0.3566,-0.2615,-0.6371,-0.5540
1180,-0.105454,-0.041169,-0.065177,-0.029099,-0.828349,-0.041169,0.862185,-0.101274,-0.029099,-0.252797,...,-0.5294,-0.6610,-0.1610,0.2251,0.5127,-0.3412,-1.0703,-0.6950,0.0641,-0.8863


In [52]:
pipeline

In [53]:
pipeline, minmax_scaler, columns = encoding_scaler_pipeline(df, "MinMaxScaler")
minmax_df = pd.DataFrame(data = minmax_scaler.transform(df), columns=columns)
minmax_df

Encoding data:
- One_hot (mut):  173
- Ordinal (hand): 2
- One_hot (hand): 14
- ignored:        500
Resulting encoding:
Number of Z-score columns: 489
New columns count: 641


Unnamed: 0,pik3ca_mut_deletion,pik3ca_mut_delins,pik3ca_mut_frameshift,pik3ca_mut_insertion,pik3ca_mut_missense,pik3ca_mut_mixed,pik3ca_mut_no_mutation,tp53_mut_deletion,tp53_mut_delins,tp53_mut_frameshift,...,chemotherapy,age_at_diagnosis,neoplasm_histologic_grade,tumor_stage,cohort,tumor_size,lymph_nodes_examined_positive,radio_therapy,hormone_therapy,nottingham_prognostic_index
0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,-0.4412,0.4534,0.4068,0.7634,0.0231,0.9121,-0.9538,-0.2264,0.5398,-0.8920
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,-0.5381,0.0668,0.8344,1.7227,0.4024,-3.7172,-1.5538,1.3701,-0.1078,0.3655
2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,-0.5630,-0.7078,0.8228,0.6819,-0.1948,-2.3286,-0.9924,-0.3154,0.2320,-0.4828
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,-1.3151,0.0917,1.6814,2.2495,-1.6522,-3.4775,-0.4892,1.1938,-0.1027,0.4331
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,-0.6038,3.6764,2.5064,-0.1326,2.4715,-3.3516,-0.5036,1.3470,0.9454,1.9603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1177,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,-0.4241,-0.6975,0.8607,0.6122,0.9478,-2.0372,-0.8837,-0.6751,-0.7979,-0.5060
1178,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,-0.6217,-0.1064,0.5544,-0.1777,1.4395,-1.7915,-0.2462,-0.2934,0.1854,-0.6448
1179,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.7859,1.5304,1.1196,0.8116,0.0958,-1.2644,-0.3566,-0.2615,-0.6371,-0.5540
1180,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,-0.5294,-0.6610,-0.1610,0.2251,0.5127,-0.3412,-1.0703,-0.6950,0.0641,-0.8863


In [54]:
pipeline, robust_scaler, columns = encoding_scaler_pipeline(df, "RobustScaler")
robust_df = pd.DataFrame(data = robust_scaler.transform(df), columns=columns)
robust_df

Encoding data:
- One_hot (mut):  173
- Ordinal (hand): 2
- One_hot (hand): 14
- ignored:        500
Resulting encoding:
Number of Z-score columns: 489
New columns count: 641


Unnamed: 0,pik3ca_mut_deletion,pik3ca_mut_delins,pik3ca_mut_frameshift,pik3ca_mut_insertion,pik3ca_mut_missense,pik3ca_mut_mixed,pik3ca_mut_no_mutation,tp53_mut_deletion,tp53_mut_delins,tp53_mut_frameshift,...,chemotherapy,age_at_diagnosis,neoplasm_histologic_grade,tumor_stage,cohort,tumor_size,lymph_nodes_examined_positive,radio_therapy,hormone_therapy,nottingham_prognostic_index
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.4412,0.4534,0.4068,0.7634,0.0231,0.9121,-0.9538,-0.2264,0.5398,-0.8920
1,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,0.0,0.0,...,-0.5381,0.0668,0.8344,1.7227,0.4024,-3.7172,-1.5538,1.3701,-0.1078,0.3655
2,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,0.0,0.0,...,-0.5630,-0.7078,0.8228,0.6819,-0.1948,-2.3286,-0.9924,-0.3154,0.2320,-0.4828
3,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,0.0,0.0,...,-1.3151,0.0917,1.6814,2.2495,-1.6522,-3.4775,-0.4892,1.1938,-0.1027,0.4331
4,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,0.0,0.0,...,-0.6038,3.6764,2.5064,-0.1326,2.4715,-3.3516,-0.5036,1.3470,0.9454,1.9603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1177,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.4241,-0.6975,0.8607,0.6122,0.9478,-2.0372,-0.8837,-0.6751,-0.7979,-0.5060
1178,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,0.0,0.0,...,-0.6217,-0.1064,0.5544,-0.1777,1.4395,-1.7915,-0.2462,-0.2934,0.1854,-0.6448
1179,0.0,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,0.0,0.0,...,0.7859,1.5304,1.1196,0.8116,0.0958,-1.2644,-0.3566,-0.2615,-0.6371,-0.5540
1180,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.5294,-0.6610,-0.1610,0.2251,0.5127,-0.3412,-1.0703,-0.6950,0.0641,-0.8863


In [55]:
std_df.to_csv("step_04/dataset_std.csv", index=False)
minmax_df.to_csv("step_04/dataset_minmax.csv", index=False)
robust_df.to_csv("step_04/dataset_robust.csv", index=False)
response.drop(columns=["patient_id"]).to_csv("step_04/response.csv", index=False)

In [56]:
if not os.path.exists("models"):
    os.mkdir("models")

dump(standard_scaler, "models/std_scaler.joblib")
dump(minmax_scaler, "models/minmax_scaler.joblib")
dump(robust_scaler, "models/robust_scaler.joblib")

['models/robust_scaler.joblib']

### Testings

In [57]:
rt = load("results/robust_scaler.joblib")

In [58]:
data = pd.read_csv("data_separated/test_data.csv")
data

Unnamed: 0,age_at_diagnosis,type_of_breast_surgery,cancer_type,cancer_type_detailed,cellularity,chemotherapy,pam50_+_claudin-low_subtype,cohort,er_status_measured_by_ihc,er_status,...,mtap_mut,ppp2cb_mut,smarcd1_mut,nras_mut,ndfip1_mut,hras_mut,prps2_mut,smarcb1_mut,stmn2_mut,siah1_mut
0,63.53,BREAST CONSERVING,Breast Cancer,Breast Invasive Ductal Carcinoma,High,0,LumB,2.0,Positve,Positive,...,0,0,0,0,0,0,0,0,0,0
1,51.74,MASTECTOMY,Breast Cancer,Breast Mixed Ductal and Lobular Carcinoma,Moderate,0,LumA,1.0,Positve,Positive,...,0,0,0,0,0,0,0,0,0,0
2,66.75,BREAST CONSERVING,Breast Cancer,Breast Invasive Ductal Carcinoma,Moderate,0,LumA,1.0,Positve,Positive,...,0,0,0,0,0,0,0,0,0,0
3,80.17,MASTECTOMY,Breast Cancer,Breast Invasive Ductal Carcinoma,High,0,LumB,3.0,Positve,Positive,...,0,0,0,0,0,0,0,0,0,0
4,67.15,BREAST CONSERVING,Breast Cancer,Breast Invasive Ductal Carcinoma,Moderate,0,Her2,3.0,Negative,Negative,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,61.88,BREAST CONSERVING,Breast Cancer,Breast Mixed Ductal and Lobular Carcinoma,High,0,LumA,3.0,Positve,Positive,...,0,0,0,0,0,0,0,0,0,0
106,65.48,BREAST CONSERVING,Breast Cancer,Breast Invasive Ductal Carcinoma,Moderate,0,claudin-low,3.0,Positve,Negative,...,0,0,0,0,0,0,0,0,0,0
107,38.86,MASTECTOMY,Breast Cancer,Breast Invasive Ductal Carcinoma,High,1,LumA,1.0,Positve,Positive,...,0,0,0,0,0,0,0,0,0,0
108,44.36,BREAST CONSERVING,Breast Cancer,Breast Invasive Ductal Carcinoma,High,0,LumA,3.0,Positve,Positive,...,0,0,0,0,0,0,0,0,0,0


In [59]:
data.dtypes

age_at_diagnosis          float64
type_of_breast_surgery     object
cancer_type                object
cancer_type_detailed       object
cellularity                object
                           ...   
hras_mut                    int64
prps2_mut                   int64
smarcb1_mut                 int64
stmn2_mut                   int64
siah1_mut                   int64
Length: 689, dtype: object

In [60]:
mut_columns = [col for col in data.columns if "_mut" in col]
for col in mut_columns:
    data[col] = data[col].astype('object')

print("Adjusted test dataframe data types:")
print(data.dtypes)

Adjusted test dataframe data types:
age_at_diagnosis          float64
type_of_breast_surgery     object
cancer_type                object
cancer_type_detailed       object
cellularity                object
                           ...   
hras_mut                   object
prps2_mut                  object
smarcb1_mut                object
stmn2_mut                  object
siah1_mut                  object
Length: 689, dtype: object


In [61]:
rt.transform(data)

array([[ 0.    ,  0.    ,  0.    , ...,  3.2955, -0.68  ,  1.0835],
       [ 0.    ,  0.    ,  0.    , ..., -0.528 ,  0.2123, -0.7469],
       [-1.    ,  0.    ,  0.    , ..., -0.7529, -0.4853, -0.6189],
       ...,
       [ 0.    ,  0.    ,  0.    , ...,  0.0554,  0.7301, -0.0947],
       [ 0.    ,  0.    ,  0.    , ..., -0.2562,  0.8312, -0.1766],
       [-1.    ,  0.    ,  0.    , ...,  8.6199,  0.2396,  1.8495]])