In [33]:
from sklearn.model_selection import KFold
import os
import pandas as pd
import numpy as np
from src.read_data import load_reference_data2
from src.preprocess import stratify5
from sklearn.preprocessing import OrdinalEncoder
from src.aggregate_features import generate_lrtt_features, generate_maf_features
from src.read_data import load_reference_data2
from src.smoothing import apply_smoothing, expand_coefficients, apply_pspline_smoothing

kf = KFold(n_splits=5)

# Directory to save fold data and indices 
output_dir = './data/derived/unsmoothed/'
output_dir_bspline = './data/derived/smoothed/bspline/'
output_dir_pspline = './data/derived/smoothed/pspline/'
indices_dir = './data/derived/indices/'


#LOAD DATASETS
first_second_codon_pos, third_codon_pos, gag, pol, gp120, gp41 = load_reference_data2('./data/')
phylo = pd.read_csv('./data/derived/imputed_phylo_data.csv')
maf = pd.read_csv('./data/derived/imputed_MAF_data2.csv')
ambig = pd.read_csv('./data/derived/ambiguity/ambiguity_genes.csv')

#ADD TSI CATEGORIES
phylo = stratify5(phylo)
maf = stratify5(maf)

#sort by ID and coordinate
phylo = phylo.sort_values(by=['RENAME_ID', 'xcoord'])
maf = maf.sort_values(by=['RENAME_ID', 'Window_Centre'])

#ENCODE STRATA IN FIVE CLASSES
tsi_order = ['0m-6m', '6m-12m', '12m-24m', '2y-4y', '4y+']
ordinal_encoder = OrdinalEncoder(categories=[tsi_order])
phylo['TSI_encoded'] = ordinal_encoder.fit_transform(phylo[['TSI_category']])
maf['TSI_encoded'] = ordinal_encoder.fit_transform(maf[['TSI_category']])

#FIND UNIQUE IDS   
unique_ids = phylo['RENAME_ID'].unique()

  phylo = pd.read_csv('./data/derived/imputed_phylo_data.csv')


In [34]:
# Step 1: Generate and save indices for each fold, ensuring grouped splitting
for fold, (train_index, test_index) in enumerate(kf.split(unique_ids), 1):
    # Get the train and test RENAME_IDs
    train_ids = unique_ids[train_index]
    test_ids = unique_ids[test_index]

    # Save these RENAME_IDs directly instead of indices
    np.save(os.path.join(indices_dir, f'train_ids_fold{fold}.npy'), train_ids)
    np.save(os.path.join(indices_dir, f'test_ids_fold{fold}.npy'), test_ids)
    print(f"Saved RENAME_IDs for fold {fold}")

Saved RENAME_IDs for fold 1
Saved RENAME_IDs for fold 2
Saved RENAME_IDs for fold 3
Saved RENAME_IDs for fold 4
Saved RENAME_IDs for fold 5


# BSPLINE SMOOTHED

In [36]:
for fold in range(1, 6):  
    # Load the train and test RENAME_IDs for the current fold
    train_ids = np.load(os.path.join(indices_dir, f'train_ids_fold{fold}.npy'), allow_pickle=True)
    test_ids = np.load(os.path.join(indices_dir, f'test_ids_fold{fold}.npy'), allow_pickle=True)
    
    # Create train/test datasets based on the fold's split
    train_phylo = phylo[phylo['RENAME_ID'].isin(train_ids)]
    test_phylo = phylo[phylo['RENAME_ID'].isin(test_ids)]
    train_maf = maf[maf['RENAME_ID'].isin(train_ids)]
    test_maf = maf[maf['RENAME_ID'].isin(test_ids)]
    train_ambig = ambig[ambig['RENAME_ID'].isin(train_ids)]
    test_ambig = ambig[ambig['RENAME_ID'].isin(test_ids)]

    # B-Spline Smoothing
    smoothed_lrtt_train, coefficients_lrtt_train = apply_smoothing(train_phylo, 'xcoord', 'normalised.largest.rtt', 14)
    smoothed_maf12c_train, coefficients_maf12c_train = apply_smoothing(train_maf, 'Window_Centre', 'MAF12c_Mean', 16)
    smoothed_maf3c_train, coefficients_maf3c_train = apply_smoothing(train_maf, 'Window_Centre', 'MAF3c_Mean', 6)

    smoothed_lrtt_test, coefficients_lrtt_test = apply_smoothing(test_phylo, 'xcoord', 'normalised.largest.rtt', 14)
    smoothed_maf12c_test, coefficients_maf12c_test = apply_smoothing(test_maf, 'Window_Centre', 'MAF12c_Mean', 16)
    smoothed_maf3c_test, coefficients_maf3c_test = apply_smoothing(test_maf, 'Window_Centre', 'MAF3c_Mean', 6)

    # Adding smoothed values to the DataFrame
    train_phylo['smoothed_lrtt'] = smoothed_lrtt_train
    train_maf['smoothed_maf12c'] = smoothed_maf12c_train
    train_maf['smoothed_maf3c'] = smoothed_maf3c_train

    test_phylo['smoothed_lrtt'] = smoothed_lrtt_test
    test_maf['smoothed_maf12c'] = smoothed_maf12c_test
    test_maf['smoothed_maf3c'] = smoothed_maf3c_test

    # Generate aggregated features
    train_phylo_features = generate_lrtt_features(train_phylo, gag, pol, gp120, gp41, feature='smoothed_lrtt')
    test_phylo_features = generate_lrtt_features(test_phylo, gag, pol, gp120, gp41, feature='smoothed_lrtt')

    train_maf_features = generate_maf_features(train_maf, gag, pol, gp120, gp41, feature_12c='smoothed_maf12c', feature_3c='smoothed_maf3c')
    test_maf_features = generate_maf_features(test_maf, gag, pol, gp120, gp41, feature_12c='smoothed_maf12c', feature_3c='smoothed_maf3c')

    # Merge the aggregated features
    train_features = pd.merge(train_phylo_features, train_maf_features, on=['RENAME_ID', 'TSI_days'], how='inner')
    test_features = pd.merge(test_phylo_features, test_maf_features, on=['RENAME_ID', 'TSI_days'], how='inner')

    # Finalize the features
    train_features = stratify5(train_features)
    test_features = stratify5(test_features)

    # Add coefficients to the aggregated features
    train_features = expand_coefficients(train_features, coefficients_lrtt_train, 'lrtt')
    train_features = expand_coefficients(train_features, coefficients_maf12c_train, 'maf12c')
    train_features = expand_coefficients(train_features, coefficients_maf3c_train, 'maf3c')

    test_features = expand_coefficients(test_features, coefficients_lrtt_test, 'lrtt')
    test_features = expand_coefficients(test_features, coefficients_maf12c_test, 'maf12c')
    test_features = expand_coefficients(test_features, coefficients_maf3c_test, 'maf3c')

    # Merge ambiguity data
    train_features = pd.merge(train_features, train_ambig[['RENAME_ID', 'genome_ambig', 'gag_ambig', 'pol_ambig', 'gp41_ambig', 'gp120_ambig']], on='RENAME_ID', how='inner')
    test_features = pd.merge(test_features, test_ambig[['RENAME_ID', 'genome_ambig', 'gag_ambig', 'pol_ambig', 'gp41_ambig', 'gp120_ambig']], on='RENAME_ID', how='inner')

    # Save the fold's train/test datasets
    train_features.to_csv(os.path.join(output_dir_bspline, f'training_data_fold{fold}.csv'), header=True)
    test_features.to_csv(os.path.join(output_dir_bspline, f'test_data_fold{fold}.csv'), header=True)

    print(f"Completed processing fold {fold}")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_phylo['smoothed_lrtt'] = smoothed_lrtt_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf12c'] = smoothed_maf12c_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf3c'] = smoothed_maf3c_train
A value is trying to be set on a copy of a slice f

Completed processing fold 1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_phylo['smoothed_lrtt'] = smoothed_lrtt_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf12c'] = smoothed_maf12c_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf3c'] = smoothed_maf3c_train
A value is trying to be set on a copy of a slice f

Completed processing fold 2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_phylo['smoothed_lrtt'] = smoothed_lrtt_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf12c'] = smoothed_maf12c_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf3c'] = smoothed_maf3c_train
A value is trying to be set on a copy of a slice f

Completed processing fold 3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_phylo['smoothed_lrtt'] = smoothed_lrtt_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf12c'] = smoothed_maf12c_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf3c'] = smoothed_maf3c_train
A value is trying to be set on a copy of a slice f

Completed processing fold 4


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_phylo['smoothed_lrtt'] = smoothed_lrtt_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf12c'] = smoothed_maf12c_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf3c'] = smoothed_maf3c_train
A value is trying to be set on a copy of a slice f

Completed processing fold 5


# UNSMOOTHED

In [37]:
for fold in range(1, 6):
    # Load the train and test RENAME_IDs for the current fold
    train_ids = np.load(os.path.join(indices_dir, f'train_ids_fold{fold}.npy'), allow_pickle=True)
    test_ids = np.load(os.path.join(indices_dir, f'test_ids_fold{fold}.npy'), allow_pickle=True)

    #Create train/test datasets based on the fold's split
    train_phylo = phylo[phylo['RENAME_ID'].isin(train_ids)]
    test_phylo = phylo[phylo['RENAME_ID'].isin(test_ids)]

    train_maf = maf[maf['RENAME_ID'].isin(train_ids)]
    test_maf = maf[maf['RENAME_ID'].isin(test_ids)]
    
    train_ambig = ambig[ambig['RENAME_ID'].isin(train_ids)]
    test_ambig = ambig[ambig['RENAME_ID'].isin(test_ids)]

    # Aggregate features 
    train_phylo_features = generate_lrtt_features(train_phylo, gag, pol, gp120, gp41, feature='normalised.largest.rtt')
    test_phylo_features = generate_lrtt_features(test_phylo, gag, pol, gp120, gp41, feature='normalised.largest.rtt')

    train_maf_features = generate_maf_features(train_maf, gag, pol, gp120, gp41, feature_12c = 'MAF12c_Mean', feature_3c= 'MAF3c_Mean')
    test_maf_features = generate_maf_features(test_maf, gag, pol, gp120, gp41, feature_12c = 'MAF12c_Mean', feature_3c= 'MAF3c_Mean')

    # Merge features
    train_features = pd.merge(train_phylo_features, train_maf_features, on=['RENAME_ID', 'TSI_days'], how='inner')
    test_features = pd.merge(test_phylo_features, test_maf_features, on=['RENAME_ID', 'TSI_days'], how='inner')

    # Finalize the features
    train_features = stratify5(train_features)
    test_features = stratify5(test_features)

    # Merge ambiguity data
    train_features = pd.merge(train_features, train_ambig[['RENAME_ID', 'genome_ambig', 'gag_ambig', 'pol_ambig', 'gp41_ambig', 'gp120_ambig']], on='RENAME_ID', how='inner')
    test_features = pd.merge(test_features, test_ambig[['RENAME_ID', 'genome_ambig', 'gag_ambig', 'pol_ambig', 'gp41_ambig', 'gp120_ambig']], on='RENAME_ID', how='inner')

    # Save the fold's train/test datasets
    train_features.to_csv(os.path.join(output_dir, f'training_data_fold{fold}.csv'), header=True)
    test_features.to_csv(os.path.join(output_dir, f'test_data_fold{fold}.csv'), header=True)
    
    print(f"Completed processing fold {fold} for unsmoothed data")


Completed processing fold 1 for unsmoothed data
Completed processing fold 2 for unsmoothed data
Completed processing fold 3 for unsmoothed data
Completed processing fold 4 for unsmoothed data
Completed processing fold 5 for unsmoothed data


# PSPLINE SMOOTHED

In [38]:
lambda_penalty = 1.0

for fold in range(1, 6):
    # Load the train and test RENAME_IDs for the current fold
    train_ids = np.load(os.path.join(indices_dir, f'train_ids_fold{fold}.npy'), allow_pickle=True)
    test_ids = np.load(os.path.join(indices_dir, f'test_ids_fold{fold}.npy'), allow_pickle=True)

    # Create train/test datasets based on the fold's split
    train_phylo = phylo[phylo['RENAME_ID'].isin(train_ids)]
    test_phylo = phylo[phylo['RENAME_ID'].isin(test_ids)]
    train_maf = maf[maf['RENAME_ID'].isin(train_ids)]
    test_maf = maf[maf['RENAME_ID'].isin(test_ids)]
    train_ambig = ambig[ambig['RENAME_ID'].isin(train_ids)]
    test_ambig = ambig[ambig['RENAME_ID'].isin(test_ids)]

    # B-Spline Smoothing
    smoothed_lrtt_train, coefficients_lrtt_train = apply_pspline_smoothing(train_phylo, 'xcoord', 'normalised.largest.rtt', 13, lambda_penalty)
    smoothed_maf12c_train, coefficients_maf12c_train = apply_pspline_smoothing(train_maf, 'Window_Centre', 'MAF12c_Mean', 20, lambda_penalty)
    smoothed_maf3c_train, coefficients_maf3c_train = apply_pspline_smoothing(train_maf, 'Window_Centre', 'MAF3c_Mean', 4, lambda_penalty)

    smoothed_lrtt_test, coefficients_lrtt_test = apply_pspline_smoothing(test_phylo, 'xcoord', 'normalised.largest.rtt', 13, lambda_penalty)
    smoothed_maf12c_test, coefficients_maf12c_test = apply_pspline_smoothing(test_maf, 'Window_Centre', 'MAF12c_Mean', 20, lambda_penalty)
    smoothed_maf3c_test, coefficients_maf3c_test = apply_pspline_smoothing(test_maf, 'Window_Centre', 'MAF3c_Mean', 4, lambda_penalty)

    # Adding smoothed values to the DataFrame
    train_phylo['smoothed_lrtt'] = smoothed_lrtt_train
    train_maf['smoothed_maf12c'] = smoothed_maf12c_train
    train_maf['smoothed_maf3c'] = smoothed_maf3c_train

    test_phylo['smoothed_lrtt'] = smoothed_lrtt_test
    test_maf['smoothed_maf12c'] = smoothed_maf12c_test
    test_maf['smoothed_maf3c'] = smoothed_maf3c_test

    # Generate aggregated features
    train_phylo_features = generate_lrtt_features(train_phylo, gag, pol, gp120, gp41, feature='smoothed_lrtt')
    test_phylo_features = generate_lrtt_features(test_phylo, gag, pol, gp120, gp41, feature='smoothed_lrtt')

    train_maf_features = generate_maf_features(train_maf, gag, pol, gp120, gp41, feature_12c='smoothed_maf12c', feature_3c='smoothed_maf3c')
    test_maf_features = generate_maf_features(test_maf, gag, pol, gp120, gp41, feature_12c='smoothed_maf12c', feature_3c='smoothed_maf3c')

    # Merge the aggregated features
    train_features = pd.merge(train_phylo_features, train_maf_features, on=['RENAME_ID', 'TSI_days'], how='inner')
    test_features = pd.merge(test_phylo_features, test_maf_features, on=['RENAME_ID', 'TSI_days'], how='inner')

    # Finalize the features
    train_features = stratify5(train_features)
    test_features = stratify5(test_features)

    # Add coefficients to the aggregated features
    train_features = expand_coefficients(train_features, coefficients_lrtt_train, 'lrtt')
    train_features = expand_coefficients(train_features, coefficients_maf12c_train, 'maf12c')
    train_features = expand_coefficients(train_features, coefficients_maf3c_train, 'maf3c')

    test_features = expand_coefficients(test_features, coefficients_lrtt_test, 'lrtt')
    test_features = expand_coefficients(test_features, coefficients_maf12c_test, 'maf12c')
    test_features = expand_coefficients(test_features, coefficients_maf3c_test, 'maf3c')

    # Merge ambiguity data
    train_features = pd.merge(train_features, train_ambig[['RENAME_ID', 'genome_ambig', 'gag_ambig', 'pol_ambig', 'gp41_ambig', 'gp120_ambig']], on='RENAME_ID', how='inner')
    test_features = pd.merge(test_features, test_ambig[['RENAME_ID', 'genome_ambig', 'gag_ambig', 'pol_ambig', 'gp41_ambig', 'gp120_ambig']], on='RENAME_ID', how='inner')

    # Save the fold's train/test datasets
    train_features.to_csv(os.path.join(output_dir_pspline, f'training_data_fold{fold}.csv'), header=True)
    test_features.to_csv(os.path.join(output_dir_pspline, f'test_data_fold{fold}.csv'), header=True)

    print(f"Completed processing fold {fold}")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_phylo['smoothed_lrtt'] = smoothed_lrtt_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf12c'] = smoothed_maf12c_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf3c'] = smoothed_maf3c_train
A value is trying to be set on a copy of a slice f

Completed processing fold 1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_phylo['smoothed_lrtt'] = smoothed_lrtt_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf12c'] = smoothed_maf12c_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf3c'] = smoothed_maf3c_train
A value is trying to be set on a copy of a slice f

Completed processing fold 2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_phylo['smoothed_lrtt'] = smoothed_lrtt_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf12c'] = smoothed_maf12c_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf3c'] = smoothed_maf3c_train
A value is trying to be set on a copy of a slice f

Completed processing fold 3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_phylo['smoothed_lrtt'] = smoothed_lrtt_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf12c'] = smoothed_maf12c_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf3c'] = smoothed_maf3c_train
A value is trying to be set on a copy of a slice f

Completed processing fold 4


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_phylo['smoothed_lrtt'] = smoothed_lrtt_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf12c'] = smoothed_maf12c_train
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_maf['smoothed_maf3c'] = smoothed_maf3c_train
A value is trying to be set on a copy of a slice f

Completed processing fold 5
