In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, RandomizedSearchCV, validation_curve
from sklearn.metrics import balanced_accuracy_score, precision_score, recall_score, f1_score, make_scorer
from sklearn.metrics import ConfusionMatrixDisplay, classification_report

import ugtm
from ugtm import eGTM
import altair as alt


### Put together dataframe to generate dataset.

In [22]:
# Name of the label column used as the classification target in the cells below.
label_scheme = 'labels_2'

lc_features = pd.read_csv('../processed_data/lc_features.csv')
gaia_features = pd.read_csv('../processed_data/gaia_features.csv')

# Discard the Gaia columns at positions 1-15; position 0 is kept.
# NOTE(review): the selection is positional, so it depends on the CSV column order.
drop = list(gaia_features.columns[1:16])
gaia_features = gaia_features.drop(columns=drop)

# Inner join on the ZTF object id: only objects present in both tables survive.
merged = lc_features.merge(gaia_features, on=['oid_ztf'], how='inner')

# Keep the rows that carry a label under the chosen labelling scheme.
dataset = merged[merged[label_scheme].notnull()].reset_index(drop=True)

# Columns removed from the merged table before any further processing.
drop2 = ['FalseAlarm_prob_g','FalseAlarm_prob_r']
dataset = dataset.drop(columns=drop2)

# Outlier handling for the skewed light-curve features. The g- and r-band lists
# contain the same feature stems and differ only in their band suffix.
from outlier import outlier_thresholds_skewed, apply_thresholds
skewed_stems = (
    ['CAR_mean', 'CAR_sigma', 'Eta_e']
    + [f'Freq{freq}_harmonics_amplitude_{harmonic}' for freq in (1, 2, 3) for harmonic in range(4)]
    + ['LinearTrend', 'MaxSlope', 'PeriodLS', 'Period_fit', 'SlottedA_length']
)
skewed_g = [f'{stem}_g' for stem in skewed_stems]
skewed_r = [f'{stem}_r' for stem in skewed_stems]

# NOTE(review): the helpers come from the project-local `outlier` module; presumably they
# derive IQR-based limits per column and then apply them - confirm against that module.
ots = outlier_thresholds_skewed(dataset, skewed_g+skewed_r, iqr_threshold=2, upper_limit=None, lower_limit=0)
dataset = apply_thresholds(dataset, skewed_g+skewed_r, ots)

# Show every column but at most 10 rows when frames are displayed.
pd.options.display.max_columns = None
pd.options.display.max_rows = 10

dataset.columns.to_list()


  lc_features = pd.read_csv('../processed_data/lc_features.csv')


['oid_ztf',
 'oid_aavso',
 'type_aavso',
 'ra',
 'dec',
 'Eclipsing',
 'CV_Types',
 'CV_subtypes',
 'CV_subsubtypes',
 'eclipse_clear',
 'manual_label',
 'Clarity',
 'labels_1',
 'labels_2',
 'labels_3',
 'labels_4',
 'Amplitude_g',
 'AndersonDarling_g',
 'Autocor_length_g',
 'Beyond1Std_g',
 'CAR_mean_g',
 'CAR_sigma_g',
 'CAR_tau_g',
 'Con_g',
 'Eta_e_g',
 'FluxPercentileRatioMid20_g',
 'FluxPercentileRatioMid35_g',
 'FluxPercentileRatioMid50_g',
 'FluxPercentileRatioMid65_g',
 'FluxPercentileRatioMid80_g',
 'Freq1_harmonics_amplitude_0_g',
 'Freq1_harmonics_amplitude_1_g',
 'Freq1_harmonics_amplitude_2_g',
 'Freq1_harmonics_amplitude_3_g',
 'Freq1_harmonics_rel_phase_1_g',
 'Freq1_harmonics_rel_phase_2_g',
 'Freq1_harmonics_rel_phase_3_g',
 'Freq2_harmonics_amplitude_0_g',
 'Freq2_harmonics_amplitude_1_g',
 'Freq2_harmonics_amplitude_2_g',
 'Freq2_harmonics_amplitude_3_g',
 'Freq2_harmonics_rel_phase_1_g',
 'Freq2_harmonics_rel_phase_2_g',
 'Freq2_harmonics_rel_phase_3_g',
 'Freq3_h

In [23]:
from sklearn.preprocessing import LabelEncoder

# Columns with a larger fraction of missing values than this are discarded.
missingness_threshold = 0.5

# Feature matrix: every column after the 16 leading identifier/label columns.
X = dataset.iloc[:,16:]
# Encode the class names of the chosen labelling scheme as integers.
y_names = dataset[label_scheme]
enc = LabelEncoder().fit(y_names)
y = enc.transform(y_names)

# Drop columns whose missing fraction exceeds the threshold.
# NOTE(review): infinities are converted to NaN only after this step, so they are not
# counted as missing here - confirm that this ordering is intended.
missingness = (X.isnull().sum()/X.shape[0]).sort_values(ascending=False)
missing_high = missingness[missingness>missingness_threshold].index.to_list()
print(f'\nColumns removed for missingness fraction of >{missingness_threshold}\n{missing_high}\n')
X = X.drop(columns=missing_high)

# Save feature column names.
X_cols = X.columns.to_list()

# Replace +/-inf with NaN in a single vectorised pass (instead of one masked write per
# column) so that the imputer treats them like any other missing value.
X = X.replace([np.inf, -np.inf], np.nan)

# Stratified 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=1)

# Print the labelling scheme with associated value counts (row limit lifted so that
# every class is listed).
pd.options.display.max_rows = None
print('Labelling scheme:')
print(dataset[label_scheme].value_counts())


Columns removed for missingness fraction of >0.5
['Eta_color']

Labelling scheme:
labels_2
dwarf_nova_SU_UMa    630
dwarf_nova_Z_Cam     174
nova_like            144
nova_like_VY_Scl     120
dwarf_nova_U_Gem     116
polar                114
int_polar             49
AMCVn                 46
nova                  46
Name: count, dtype: int64


### Preprocessing pipeline that outputs the algorithm

In [24]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import StandardScaler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import ADASYN
from imblearn.pipeline import Pipeline

# Classifier to use: a random forest of 500 trees, each limited to depth 14, considering
# 40% of the features per split and requiring at least 6 samples to split a node.
# class_weight='balanced_subsample' re-weights the classes within each bootstrap sample,
# n_jobs=-1 uses all available cores and random_state=1 fixes the seed.
classifier = RandomForestClassifier(n_estimators=500,
                                    max_depth=14,
                                    max_features=0.4,
                                    min_samples_split=6,
                                    class_weight='balanced_subsample', n_jobs=-1, random_state=1)
# Short tag for the classifier (not referenced in the visible cells; presumably used to
# name outputs elsewhere).
classifierNm= 'RF'

# ColumnTransformer returns a bare array; this subclass re-attaches the column labels and
# the row index so that the data is still a DataFrame after scaling/imputation.
class ColumnTransformerPandas(ColumnTransformer):
    """ColumnTransformer whose ``transform``/``fit_transform`` return a DataFrame.

    The column labels seen at fit time are stored in ``self.columns`` and re-applied to
    the transformed array. The row index of the frame being transformed is carried over
    as well, so the result stays aligned with ``X`` (previously the index was silently
    replaced by a fresh ``RangeIndex``).

    NOTE(review): the labels are re-applied positionally. That is only correct when the
    transformers cover the columns in their original order, e.g. when a single
    transformer is given the full column list.
    """

    def _as_frame(self, values, X):
        """Wrap ``values`` in a DataFrame labelled with the fitted columns and ``X``'s index."""
        # Inputs without an index fall back to the default RangeIndex.
        return pd.DataFrame(values, columns=self.columns, index=getattr(X, 'index', None))

    def fit(self, X, y=None):
        """Remember the column labels of ``X`` and fit the wrapped transformers."""
        self.columns = X.columns
        return super().fit(X, y)

    def transform(self, X):
        """Transform ``X`` and return the result as a labelled DataFrame."""
        return self._as_frame(super().transform(X), X)

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return the transformed data as a labelled DataFrame."""
        self.columns = X.columns
        return self._as_frame(super().fit_transform(X, y), X)


# Column-wise preprocessing steps. Each one is given the full feature list and, thanks to
# ColumnTransformerPandas, returns a DataFrame.
impute_simple = ColumnTransformerPandas(
    transformers=[('imputer', SimpleImputer(strategy='mean'), X_cols)],
    remainder='passthrough',
)

impute_KNN = ColumnTransformerPandas(
    transformers=[('imputer', KNNImputer(n_neighbors=5, weights='distance'), X_cols)],
    remainder='passthrough',
)

normalise = ColumnTransformerPandas(
    transformers=[('normalise', StandardScaler(), X_cols)],
    remainder='passthrough',
)

# Target number of training examples per class after resampling.
size = 390

su_uma_code = enc.transform(['dwarf_nova_SU_UMa'])[0]
print(su_uma_code)

# Undersampling: only the majority class (SU UMa dwarf novae) is reduced to `size`.
class_dist_us = {su_uma_code: size}

# Oversampling: every class is brought up to `size`.
class_dist_os = {
    enc.transform([class_name])[0]: size
    for class_name in [
        'dwarf_nova_SU_UMa',
        'dwarf_nova_Z_Cam',
        'dwarf_nova_U_Gem',
        'nova_like_VY_Scl',
        'polar',
        'nova_like',
        'nova',
        'int_polar',
        'AMCVn',
    ]
}

undersample = RandomUnderSampler(sampling_strategy=class_dist_us, random_state=1)
oversample = ADASYN(sampling_strategy=class_dist_os, n_neighbors=5,random_state=1)

# 1. Standardise: fitted on the training split only, then applied to the test split.
X_train_norm = normalise.fit_transform(X_train)
X_test_norm = normalise.transform(X_test)

# 2. Impute missing values with the KNN imputer (again fitted on the training split).
X_train_imp = impute_KNN.fit_transform(X_train_norm)
X_test_imp = impute_KNN.transform(X_test_norm)

# 3. Undersample the majority class ...
X_train_us, y_train_us = undersample.fit_resample(X_train_imp, y_train)
print(f'Undersampled training set shape: {X_train_us.shape}')
# 4. ... then oversample all classes with ADASYN.
X_train_os, y_train_os = oversample.fit_resample(X_train_us, y_train_us)
print(f'Oversampled training set shape: {X_train_os.shape}')

# Fit the classifier.
# NOTE(review): the model is fitted on the imputed but NOT resampled training data
# (X_train_imp, y_train); the resampled set is only used by later cells. Confirm that
# this is intended.
mod = classifier.fit(X_train_imp, y_train)



1
Undersampled training set shape: (956, 252)
Oversampled training set shape: (3479, 252)


### Load pretrained model/s

In [28]:
# No model is loaded in this cell; the classifier `mod` fitted in the previous cell is
# reused below. The train/test names are rebound to the data used from here on:
# the resampled training set and the imputed test set.
# NOTE: X_train / y_train are overwritten, so the preprocessing cell above must not be
# re-run after this one without first re-running the train/test split.
X_train =  X_train_os.copy()
y_train = y_train_os.copy()
X_test = X_test_imp.copy()

### Using class probabilities as input

In [29]:
# Preliminaries
# By default Altair raises MaxRowsError when a chart embeds more than 5000 rows; the
# limit exists to stop notebooks from growing very large. The charts below embed their
# data directly, so the check is switched off.
alt.data_transformers.disable_max_rows()


# Class probabilities predicted by the original model; these are the GTM's inputs.
labels2 = enc.classes_.tolist()
preds_train = mod.predict_proba(X_train)
print(np.min(preds_train, axis=1))
preds_test = mod.predict_proba(X_test)

# One named column per class.
preds_train_df = pd.DataFrame(preds_train, columns=labels2)
preds_test_df = pd.DataFrame(preds_test, columns=labels2)

# Standardise the probabilities using the training statistics.
# NOTE: scaler.transform returns plain arrays by default, so from here on the *_df
# names no longer hold DataFrames.
scaler = StandardScaler().fit(preds_train_df)
preds_train_df = scaler.transform(preds_train_df)
preds_test_df = scaler.transform(preds_test_df)

data = preds_train_df
y_set = y_train
X_set = X_train

# Fit a GTM with a k x k = 10 x 10 grid of nodes.
gtm_model = ugtm.runGTM(data, verbose=False, k=10)

# Projection of the test data onto the latent space (kept for inspection).
transformed = ugtm.transform(optimizedModel=gtm_model, train=data, test=preds_test_df)

# Mean projection: the mean position of each data point in latent space.
# Further information located here: https://ugtm.readthedocs.io/en/latest/ugtm.html?highlight=ugtm.matY#module-ugtm.ugtm_classes/
# For the projection of the test data use transformed.matMeans instead of gtm_model.matMeans.
mean_u = pd.DataFrame(gtm_model.matMeans, columns=['U1','U2'])

# Attach the class names; the GTM was fitted on the training data, hence y_train.
mean_u_labels = mean_u.assign(y=enc.inverse_transform(y_train))

# Same projection through the scikit-learn style wrapper. The wrapper only returns the
# projected coordinates and does not expose the attributes of the model object above.
gtm_model2 = eGTM(k=10,verbose=False).fit(data).transform(data)
mean_u2 = pd.DataFrame({'U1': gtm_model2[:,0], 'U2': gtm_model2[:,1]})

# Plot the latent space, distinguishing the classes by both colour and marker shape.
alt.Chart(mean_u_labels).mark_point(size=40).encode(
    x='U1',
    y='U2',
    color='y',
    shape='y',
    tooltip=['y'],
)




[0.00215952 0.01000375 0.00403148 ... 0.002      0.00954879 0.00051685]


In [80]:
from sklearn.preprocessing import MinMaxScaler,StandardScaler

# gtm_model.matY has shape (n_features, n_nodes): column i is the centre of Gaussian
# component (node) i in data space, i.e. the manifold Y = W * Phi^T.
# The GTM was fitted on standardised class probabilities, so the node centres are mapped
# back through the scaler to express them in the original probability units. The scaler
# works on (n_samples, n_features) arrays, hence the transposes.
refvect = scaler.inverse_transform(gtm_model.matY.T).T


# Draws the node grid (matX) as coloured squares: one square per node, coloured by the
# per-node value passed in `label`.
def plot_ref_vect(gtm_matX,label,title,fig_size=(200,200)):
    """Return an Altair chart of the GTM nodes coloured by a per-node value.

    gtm_matX : 2-D coordinates of the nodes, one (x1, x2) row per node.
    label    : one value per node, stored in the 'Prob' column and mapped to colour.
    title    : chart title.
    fig_size : (width, height) of the chart.
    """
    node_frame = pd.DataFrame(gtm_matX, columns=["x1", "x2"])
    node_frame['Prob'] = label
    colour = alt.Color('Prob:Q', scale=alt.Scale(scheme='turbo'))
    squares = alt.Chart(node_frame).mark_square().encode(
        x='x1',
        y='x2',
        color=colour,
        size=alt.value(350),
        tooltip=['x1','x2', 'Prob:Q'],
    )
    return squares.properties(title=title, width=fig_size[0], height=fig_size[1])
# %%

# One map per class. The colour coding is based on matY, which defines the central
# position of each Gaussian (node) in feature space - here class-probability space.
# The panels are generated from `labels2`, so the grid adapts to the number of classes
# of the chosen labelling scheme instead of assuming exactly nine.
CLASS_MAP_COLUMNS = 3  # panels per row

class_panels = [
    plot_ref_vect(gtm_model.matX, label=refvect[class_idx, :], title=class_name)
    for class_idx, class_name in enumerate(labels2)
]
class_rows = [
    alt.hconcat(*class_panels[start:start + CLASS_MAP_COLUMNS])
    for start in range(0, len(class_panels), CLASS_MAP_COLUMNS)
]
class_maps = (
    alt.vconcat(*class_rows)
    .configure_legend(labelFontSize=15, titleFontSize=17, titleFontWeight='normal')
    # labelFontSize=0 hides the axis tick labels.
    .configure_axis(labelFontSize=0, titleFontSize=15, titleFontWeight='normal')
    .configure_title(fontSize=18, fontWeight='bold')
)

class_maps
# %%


In [35]:
# Sanity check of the fitted GTM's array shapes:
#   matX - latent-space coordinates of the nodes, one (x1, x2) row per node;
#   matY - node centres in data space, one row per input feature, one column per node.
print(gtm_model.matX.shape)
print(gtm_model.matY.shape)

# matMeans (used in the next cell) holds the mean latent-space position of each
# data point, not of each node.


(100, 2)
(9, 100)


In [36]:
# Every example in data space is mapped to a location in the 2-D latent space, either
# through matMeans (responsibility-weighted mean of the node positions) or through
# matModes (position of the most responsible node). Binning those locations on a
# 10 x 10 grid gives the number of examples that fall near each node, which is drawn
# as the size of the node's square.
# NOTE(review): the bin edges span the range of the projected data rather than the node
# grid, and H.flatten() is assumed to follow the row order of matX - confirm both.
H, xedges, yedges = np.histogram2d(
    gtm_model.matMeans[:, 0],
    gtm_model.matMeans[:, 1],
    bins=[10, 10],
)
# %%
dfmap = pd.DataFrame(gtm_model.matX, columns=["x1", "x2"]).assign(size=H.flatten())

alt.Chart(dfmap).mark_square().encode(
    x='x1',
    y='x2',
    size='size',
    tooltip=['x1','x2','size'],
).properties(title="GTM Membership Map", width=300, height=300)

In [37]:
from sklearn.preprocessing import minmax_scale

def factor_map(gtm_model, Xfact):
    """Average one feature over the examples assigned to each GTM node.

    Parameters
    ----------
    gtm_model : fitted GTM exposing ``matR`` (responsibilities, one row per example and
        one column per node) and ``matX`` (2-D coordinates of the nodes).
    Xfact : pandas Series with the feature value of every example, in the same order as
        the rows of ``matR``.

    Returns
    -------
    DataFrame with one row per node and the columns ``x1``, ``x2`` (node coordinates),
    ``membership`` (node number), ``scale`` (node mean of the feature, min-max scaled to
    [0, 1]; NaN for nodes without any assigned example) and ``size`` (constant 1).
    """
    # Feature values in a single 'scale' column, whatever the Series is called.
    dfclus = Xfact.rename('scale').to_frame()
    # matR holds the responsibilities: the posterior probability that a given Gaussian
    # (node) generated a data point. Each example is assigned to its most responsible node.
    dfclus['membership'] = np.argmax(gtm_model.matR, axis=1)
    # Mean feature value of the examples assigned to each node ...
    dfclus = dfclus.groupby('membership', as_index=False).mean()
    # ... rescaled to the range 0-1 across the nodes.
    dfclus['scale'] = minmax_scale(dfclus['scale'])
    # matX is the regular 2-D grid of node positions.
    df_map = pd.DataFrame(gtm_model.matX, columns=["x1", "x2"])
    # Number the nodes in grid order. The count is taken from the grid itself so that
    # models with a grid other than 10 x 10 work too.
    df_map['membership'] = np.arange(len(df_map))
    # Left merge: nodes that were never the most responsible one keep a NaN 'scale'.
    df_map = df_map.merge(dfclus, how='left', on='membership')
    # Constant column kept for plots that map it to the marker size.
    df_map['size'] = 1
    return df_map

def plot_factor_map(df_map, title='Factor Map',fig_size=(115,115),node_size=1):
    """Return an Altair chart of the node grid coloured by the 'scale' column.

    df_map    : frame with the columns 'x1', 'x2' and 'scale' (as built by factor_map).
    title     : chart title (a list of strings gives a multi-line title).
    fig_size  : (width, height) of the chart.
    node_size : currently unused; the squares have a fixed size.
    """
    colour = alt.Color('scale:Q', scale=alt.Scale(scheme='turbo'))
    squares = alt.Chart(df_map).mark_square().encode(
        x=alt.X('x1', axis=None),
        y=alt.Y('x2', axis=None),
        color=colour,
        size=alt.value(140),
        tooltip=['x1','x2','scale:Q'],
    )
    return squares.properties(title=title, width=fig_size[0], height=fig_size[1])

In [38]:
# Factor maps for every feature, laid out on a grid.
varnames = X_cols
FACTOR_MAP_COLUMNS = 6  # panels per row

# %%
chart = alt.vconcat()
# Walk through the features one grid row at a time. The number of rows follows from the
# number of features, so no empty rows are appended and no feature is dropped.
for row_start in range(0, len(varnames), FACTOR_MAP_COLUMNS):
    row = alt.hconcat()
    for feature_name in varnames[row_start:row_start + FACTOR_MAP_COLUMNS]:
        idx_X_set = X_cols.index(feature_name)
        row |= plot_factor_map(
            factor_map(gtm_model, X_set.iloc[:,idx_X_set]),
            # Long feature names are split over two title lines.
            title=[feature_name[0:17], feature_name[17:34]])
    chart &= row

chart




In [16]:
# Display the names of the retained feature columns (those left after the missingness filter).
X_cols

['Amplitude_g',
 'AndersonDarling_g',
 'Autocor_length_g',
 'Beyond1Std_g',
 'CAR_mean_g',
 'CAR_sigma_g',
 'CAR_tau_g',
 'Con_g',
 'Eta_e_g',
 'FluxPercentileRatioMid20_g',
 'FluxPercentileRatioMid35_g',
 'FluxPercentileRatioMid50_g',
 'FluxPercentileRatioMid65_g',
 'FluxPercentileRatioMid80_g',
 'Freq1_harmonics_amplitude_0_g',
 'Freq1_harmonics_amplitude_1_g',
 'Freq1_harmonics_amplitude_2_g',
 'Freq1_harmonics_amplitude_3_g',
 'Freq1_harmonics_rel_phase_1_g',
 'Freq1_harmonics_rel_phase_2_g',
 'Freq1_harmonics_rel_phase_3_g',
 'Freq2_harmonics_amplitude_0_g',
 'Freq2_harmonics_amplitude_1_g',
 'Freq2_harmonics_amplitude_2_g',
 'Freq2_harmonics_amplitude_3_g',
 'Freq2_harmonics_rel_phase_1_g',
 'Freq2_harmonics_rel_phase_2_g',
 'Freq2_harmonics_rel_phase_3_g',
 'Freq3_harmonics_amplitude_0_g',
 'Freq3_harmonics_amplitude_1_g',
 'Freq3_harmonics_amplitude_2_g',
 'Freq3_harmonics_amplitude_3_g',
 'Freq3_harmonics_rel_phase_1_g',
 'Freq3_harmonics_rel_phase_2_g',
 'Freq3_harmonics_rel_

In [41]:
# NOTE(review): `varnames_r` is only assigned in the next cell (In [42]), so this cell
# raises NameError when the notebook is run top to bottom on a fresh kernel.
print(varnames_r)

['Amplitude_r', 'dif_min_mean_r', 'dif_min_median_r', 'npeaks_1to2_r', 'npeaks_2to5_r', 'npeaks_above5_r', 'Eta_e_r', 'CAR_sigma_r', 'Freq1_harmonics_amplitude_0_r', 'Skew_r', 'LinearTrend_r', 'freq_pwr_max_r', 'Std_r', 'MedianAbsDev_r', 'stdstilllev_t20s10_r', 'Mean_r', 'min_mag_r', 'n_obs_r']


In [42]:
def plot_factor_map2(df_map, title='Factor Map',fig_size=(150,150),node_size=1):
    """Larger variant of plot_factor_map: 150 x 150 panels with bigger squares.

    df_map    : frame with the columns 'x1', 'x2' and 'scale' (as built by factor_map).
    title     : chart title (a list of strings gives a multi-line title).
    fig_size  : (width, height) of the chart.
    node_size : currently unused; the squares have a fixed size.
    """
    colour = alt.Color('scale:Q', scale=alt.Scale(scheme='turbo'))
    squares = alt.Chart(df_map).mark_square().encode(
        x=alt.X('x1', axis=None),
        y=alt.Y('x2', axis=None),
        color=colour,
        size=alt.value(250),
        tooltip=['x1','x2','scale:Q'],
    )
    return squares.properties(title=title, width=fig_size[0], height=fig_size[1])

# Feature subsets for which factor maps can be drawn.

varnames_g = ['Amplitude_g','dif_min_mean_g','dif_min_median_g','npeaks_1to2_g','npeaks_2to5_g',
            'npeaks_above5_g','Eta_e_g','CAR_sigma_g','Freq1_harmonics_amplitude_0_g','Skew_g',
            'LinearTrend_g','freq_pwr_max_g','Std_g','MedianAbsDev_g','stdstilllev_t20s10_g',
            'Mean_g','min_mag_g','n_obs_g'
            ]
# r-band counterparts: swap only the trailing band suffix, so that a '_g' occurring
# inside a feature name is left untouched.
varnames_r = [v[:-2] + '_r' if v.endswith('_g') else v for v in varnames_g]

# Colour and Gaia features
varnames_gaia = ['parallax','pm','clr_mean','clr_bright','bp_rp','bp_g','g_rp','StetsonJ','StetsonL']

# NAM varnames
varnames_nam = ['bp_rp','nu_eff_used_in_astrometry','parallax','absmag_g',
                'PeriodLS_g','npeaks_1to2_g','npeaks_2to5_g','npeaks_above5_g',
                'pnts_leq_rollMedWin20-5mag_r','stdstilllev_t20s10_g','Gskew_g','n_obs_g',
                'dif_min_median_g','kurtosis_r','LinearTrend_r','StetsonJ']


# Subset to plot and number of panels per grid row.
feature_list = varnames_gaia
SELECTED_MAP_COLUMNS = 3

# Fail early, naming every requested feature that is not available.
missing_features = [name for name in feature_list if name not in X_cols]
if missing_features:
    raise ValueError(f'Features not present in X_cols: {missing_features}')

# %%
chart = alt.vconcat().configure_title(fontSize=16,fontWeight='bold')
# One grid row per SELECTED_MAP_COLUMNS features. The number of rows follows from the
# length of the list, so no empty rows are appended.
for row_start in range(0, len(feature_list), SELECTED_MAP_COLUMNS):
    row = alt.hconcat()
    for feature_name in feature_list[row_start:row_start + SELECTED_MAP_COLUMNS]:
        idx_X_set = X_cols.index(feature_name)
        row |= plot_factor_map2(
            factor_map(gtm_model, X_set.iloc[:,idx_X_set]),
            # Long feature names are split over two title lines.
            title=[feature_name[0:20], feature_name[20:34]])
    chart &= row

chart