In [None]:
import pandas as pd
import umap
import streamlit as st
import plotly.graph_objects as go

In [None]:
# Page layout - set the page title and switch to the wide layout
st.set_page_config(
    page_title='Visually-Assisted Performance Evaluation of Metamodels in Stacking Ensemble Learning',
    layout='wide',
)

In [None]:
# Sidebar - one CSV uploader per input file (probabilities, target, model).
# The three uploaders share the same label, so each one gets its own key (0, 1, 2).
st.sidebar.header('Upload your model probabilities data')
uploaded_probabilities = st.sidebar.file_uploader(
    label="Upload your input CSV file",
    type=["csv"],
    key=0,
)

st.sidebar.header('Upload your target data')
uploaded_target = st.sidebar.file_uploader(
    label="Upload your input CSV file",
    type=["csv"],
    key=1,
)

st.sidebar.header('Upload your model data')
uploaded_model = st.sidebar.file_uploader(
    label="Upload your input CSV file",
    type=["csv"],
    key=2,
)

In [None]:
# Sidebar - Specify parameter settings
st.sidebar.header('Set Parameters for UMAP')
# NOTE: the second positional argument of number_input is `min_value`, not the
# default value. The default is therefore passed explicitly through `value`, so
# that 5 and 0.1 are starting values rather than hard lower bounds.
parameter_umap_n_neighbors = st.sidebar.number_input(
    'Number of neighbors (n_neighbors)',
    min_value=2,  # smallest neighbourhood size that is meaningful for UMAP
    value=5,
    step=1,  # all-int arguments keep the returned value an int
)
parameter_umap_metric = st.sidebar.selectbox('Metric', ('euclidean', 'manhattan', 'chebyshev', 'minkowski'))
parameter_umap_min_dist = st.sidebar.number_input(
    'Minimal distance',
    min_value=0.0,
    max_value=1.0,  # UMAP rejects min_dist greater than `spread` (left at its default of 1.0 in this app)
    value=0.1,
    step=0.01,
)
st.sidebar.write('---')

In [None]:
# Main page - heading and a short description of the expected input data
st.subheader('Dataset')
st.write(
    'The dataset consists of the model probabilities for each model '
    'in the ensemble and the target variable.'
)

In [None]:
def create_UMAP_chart(df_probabilities, df_model):
    """Embed ``df_probabilities`` with UMAP and show the result as a scatter plot.

    The UMAP settings are not passed in; they are read from the module-level
    sidebar values ``parameter_umap_n_neighbors``, ``parameter_umap_metric``
    and ``parameter_umap_min_dist``.

    Args:
        df_probabilities: Data handed to ``fit_transform``. The first two
            columns of the resulting embedding are plotted as x and y.
        df_model: Accepted but not used by this function.

    Returns:
        None. The figure is rendered through ``st.plotly_chart``.
    """
    # Fit the projection with the settings chosen in the sidebar
    reducer = umap.UMAP(
        n_neighbors=parameter_umap_n_neighbors,
        metric=parameter_umap_metric,
        min_dist=parameter_umap_min_dist,
    )
    coords = reducer.fit_transform(df_probabilities)

    # One marker per embedded point
    marker_style = dict(size=10, line=dict(color='MediumPurple', width=2))
    points = go.Scatter(x=coords[:, 0], y=coords[:, 1], mode='markers', marker=marker_style)

    fig = go.Figure(data=points)
    fig.update_layout(title='UMAP', xaxis_title='UMAP 1', yaxis_title='UMAP 2')
    st.plotly_chart(fig)

In [None]:
# Entry point of the page: plot the uploaded data once all three CSV files are
# present, otherwise offer the bundled example dataset.
# uploaded_model is part of the condition because it is read below; reading it
# while it is still None would make pd.read_csv fail.
if (
    uploaded_probabilities is not None
    and uploaded_target is not None
    and uploaded_model is not None
):
    df_probabilities = pd.read_csv(uploaded_probabilities)
    df_target = pd.read_csv(uploaded_target)
    df_model = pd.read_csv(uploaded_model)
    # Distinct values of the model file's `algorithm_id` column (not used in this section)
    algo = df_model.algorithm_id.unique()
    # Check if the number of columns in the probabilities dataframe is equal to the number of rows in the target dataframe
    # NOTE(review): presumably each probabilities column corresponds to one target row - confirm against the data format
    if len(df_probabilities.columns) == len(df_target):
        # Second argument is the model dataframe, matching the function's `df_model` parameter
        create_UMAP_chart(df_probabilities, df_model)
    else:
        st.write('The number of columns in the probabilities dataframe is not equal to the number of rows in the target dataframe.')
else:
    st.info('Awaiting for CSV file to be uploaded.')
    if st.button('Press to use Example Dataset'):
        # NOTE(review): absolute path on the original author's machine; the
        # example only loads where this directory exists.
        path = r'D:\github\2dv50e\Data\1. Heart Disease'
        try:
            df_probabilities = pd.read_csv(path + r'\topModelsProbabilities.csv')
            df_target = pd.read_csv(path + r'\target.csv')
            df_model = pd.read_csv(path + r'\topModels.csv')
        except FileNotFoundError:
            # Report the missing example files in the page instead of raising
            st.error(f'Example dataset not found under {path}.')
        else:
            create_UMAP_chart(df_probabilities, df_model)