In [2]:
!pip install streamlit



In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

industry_data = pd.read_csv('datasets/industry_data.csv',index_col=0).transpose()
innovation_data = pd.read_csv('datasets/innovation_data.csv').fillna('None')
internal_data = pd.read_csv('datasets/internal_data.csv',index_col=0).transpose()

industry_data.head()

Field,Security,Humanities,Nat. Sci,Health,AI Ethics,Big Data,Robotics,Documents,Multimedia,NLP,KRR,Graphs,DL/ML
DICE,,,,,,Average,,Strong,,Strong,Good,Good,Good
MagICL,,Average,,Average,,Average,,,Strong,,,,Good
BioHIT,,,,Strong,,Good,,,,Good,Average,Strong,Good
CAKT,,Average,,,Good,Good,,Good,,Strong,Good,,Strong
CER,Average,,,Good,,Strong,,,,,Strong,Good,Strong


In [2]:
%matplotlib inline

In [3]:
innovation_data.head()

Unnamed: 0,Team,Field 1,Readiness,Demo 1,Field 2,Readiness.1,Demo 2,Field 3,Readiness.2,Demo 3,Field 4,Readiness.3,Demo 4,Field 5,Readiness.4,Demo 5,Field 6,Readiness.5,Demo 6,Number of Demos
0,DICE,Mistatement Identification,Ongoing,,Bankruptcy Prediction,Ongoing,Yes,,,,,,,,,,,,,1
1,MagCIL,Music Information Retrieval,Algorithms,,Speech Analytics,Algorithms,,Video Analysis,Algorithms,,Image Recognition,Ongoing,,Sound Scene Recognition,Datasets,,Soundscape Analysis,Datasets,,0
2,BioHIT,Cancer Prediction,Datasets,Yes,Drug-drug Interaction,Datasets,Yes,Genetic Therapy,Algorithms,Yes,,,,,,,,,,3
3,CAKT,,,,,,,,,,,,,,,,,,,0
4,CER,Maritime Situational Awareness,Datasets,Yes,Fleet Management,Datasets,Yes,Activity Recognition,Datasets,Yes,Forecasting in Cancer Cell Simulations,Datasets,Yes,,,,,,,4


In [4]:
internal_data.head()

Unnamed: 0,Funding,Application-Oriented,Number of Members,Academic Collaborations,System Maturity,Demos,Industrial Collaborations
DICE,Average,Good,Average,Good,Good,Good,Average
MagICL,Strong,Strong,Strong,Strong,Strong,Strong,Strong
BioHIT,Strong,Strong,Good,Good,Good,Good,Average
CAKT,Strong,Strong,Good,Strong,Strong,Strong,Strong
CER,Good,Good,Strong,,Strong,Strong,


In [5]:
mapping = {
    'Strong': 3,
    'Good': 2,
    'Average': 1,
    'None': 0
}

internal_columns = ['Funding', 'Application-Oriented', 'Demos', 'Industrial Collaborations', 'System Maturity', 'Number of Members', 'Academic Collaborations']

industry_columns = ['Security', 'Humanities', 'Nat. Sci', 'Health', 'AI Ethics', 'Big Data', 
                                'Robotics', 'Documents', 'Multimedia', 'NLP', 'KRR', 'Graphs', 'DL/ML', ]

for column in internal_columns:
    if column in internal_data.columns:
        internal_data[column] = internal_data[column].map(mapping).fillna(0)

for column in industry_columns:
    if column in industry_data.columns:
        industry_data[column] = industry_data[column].map(mapping).fillna(0)

In [6]:
internal_data.head()

Unnamed: 0,Funding,Application-Oriented,Number of Members,Academic Collaborations,System Maturity,Demos,Industrial Collaborations
DICE,1.0,2.0,1,2.0,2.0,2.0,1.0
MagICL,3.0,3.0,3,3.0,3.0,3.0,3.0
BioHIT,3.0,3.0,2,2.0,2.0,2.0,1.0
CAKT,3.0,3.0,2,3.0,3.0,3.0,3.0
CER,2.0,2.0,3,0.0,3.0,3.0,0.0


In [7]:
industry_data.head()

Field,Security,Humanities,Nat. Sci,Health,AI Ethics,Big Data,Robotics,Documents,Multimedia,NLP,KRR,Graphs,DL/ML
DICE,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3.0,0.0,3.0,2.0,2.0,2.0
MagICL,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,2.0
BioHIT,0.0,0.0,0.0,3.0,0.0,2.0,0.0,0.0,0.0,2.0,1.0,3.0,2.0
CAKT,0.0,1.0,0.0,0.0,2.0,2.0,0.0,2.0,0.0,3.0,2.0,0.0,3.0
CER,1.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,2.0,3.0


In [8]:
from sklearn.preprocessing import StandardScaler
def scale_data(df):
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(df)
    scaled_df = pd.DataFrame(scaled_data, index=df.index, columns=df.columns)
    return scaled_df

industry_data_s = scale_data(industry_data)
internal_data_s = scale_data(internal_data)

In [15]:
feature_weights = {
    'Demos': 1,
    'Industrial Collaborations': 1,
    'Academic Collaborations': 1,
    'Application-Oriented': 1,
    'System Maturity': 1,
    'Funding': 1,
    'Number of Members': 1,
}

def calculate_performance_score(row,weights):
        
    """
    Calculate a performance score for a given row based on the feature values and their corresponding weights.

    Parameters:
    - row (Series): A row of data from the DataFrame.
    - weights (dict): A dictionary where keys are feature names and values are the weights for those features.

    Returns:
    - score (int): The calculated performance score for the row.
    """
    score = 0
    for feature, weight in weights.items():
        if feature in row:
            score += weight * row[feature]
    return score

In [10]:
def clean_innovation_data(df):
    readiness_cols = [col for col in df.columns if 'Readiness' in col]
    
    df[readiness_cols] = df[readiness_cols].fillna('None')
    
    df = df.fillna('None')
    
    return df

In [11]:
clean_innovation_data(innovation_data)

Unnamed: 0,Team,Field 1,Readiness,Demo 1,Field 2,Readiness.1,Demo 2,Field 3,Readiness.2,Demo 3,Field 4,Readiness.3,Demo 4,Field 5,Readiness.4,Demo 5,Field 6,Readiness.5,Demo 6,Number of Demos
0,DICE,Mistatement Identification,Ongoing,,Bankruptcy Prediction,Ongoing,Yes,,,,,,,,,,,,,1
1,MagCIL,Music Information Retrieval,Algorithms,,Speech Analytics,Algorithms,,Video Analysis,Algorithms,,Image Recognition,Ongoing,,Sound Scene Recognition,Datasets,,Soundscape Analysis,Datasets,,0
2,BioHIT,Cancer Prediction,Datasets,Yes,Drug-drug Interaction,Datasets,Yes,Genetic Therapy,Algorithms,Yes,,,,,,,,,,3
3,CAKT,,,,,,,,,,,,,,,,,,,0
4,CER,Maritime Situational Awareness,Datasets,Yes,Fleet Management,Datasets,Yes,Activity Recognition,Datasets,Yes,Forecasting in Cancer Cell Simulations,Datasets,Yes,,,,,,,4
5,DIA,,,,,,,,,,,,,,,,,,,0
6,KMU,IT Systems and Knowledge Management,Ongoing,,Intelligent DSS,Ongoing,,Security of Cloud-Based Services,Ongoing,,Edge Computing and AI-Driven Virtual Assistants,Ongoing,,Competency-Based Learning,Ongoing,,,,,3
7,RoboSKEL,Effective Data Colletion,Ongoing,,AI Assistance in Data Relevance,Ongoing,,Learning with small datasets,Ongoing,,Design Problems,Ongoing,,,,,,,,4
8,AI Politeia,,,,,,,,,,,,,,,,,,,0
9,ARTIFACT,,,,,,,,,,,,,,,,,,,0


In [12]:
import dash
import dash_bootstrap_components as dbc
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import base64
import io
import webbrowser

In [16]:
# Define the Dash app

# Add hover templates for more detailed tooltips
def create_heatmap(df):
    fig = px.imshow(df, text_auto=True, aspect="auto", color_continuous_scale='Blues')
    fig.update_layout(title="Heatmap of Data")
    fig.update_traces(hovertemplate="Value: %{z}")
    return fig

def create_correlation_matrix(df):
    correlation = df.corr()
    fig = ff.create_annotated_heatmap(
        z=correlation.values,
        x=list(correlation.columns),
        y=list(correlation.index),
        colorscale='Viridis',
        annotation_text=correlation.round(2).values,
        showscale=True
    )
    fig.update_layout(title='Correlation Matrix')
    return fig

# Function to create box plot using Plotly
def create_box_plot(df):
    fig = px.box(df, orientation='h', title='Distribution of Strength by Industry')
    fig.update_layout(
        xaxis_title='Involvement Level',
        yaxis_title='Industry',
        boxmode='group'  # ensures that multiple traces are displayed side by side
    )
    return fig

def create_kde_plot(df):
    fig = go.Figure()
    for column in df.columns:
        fig.add_trace(go.Scatter(
            x=df[column],
            y=df[column].value_counts().sort_index(),
            mode='lines',
            name=column,
            fill='tozeroy'
        ))
    fig.update_layout(
        title='Distribution of Involvement Level by Industry (KDE)',
        xaxis_title='Involvement Level',
        yaxis_title='Density',
        legend_title='Industry'
    )
    return fig

def create_histogram(df):
    hist_data = [df[column].dropna().values for column in df.columns]
    group_labels = df.columns.tolist()

    fig = ff.create_distplot(hist_data, group_labels, bin_size=0.5, show_hist=True, show_rug=False)
    fig.update_layout(
        title='Distribution of Involvement Level by Industry Strength',
        xaxis_title='Involvement Level',
        yaxis_title='Frequency',
        barmode='overlay',
        legend_title='Industry'
    )
    return fig
    


def plot_readiness_counts(df):
    readiness_cols = [col for col in df.columns if 'Readiness' in col]
    readiness_counts = df[readiness_cols].apply(pd.Series.value_counts).fillna(0).sum(axis=1)
    if 'None' in readiness_counts.index:
        readiness_counts = readiness_counts.drop('None')

    fig = px.bar(readiness_counts, x=readiness_counts.index, y=readiness_counts.values, title='Distribution of Readiness Level')
    fig.update_layout(
        xaxis_title='Readiness',
        yaxis_title='Count',
        bargap=0.2
    )
    return fig


def plot_performance_score(df):
    df_scored = df.copy()
    df_scored['Performance Score'] = df.apply(calculate_performance_score, axis=1, weights=feature_weights)
    df_scored.reset_index(inplace=True)
    df_scored.rename(columns={'index': 'Team'}, inplace=True)
    
    fig = px.bar(df_scored, x='Team', y='Performance Score', color='Team', title='Team Performance Scores', 
                 color_discrete_sequence=px.colors.sequential.Viridis)
    fig.update_layout(
        xaxis_title='Team', 
        yaxis_title='Performance Score', 
        xaxis_tickangle=-45
    )
    return fig

def plot_performance_distribution(df):
    """
    Plots a histogram of performance scores with a kernel density estimate (KDE).

    Parameters:
    - df (DataFrame): The DataFrame containing the team activity data and performance scores.
    
    Returns:
    - fig: A Plotly figure object representing the performance score distribution.
    """
    df_scored = df.copy()
    df_scored['Performance Score'] = df.apply(calculate_performance_score, axis=1, weights=feature_weights)
    
    fig = px.histogram(df_scored, x='Performance Score', nbins=20, marginal='rug', title='Performance Score Distribution')
    fig.update_layout(
        xaxis_title='Performance Score', 
        yaxis_title='Frequency'
    )
    return fig



In [17]:
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


# Define the layout
app.layout = dbc.Container([
    dbc.NavbarSimple(
        brand="Interactive Data Analysis Dashboard",
        brand_href="#",
        color="primary",
        dark=True,
    ),
    dbc.Row([
        dbc.Col(
            dbc.Card([
                dbc.CardHeader("Controls"),
                dbc.CardBody([
                    html.Div([
                        dbc.Label("Select Dataset"),
                        dcc.Dropdown(
                            id='dataset-dropdown',
                            options=[
                                {'label': 'Internal Data (Raw)', 'value': 'internal_raw'},
                                {'label': 'Industry Data (Raw)', 'value': 'industry_raw'},
                                {'label': 'Internal Data (Scaled)', 'value': 'internal_scaled'},
                                {'label': 'Industry Data (Scaled)', 'value': 'industry_scaled'},
                                {'label': 'Innovation Data (Features)', 'value': 'innovation_features'}
                            ],
                            value='internal_raw',
                            className='dropdown'
                        ),
                        dbc.Tooltip(
                            "Select a dataset to visualize.",
                            target="dataset-dropdown"
                        )
                    ]),
                    html.Div([
                        dbc.Label("Select Visualization"),
                        dcc.RadioItems(
                            id='visualization-type',
                            options=[
                                {'label': 'Heatmap', 'value': 'heatmap'},
                                {'label': 'Correlation Matrix', 'value': 'correlation'},
                                {'label': 'Box Plot (Industry Data Only)', 'value': 'boxplot'},
                                {'label': 'KDE Plot (Industry Data Only)', 'value': 'kdeplot'},
                                {'label': 'Histogram (Industry Data Only)', 'value': 'histogram'},
                                {'label': 'Readiness Counts (Innovation Data Only)', 'value': 'readiness_counts'},
                                {'label': 'Performance Score (Internal Data Only)', 'value': 'performance_score'},
                                {'label': 'Performance Distribution (Internal Data Only)', 'value': 'performance_distribution'},
                               
                            ],
                            value='heatmap',
                            labelStyle={'display': 'block'},
                            className='radio-items'
                        ),
                        dbc.Tooltip(
                            "Select the type of visualization to display.",
                            target="visualization-type"
                        )
                    ])
                ])
            ], className='card'), width=3
        ),
        dbc.Col(
            dcc.Loading(
                id="loading",
                type="default",
                className='loading-container',
                children=dcc.Graph(id='visualization-graph', className='graph-container')
            ), width=9
        )
    ], className="mt-4"),
    dbc.Row([
        dbc.Col(
            dbc.Card([
                dbc.CardBody([
                    html.H4("About this Dashboard", className="card-title"),
                    html.P(
                        "This interactive dashboard allows you to visualize different datasets and their "
                        "characteristics using various types of plots such as heatmaps, correlation matrices, "
                        "box plots, and more. Select a dataset and a visualization type to get started.",
                        className="card-text"
                    )
                ])
            ], className='mt-4')
        )
    ])
], fluid=True)

# Define the callbacks
@app.callback(
    Output('visualization-graph', 'figure'),
    [Input('dataset-dropdown', 'value'),
     Input('visualization-type', 'value')]
)
def update_visualization(selected_dataset, visualization_type):
    if selected_dataset == 'internal_raw':
        data = internal_data.copy()
    elif selected_dataset == 'industry_raw':
        data = industry_data.copy()
    elif selected_dataset == 'internal_scaled':
        data = internal_data_s.copy()
    elif selected_dataset == 'industry_scaled':
        data = industry_data_s.copy()
    elif selected_dataset == 'innovation_features':
        data = innovation_data.copy()
    else:
        return go.Figure()

    if visualization_type == 'heatmap':
        return create_heatmap(data)
    elif visualization_type == 'correlation':
        return create_correlation_matrix(data)
    elif visualization_type == 'boxplot' and 'industry' in selected_dataset:
        return create_box_plot(data)
    elif visualization_type == 'kdeplot' and 'industry' in selected_dataset:
        return create_kde_plot(data)
    elif visualization_type == 'histogram' and 'industry' in selected_dataset:
        return create_histogram(data)
    elif visualization_type == 'readiness_counts' and 'innovation' in selected_dataset:
        return plot_readiness_counts(data)
    elif visualization_type == 'performance_score' and 'internal' in selected_dataset:
        return plot_performance_score(data)
    elif visualization_type == 'performance_distribution' and 'internal' in selected_dataset:
        return plot_performance_distribution(data)
    else:
        return go.Figure()

if __name__ == '__main__':
    app.run_server(debug=True)


if __name__ == '__main__':
    webbrowser.open_new("http://localhost:8050/")
    app.run_server(debug=True, use_reloader=False, port=8050)
    

AssertionError: The setup method 'errorhandler' can no longer be called on the application. It has already handled its first request, any changes will not be applied consistently.
Make sure all imports, decorators, functions, etc. needed to set up the application are done before running it.