In [52]:
from ucimlrepo import fetch_ucirepo 
import plotly.express as px
import random
import numpy as np
import pandas as pd
from sklearn.feature_selection import mutual_info_regression

success = False

# Keep drawing random UCI repository ids until one yields a dataset in which
# at least one pair of numeric columns has positive mutual information (MI).
while not success:
    # Draw a random dataset id in [1, 966] (randint's upper bound is exclusive)
    dataset_id = np.random.randint(low=1, high=967, size=1)[0]

    # Reset the accumulators for this attempt: column pairs, their MI scores,
    # and the id of the dataset each pair came from
    all_column_pairs = []
    all_mi_scores = []
    all_dataset_names = []

    try:
        # Fetch the dataset and join features and targets into a single frame
        data_pkg = fetch_ucirepo(id=int(dataset_id))
        data = pd.concat((data_pkg.data.features, data_pkg.data.targets), axis=1)
        name = data_pkg.metadata.name
        url = data_pkg.metadata.repository_url
        abstract = data_pkg.metadata.abstract

        # Only numeric columns are passed to the MI estimator
        numeric_columns = data.select_dtypes(include='number').columns.tolist()

        # Skip datasets with fewer than 2 numeric columns
        if len(numeric_columns) < 2:
            continue

        # Score every unordered pair of numeric columns exactly once
        for i, x_col in enumerate(numeric_columns):
            for y_col in numeric_columns[i + 1:]:
                # Drop rows with missing values in either of the two columns
                data_pair = data[[x_col, y_col]].dropna()

                # A single remaining row (or none) cannot be scored
                if data_pair.shape[0] > 1:
                    x = data_pair[x_col].values.reshape(-1, 1)
                    y = data_pair[y_col].values
                    mi_score = mutual_info_regression(x, y)[0]

                    # Record the dataset id, the column pair, and its score
                    all_dataset_names.append(dataset_id)
                    all_column_pairs.append((x_col, y_col))
                    all_mi_scores.append(mi_score)

        # Accept the dataset only if some pair carries positive MI. A dataset
        # that produced no scored pairs, or only zero scores, gives the
        # MI-weighted sampling that follows nothing to work with, so retry.
        success = bool(sum(all_mi_scores) > 0)
    except Exception:
        # Deliberate best effort: a randomly drawn id may fail to load or to
        # be processed, in which case another id is simply drawn.
        continue
    

# Fail with a clear message instead of an opaque error from the sampler
if not all_column_pairs:
    raise ValueError("No numeric column pairs were scored; nothing to sample from")

# Normalize mutual information scores for weighted sampling. When every score
# is zero the normalization would divide by zero and produce NaN weights, so
# fall back to uniform weights in that case.
mi_scores = np.asarray(all_mi_scores, dtype=float)
total_mi = mi_scores.sum()
if total_mi > 0:
    weights = mi_scores / total_mi
else:
    weights = np.full(len(mi_scores), 1.0 / len(mi_scores))

# Perform weighted random sampling to select a column pair
selected_index = random.choices(range(len(all_column_pairs)), weights=weights, k=1)[0]
x_col, y_col = all_column_pairs[selected_index]

def _toggle_menu(x, what, show_args, hide_args):
    """Build one dropdown menu holding a "Show" and a "Hide" relayout button.

    Args:
        x: Horizontal position of the menu (anchored at its center).
        what: Noun used in the button labels ("Show <what>" / "Hide <what>").
        show_args: Layout changes applied by the "Show" button.
        hide_args: Layout changes applied by the "Hide" button.

    Returns:
        A dict suitable as one entry of the layout's ``updatemenus`` list.
    """
    return {
        "buttons": [
            {"label": f"Show {what}", "method": "relayout", "args": [show_args]},
            {"label": f"Hide {what}", "method": "relayout", "args": [hide_args]},
        ],
        "direction": "down",
        "showactive": True,
        "x": x,
        "xanchor": "center",
        "y": 1.5,  # Placed well above the plot area to leave room for the title
        "yanchor": "top",
        "pad": {"r": 10},
    }


# Create the scatter plot of the selected column pair using Plotly
fig = px.scatter(data, x=x_col, y=y_col)

# Text revealed by the "Show Title" button: quoted dataset name, then abstract
title_text = f'"{name}"<br> {abstract} '

# Start with no title, axis labels, or legend; each dropdown reveals or hides
# one piece of context about the plotted data.
fig.update_layout(
    title=None,
    xaxis_title=None,
    yaxis_title=None,
    showlegend=False,
    updatemenus=[
        _toggle_menu(
            0.3,
            "Tick Labels",
            {"xaxis.showticklabels": True, "yaxis.showticklabels": True},
            {"xaxis.showticklabels": False, "yaxis.showticklabels": False},
        ),
        _toggle_menu(
            0.5,
            "Axis Labels",
            {"xaxis.title.text": x_col, "yaxis.title.text": y_col},
            {"xaxis.title.text": None, "yaxis.title.text": None},
        ),
        # Address the title through `title.text`, the same form the axis-label
        # menu uses, rather than assigning a string to `title` directly (the
        # deprecated legacy form).
        _toggle_menu(
            0.7,
            "Title",
            {"title.text": title_text},
            {"title.text": None},
        ),
    ],
)

print(url)

# Initially hide tick labels on both axes
fig.update_xaxes(showticklabels=False)
fig.update_yaxes(showticklabels=False)

fig.show()


https://archive.ics.uci.edu/dataset/16/breast+cancer+wisconsin+prognostic
