# In [7]:
import pandas as pd
import io
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt

# Module-level slot for the loaded DataFrame; stays None until a CSV is read
df = None

# Step 1 control: uploader restricted to a single .csv file
file_upload = widgets.FileUpload(multiple=False, accept='.csv')

# Two output areas: `output` for the data preview, `eda_output` for
# selection feedback and plots
output, eda_output = widgets.Output(), widgets.Output()

# Multi-select of column names; starts empty and disabled, filled after upload
valid_columns_dropdown = widgets.SelectMultiple(
    options=[],
    description="Columns:",
    disabled=True,
)

# Confirms the current column selection; disabled until a file is loaded
confirm_button = widgets.Button(disabled=True, description="Confirm Selection")

# Chooser for the kind of plot to draw; disabled until columns are confirmed
plot_type_dropdown = widgets.Dropdown(
    description="Plot Type:",
    options=['Histogram', 'Bar', 'Box', 'Scatter'],
    disabled=True,
)

# Triggers plot generation; disabled until columns are confirmed
generate_plot_button = widgets.Button(disabled=True, description="Generate Plot")

# Function to load and display CSV info
def load_csv(change):
    """Parse the uploaded CSV into the global ``df`` and show a preview.

    Registered as an observer of ``file_upload.value``. Both output areas are
    cleared first. On a successful parse the first rows and the
    ``DataFrame.info()`` summary are written to ``output``, the column
    selector is populated with the column names, and the selector and the
    confirm button are enabled. If the file cannot be parsed, a short message
    is written to ``output`` and the previously loaded data (if any) is kept.

    Args:
        change: Change notification passed by ``observe``; not used, the
            current value is read from ``file_upload`` directly.
    """
    global df
    output.clear_output()
    eda_output.clear_output()

    uploads = file_upload.value
    if not uploads:
        return

    # ipywidgets 8.x exposes a tuple of per-file dicts, while 7.x exposes a
    # dict keyed by filename; accept either shape.
    if isinstance(uploads, dict):
        uploaded_file = next(iter(uploads.values()))
    else:
        uploaded_file = uploads[0]

    try:
        loaded = pd.read_csv(io.BytesIO(uploaded_file['content']))
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        with output:
            print(f"Could not read the uploaded file as CSV: {err}")
        return

    df = loaded
    with output:
        display(df.head())
        # info() prints its summary itself and returns None, so it must not
        # be wrapped in display() (that would render a stray "None").
        df.info()
    valid_columns_dropdown.options = df.columns.tolist()
    valid_columns_dropdown.disabled = False
    confirm_button.disabled = False

# Attach event to file upload
file_upload.observe(load_csv, names='value')

# Click handler for the "Confirm Selection" button
def confirm_selection(b):
    """Acknowledge the chosen columns and reveal the plotting controls.

    Clears ``eda_output``. With at least one column selected, the plot-type
    dropdown and the generate button are enabled and displayed inside
    ``eda_output`` below a line echoing the selection. With nothing
    selected, only a prompt asking for a selection is printed there.

    Args:
        b: The button instance supplied by ``on_click``; not used.
    """
    eda_output.clear_output()
    selected_columns = valid_columns_dropdown.value
    plot_controls = (plot_type_dropdown, generate_plot_button)

    if selected_columns:
        for control in plot_controls:
            control.disabled = False
        with eda_output:
            print(f"Selected columns: {selected_columns}")
            for control in plot_controls:
                display(control)
        return

    with eda_output:
        print("No columns selected for analysis. Please select valid columns.")

# Wire the handler to the confirm button
confirm_button.on_click(confirm_selection)

# Function to plot data based on user input
def plot_data(b):
    """Draw the chosen plot type for the selected columns into ``eda_output``.

    Reads the global ``df``, the current column selection and the plot-type
    dropdown. Previous content of ``eda_output`` is cleared before drawing,
    and the plot controls are displayed again afterwards so another plot can
    be generated without re-confirming the selection.

    Supported plot types: 'Histogram', 'Bar' (one value-count chart per
    column), 'Box', and 'Scatter' (requires exactly two columns). If pandas
    or matplotlib rejects the selection (for example, no numeric data), a
    short message is printed instead of a traceback.

    Args:
        b: The button instance supplied by ``on_click``; not used.
    """
    selected_columns = list(valid_columns_dropdown.value)  # Ensure it's always a list
    plot_type = plot_type_dropdown.value

    if df is None or not selected_columns:
        # Give feedback instead of silently ignoring the click.
        with eda_output:
            print("Upload a CSV file and select at least one column before plotting.")
        return

    eda_output.clear_output()  # Clear previous plots
    with eda_output:
        # Clearing also removed the plot controls that confirm_selection
        # displayed in this area; show them again so the user can keep
        # generating plots.
        display(plot_type_dropdown)
        display(generate_plot_button)

        try:
            if plot_type == 'Histogram':
                df[selected_columns].hist(bins=15, figsize=(10, 6))
                plt.show()
            elif plot_type == 'Bar':
                for col in selected_columns:
                    df[col].value_counts().plot(kind='bar', figsize=(8, 5))
                    plt.title(f"Bar plot of {col}")
                    plt.show()
            elif plot_type == 'Box':
                df[selected_columns].plot(kind='box', figsize=(10, 6))
                plt.title("Box plot")
                plt.show()
            elif plot_type == 'Scatter':
                if len(selected_columns) == 2:
                    x_col, y_col = selected_columns
                    df.plot.scatter(x=x_col, y=y_col)
                    plt.title(f"Scatter plot: {x_col} vs {y_col}")
                    plt.show()
                else:
                    print("Scatter plot requires exactly two columns.")
            else:
                # Only reachable if the dropdown gains an option without a
                # matching branch here.
                print(f"Unsupported plot type: {plot_type}")
        except (TypeError, ValueError) as err:
            # pandas raises these when the selection has nothing plottable
            # (e.g. only text columns); drop any half-built figure.
            plt.close('all')
            print(f"Could not draw a {plot_type} plot for the selected columns: {err}")

        print("\nYou can change columns or plot types and generate another plot!")


# Attach event to generate plot button
generate_plot_button.on_click(plot_data)

# Render the two-step UI as a single vertical stack: upload + preview first,
# then column selection, confirmation and the analysis output area
display(widgets.VBox(children=[
    widgets.Label(value="Step 1: Upload a CSV File:"),
    file_upload,
    output,
    widgets.Label(value="Step 2: Select Valid Columns for Analysis:"),
    valid_columns_dropdown,
    confirm_button,
    eda_output,
]))

# Cell output (truncated): VBox(children=(Label(value='Step 1: Upload a CSV File:'), FileUpload(value=(), accept='.csv', description='Upl…