In [1]:
    import micropip

    # Use await to ensure the installation completes before proceeding
    # This might take a moment depending on your internet connection and the size of the packages
    print("Attempting to install packages using micropip...")
    await micropip.install(['seaborn', 'matplotlib', 'pandas', 'ipywidgets'])
    print("Packages installed (or already present) using micropip.")
    

Attempting to install packages using micropip...
Packages installed (or already present) using micropip.


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact, Dropdown, FloatSlider, IntSlider, VBox, HBox, Output
from IPython.display import display, clear_output

# Load the dataset
try:
    # Make sure the path to your CSV file is correct relative to your notebook's location
    # If the file is in the same directory as the notebook, just use 'SUM CALCULATED.csv'
    # If it's in a subfolder named 'MultipleFiles', then 'MultipleFiles/SUM CALCULATED.csv' is correct.
    df = pd.read_csv('SUM CALCULATED.csv')
    print("Dataset loaded successfully.")
except FileNotFoundError:
    print("Error: 'MultipleFiles/SUM CALCULATED.csv' not found.")
    print("Please ensure the file is in the correct directory or update the path.")
    df = None # Explicitly set df to None if loading fails

if df is not None:
    # Display the first few rows and columns to understand the data
    print("First 5 rows of the dataset:")
    display(df.head())
    print("\nColumns in the dataset:")
    display(df.columns.tolist())
else:
    print("\nDataFrame 'df' was not loaded. Please resolve the FileNotFoundError before proceeding.")



Matplotlib is building the font cache; this may take a moment.


Dataset loaded successfully.
First 5 rows of the dataset:


Unnamed: 0,Brand Image,Brand Perception,Brand Advocacy,Brand Analytics,Marketing Intelligence,Artificial Intelligence,Brand Love,Age,Income,Gender,Work,Education
0,13.0,20.0,21.0,23.0,21.0,21.0,25.0,2,4,2,2,2
1,16.0,20.0,18.0,20.0,15.0,19.0,19.0,1,4,1,1,2
2,15.0,16.0,14.0,22.0,21.0,24.0,23.0,3,2,2,2,1
3,16.0,17.0,23.0,28.0,30.0,27.0,25.0,1,1,2,6,2
4,10.0,15.0,17.0,20.0,18.0,17.0,16.0,1,4,2,1,2



Columns in the dataset:


['Brand Image',
 'Brand Perception',
 'Brand Advocacy',
 'Brand Analytics',
 'Marketing Intelligence',
 'Artificial Intelligence',
 'Brand Love',
 'Age',
 'Income',
 'Gender',
 'Work',
 'Education']

In [3]:
# This block will only execute if df was successfully loaded in the previous cell
if df is not None:
    def calculate_brand_love_score(data, weights=None):
        """
        Calculates a composite brand love score based on various brand metrics.

        Args:
            data (pd.DataFrame): The input DataFrame containing brand metrics.
            weights (dict, optional): A dictionary of weights for each metric.
                                       Keys should be column names, values their weights.
                                       If None, default weights are used.

        Returns:
            pd.Series: A Series containing the calculated brand love score for each row.
        """
        if weights is None:
            weights = {
                'Brand Image': 0.15,
                'Brand Perception': 0.15,
                'Brand Advocacy': 0.20,
                'Brand Analytics': 0.10,
                'Marketing Intelligence': 0.10,
                'Artificial Intelligence': 0.10,
                'Brand Love': 0.20  # Direct measure of brand love, given higher weight
            }

        # Ensure all weighted columns exist in the data
        for col in list(weights.keys()): # Iterate over a copy of keys to allow modification
            if col not in data.columns:
                print(f"Warning: Column '{col}' not found in data. Setting its weight to 0 for score calculation.")
                weights[col] = 0 # Set weight to 0 if column is missing

        score = pd.Series(0.0, index=data.index)
        total_weight = 0

        for col, weight in weights.items():
            if col in data.columns and weight > 0: # Only add if column exists and has a positive weight
                score += data[col] * weight
                total_weight += weight

        if total_weight > 0:
            score /= total_weight
        else:
            print("Warning: No valid columns with positive weights found for brand love score calculation. Returning zeros.")
            score = pd.Series(0.0, index=data.index)

        return score

    df['Brand_Love_Score'] = calculate_brand_love_score(df)
    print("\nBrand Love Score calculated and added to the DataFrame.")
    display(df[['Brand Love', 'Brand_Love_Score']].head())
else:
    print("\nSkipping Brand Love Score calculation as DataFrame 'df' was not loaded.")




Brand Love Score calculated and added to the DataFrame.


Unnamed: 0,Brand Love,Brand_Love_Score
0,25.0,20.65
1,19.0,18.2
2,23.0,18.75
3,25.0,23.05
4,16.0,15.85


In [4]:
# This block will only execute if df was successfully loaded and processed
if df is not None:
    # Define interactive widgets
    # Ensure min/max values are correctly handled for sliders, especially if data is sparse
    age_min = df['Age'].min() if not df['Age'].empty else 1
    age_max = df['Age'].max() if not df['Age'].empty else 100
    income_min = df['Income'].min() if not df['Income'].empty else 1
    income_max = df['Income'].max() if not df['Income'].empty else 100

    age_slider = IntSlider(min=age_min, max=age_max, step=1, description='Age Filter:', value=age_max)
    income_slider = IntSlider(min=income_min, max=income_max, step=1, description='Income Filter:', value=income_max)

    # Ensure unique values are correctly handled for dropdowns
    gender_options = ['All'] + df['Gender'].unique().tolist() if 'Gender' in df.columns else ['All']
    work_options = ['All'] + df['Work'].unique().tolist() if 'Work' in df.columns else ['All']
    education_options = ['All'] + df['Education'].unique().tolist() if 'Education' in df.columns else ['All']

    gender_dropdown = Dropdown(options=gender_options, description='Gender Filter:')
    work_dropdown = Dropdown(options=work_options, description='Work Filter:')
    education_dropdown = Dropdown(options=education_options, description='Education Filter:')

    # Ensure all columns are available for plot dropdowns
    all_columns = df.columns.tolist()
    plot_type_dropdown = Dropdown(options=['Histogram', 'Scatter Plot', 'Box Plot', 'Correlation Heatmap'], description='Plot Type:')
    x_axis_dropdown = Dropdown(options=all_columns, description='X-axis:')
    y_axis_dropdown = Dropdown(options=all_columns, description='Y-axis:')
    hue_dropdown = Dropdown(options=['None'] + all_columns, description='Group By:')

    # Output widget to display plots
    output_area = Output()

    def update_dashboard(age, income, gender, work, education, plot_type, x_axis, y_axis, hue_col):
        with output_area:
            clear_output(wait=True)
            filtered_df = df.copy()

            # Apply filters
            # Check if columns exist before filtering to prevent KeyError on filter columns
            if 'Age' in filtered_df.columns:
                filtered_df = filtered_df[filtered_df['Age'] <= age]
            if 'Income' in filtered_df.columns:
                filtered_df = filtered_df[filtered_df['Income'] <= income]
            if 'Gender' in filtered_df.columns and gender != 'All':
                filtered_df = filtered_df[filtered_df['Gender'] == gender]
            if 'Work' in filtered_df.columns and work != 'All':
                filtered_df = filtered_df[filtered_df['Work'] == work]
            if 'Education' in filtered_df.columns and education != 'All':
                filtered_df = filtered_df[filtered_df['Education'] == education]

            if filtered_df.empty:
                print("No data matches the selected filters. Please adjust your selections.")
                return

            plt.figure(figsize=(10, 6))

            if plot_type == 'Histogram':
                if x_axis in filtered_df.columns:
                    sns.histplot(data=filtered_df, x=x_axis, kde=True, hue=hue_col if hue_col != 'None' and hue_col in filtered_df.columns else None)
                    plt.title(f'Distribution of {x_axis}')
                    plt.xlabel(x_axis)
                    plt.ylabel('Frequency')
                else:
                    print(f"Column '{x_axis}' not found in filtered data for histogram.")
            elif plot_type == 'Scatter Plot':
                if x_axis in filtered_df.columns and y_axis in filtered_df.columns:
                    sns.scatterplot(data=filtered_df, x=x_axis, y=y_axis, hue=hue_col if hue_col != 'None' and hue_col in filtered_df.columns else None)
                    plt.title(f'Scatter Plot of {x_axis} vs {y_axis}')
                    plt.xlabel(x_axis)
                    plt.ylabel(y_axis)
                else:
                    print(f"Columns '{x_axis}' or '{y_axis}' not found in filtered data for scatter plot.")
            elif plot_type == 'Box Plot':
                if x_axis in filtered_df.columns:
                    sns.boxplot(data=filtered_df, y=x_axis, x=hue_col if hue_col != 'None' and hue_col in filtered_df.columns else None)
                    plt.title(f'Box Plot of {x_axis}')
                    plt.ylabel(x_axis)
                else:
                    print(f"Column '{x_axis}' not found in filtered data for box plot.")
            elif plot_type == 'Correlation Heatmap':
                numeric_df = filtered_df.select_dtypes(include=['float64', 'int64'])
                if not numeric_df.empty:
                    # Drop columns with all NaN values before correlation calculation
                    numeric_df = numeric_df.dropna(axis=1, how='all')
                    if not numeric_df.empty:
                        corr_matrix = numeric_df.corr()
                        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f")
                        plt.title('Correlation Heatmap of Numeric Features')
                    else:
                        print("No numeric data available after dropping all-NaN columns for correlation heatmap.")
                else:
                    print("No numeric data available for correlation heatmap.")

            plt.tight_layout()
            plt.show()

    # Link widgets to the update function
    # Ensure default values for dropdowns are valid options
    initial_gender = gender_options[0] if gender_options else 'All'
    initial_work = work_options[0] if work_options else 'All'
    initial_education = education_options[0] if education_options else 'All'

    interactive_dashboard = interact(update_dashboard,
                                     age=age_slider,
                                     income=income_slider,
                                     gender=gender_dropdown,
                                     work=work_dropdown,
                                     education=education_dropdown,
                                     plot_type=plot_type_dropdown,
                                     x_axis=x_axis_dropdown,
                                     y_axis=y_axis_dropdown,
                                     hue_col=hue_dropdown)

    # Arrange widgets in a layout
    filter_widgets = VBox([age_slider, income_slider, gender_dropdown, work_dropdown, education_dropdown])
    plot_widgets = VBox([plot_type_dropdown, x_axis_dropdown, y_axis_dropdown, hue_dropdown])

    ui = HBox([filter_widgets, plot_widgets])
    dashboard_layout = VBox([ui, output_area])

    print("\nBrand Love Score Dashboard:")
    display(dashboard_layout)
else:
    print("\nSkipping dashboard creation as DataFrame 'df' was not loaded.")



interactive(children=(IntSlider(value=6, description='Age Filter:', max=6, min=1), IntSlider(value=4, descript…


Brand Love Score Dashboard:


VBox(children=(HBox(children=(VBox(children=(IntSlider(value=6, description='Age Filter:', max=6, min=1), IntS…