In [None]:
!pip3 uninstall matplotlib
!pip3 install matplotlib 

In [18]:
import os
import re
import sys
import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

Matplotlib is building the font cache; this may take a moment.


In [None]:
DATA_PATH = "data/Customers.csv"


def load_data(DATA_PATH):
    """Load a CSV file into a pandas DataFrame.

    If ``DATA_PATH`` exists it is read directly. Otherwise the ``data``
    directory (relative to the current working directory) is searched
    recursively and the first ``.csv`` file found is loaded instead.
    Directories and file names are visited in sorted order, so the fallback
    choice is the same on every run and every filesystem.

    Args:
        DATA_PATH: Path of the preferred CSV file.

    Returns:
        The DataFrame produced by ``pd.read_csv``.

    Raises:
        FileNotFoundError: If ``DATA_PATH`` does not exist and no ``.csv``
            file is found under ``data``.
    """
    if os.path.exists(DATA_PATH):
        print(f"Loading file: {DATA_PATH}")
        return pd.read_csv(DATA_PATH)

    # Fallback: take the first CSV under data/, walking in a stable order.
    print(f"'{DATA_PATH}' not found. Searching 'data/' for any .csv file...")
    for root, dirs, files in os.walk("data"):
        # Sorting `dirs` in place makes os.walk descend in sorted order too.
        dirs.sort()
        for file in sorted(files):
            if file.endswith(".csv"):
                found_path = os.path.join(root, file)
                print(f"Found and loading fallback file: {found_path}")
                return pd.read_csv(found_path)

    raise FileNotFoundError("No CSV file found in data directory.")


# --- Load the data set and preview it ---

try:
    # load_data returns the result of pd.read_csv, which is already a
    # DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
    df = load_data(DATA_PATH)
except FileNotFoundError as e:
    # Report the problem, then re-raise: the later cells all need `df`, and
    # carrying on would only surface as an unrelated NameError further down.
    print(f"\nError: {e}")
    raise

# Display the first 5 rows of the DataFrame.
print("\n--- DataFrame Head ---")
print(df.head())

Loading file: data/Customers.csv

--- DataFrame Head ---
   CustomerID  Gender  Age  Annual Income ($)  Spending Score (1-100)  \
0           1    Male   19              15000                      39   
1           2    Male   21              35000                      81   
2           3  Female   20              86000                       6   
3           4  Female   23              59000                      77   
4           5  Female   31              38000                      40   

      Profession  Work Experience  Family Size  
0     Healthcare                1            4  
1       Engineer                3            3  
2       Engineer                1            1  
3         Lawyer                0            2  
4  Entertainment                2            6  


In [None]:
import plotly.graph_objects as go
import plotly.figure_factory as ff

# Total annual income for every (gender, profession) pair, largest first.
income_totals = df.groupby(["Gender", "Profession"])["Annual Income ($)"].sum()
Top = income_totals.reset_index().sort_values("Annual Income ($)", ascending=False)

# Render the ranking as a Plotly table.
fig = ff.create_table(Top, height_constant=20)
fig.update_layout(title={"text": "Top Professions by Annual Income ($)", "x": 0.3})
fig.show()


# --- Total income per profession, one line per gender ---

# A line trace joins its points in row order. `Top` is sorted by income, so
# each gender would visit the professions in a different order and the lines
# would zigzag across the categorical x-axis. Sorting the rows by profession
# and pinning the axis to the same order makes every line run left to right.
profession_order = sorted(Top["Profession"].unique())

fig = px.line(
    Top.sort_values(by="Profession"),
    x="Profession",  # X-axis will be professions
    y="Annual Income ($)",  # Y-axis will be income
    color="Gender",  # Separate lines by gender and color them
    category_orders={"Profession": profession_order},
    title="Income Distribution by Profession, Split by Gender",
    labels={"Annual Income ($)": "Total Annual Income ($)", "Profession": "Profession"},
)

fig.update_traces(
    mode="lines+markers"
)  # Add markers to make individual professions clear
fig.update_layout(hovermode="x unified")  # For a cleaner hover experience

fig.show()

In [None]:
# Grouped bar chart: total annual income per profession, one bar per gender.
fig = go.Figure()

# Loop through genders to create a separate bar trace for each
for gender in Top["Gender"].unique():
    df_gender = Top[Top["Gender"] == gender]

    fig.add_trace(
        go.Bar(x=df_gender["Profession"], y=df_gender["Annual Income ($)"], name=gender)
    )

# Group the bars side by side and label the axes and legend so the figure
# can be read on its own, without the surrounding code.
fig.update_layout(
    barmode="group",
    title="Income by Profession and Gender",
    xaxis_title="Profession",
    yaxis_title="Total Annual Income ($)",
    legend_title_text="Gender",
)
fig.show()