# In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the CSV into a DataFrame and preview its first rows
df = pd.read_csv("Titanic-Dataset.csv")
print("Dataset loaded successfully!\n")
preview = df.head()
print(preview)

# List every column name, one per line
print("\nAvailable Columns in the dataset:")
print(*df.columns, sep="\n")

# User input for column selection
selected_cols = input(
    "\nEnter column names you want to work with (separated by commas): "
)

# Clean column names: trim whitespace, drop empty entries and drop repeats.
# dict.fromkeys keeps the first occurrence of each name and preserves the
# order in which the user typed them.
selected_cols = list(
    dict.fromkeys(
        name.strip() for name in selected_cols.split(",") if name.strip()
    )
)

# Validate columns: keep the names that exist in the dataset and tell the
# user which ones were dropped instead of discarding them silently.
valid_cols = [name for name in selected_cols if name in df.columns]
unknown_cols = [name for name in selected_cols if name not in df.columns]
if unknown_cols:
    print("\nIgnoring unknown columns: " + ", ".join(unknown_cols))

if not valid_cols:
    print("\nNo valid columns selected!")
    # Raise SystemExit directly rather than calling exit(): the exit()
    # helper is added by the `site` module for interactive sessions and is
    # not meant for use in programs. The exit status stays 0 as before.
    raise SystemExit

# Display selected columns
df_selected = df[valid_cols]
print("\nYou selected these columns:")
print(df_selected.head())

# ===============================
# Visualization Logic
# ===============================
#
# The chart type is chosen from (a) how many columns were selected and
# (b) how many of them are numeric. The selection is split into numeric and
# non-numeric ("categorical") names once, so the choice does not depend on
# the order in which the user typed the columns.

is_numeric = pd.api.types.is_numeric_dtype
numeric_cols = [name for name in valid_cols if is_numeric(df[name])]
categorical_cols = [name for name in valid_cols if name not in numeric_cols]

# ONE COLUMN
if len(valid_cols) == 1:
    col = valid_cols[0]

    if numeric_cols:
        df[col].hist(bins=20)
        plt.title(f"Histogram of {col}")
        plt.xlabel(col)
        plt.ylabel("Frequency")
        print("\nNUMERIC data detected → Histogram used")

    else:
        df[col].value_counts().plot(kind="bar")
        plt.title(f"Bar Chart of {col}")
        plt.xlabel(col)
        plt.ylabel("Count")
        print("\nCATEGORICAL data detected → Bar chart used")

# TWO COLUMNS
elif len(valid_cols) == 2:
    if len(numeric_cols) == 2:
        col1, col2 = valid_cols
        # "col1 vs col2": first selection on the y-axis, second on the x-axis
        df.plot.scatter(x=col2, y=col1)
        plt.title(f"Scatter Plot of {col1} vs {col2}")
        print("\nTwo NUMERIC columns → Scatter plot used")

    elif len(numeric_cols) == 1:
        num_col = numeric_cols[0]
        cat_col = categorical_cols[0]
        df.boxplot(column=num_col, by=cat_col)
        plt.title(f"Boxplot of {num_col} by {cat_col}")
        # DataFrame.boxplot(by=...) adds its own figure-level title; blank it
        plt.suptitle("")
        # The message mirrors the order in which the columns were entered
        if valid_cols[0] == num_col:
            print("\nNUMERIC vs CATEGORICAL → Boxplot used")
        else:
            print("\nCATEGORICAL vs NUMERIC → Boxplot used")

    else:
        col1, col2 = valid_cols
        pd.crosstab(df[col1], df[col2]).plot(kind="bar")
        plt.title(f"Grouped Bar Chart of {col1} vs {col2}")
        print("\nTwo CATEGORICAL columns → Grouped bar chart used")

# THREE COLUMNS
elif len(valid_cols) == 3:
    if len(numeric_cols) == 3:
        # Kept from the original: Matplotlib releases before 3.2 need this
        # import to make the "3d" projection available.
        from mpl_toolkits.mplot3d import Axes3D

        col1, col2, col3 = valid_cols
        fig = plt.figure()
        ax = fig.add_subplot(111, projection="3d")
        ax.scatter(df[col1], df[col2], df[col3])
        ax.set_xlabel(col1)
        ax.set_ylabel(col2)
        ax.set_zlabel(col3)
        plt.title(f"3D Scatter Plot of {col1}, {col2}, {col3}")
        print("\nThree NUMERIC columns → 3D scatter plot used")

    elif len(numeric_cols) == 2:
        # One box plot per numeric column, side by side, each grouped by the
        # categorical column. A continuous column is deliberately not used
        # as `hue`, because that draws one box per distinct numeric value.
        cat_col = categorical_cols[0]
        fig, axes = plt.subplots(1, 2, figsize=(10, 4))
        for ax, num_col in zip(axes, numeric_cols):
            sns.boxplot(data=df, x=cat_col, y=num_col, ax=ax)
            ax.set_title(f"{num_col} by {cat_col}")
        fig.suptitle(
            f"Boxplots of {numeric_cols[0]} & {numeric_cols[1]} by {cat_col}"
        )
        print("\n2 NUMERIC + 1 CATEGORICAL → Facet box plots used")

    elif len(numeric_cols) == 1:
        # NOTE(review): as in the original, only the two categorical columns
        # are drawn here (row counts); the numeric column is not plotted.
        cat1, cat2 = categorical_cols
        sns.catplot(x=cat1, hue=cat2, kind="count", data=df)
        plt.title(f"Grouped Bar Chart of {cat1} & {cat2}")
        print("\n1 NUMERIC + 2 CATEGORICAL → Grouped bar chart used")

    else:
        col1, col2, col3 = valid_cols
        pd.crosstab([df[col1], df[col2]], df[col3]).plot(
            kind="bar", stacked=True
        )
        plt.title(f"Stacked Bar Chart of {col1}, {col2}, {col3}")
        print("\nThree CATEGORICAL columns → Stacked bar chart used")

# MORE THAN THREE COLUMNS
else:
    print("\nMore than 3 columns selected → Scatter matrix shown")
    # A scatter matrix only draws numeric data, so say which columns are
    # left out and avoid calling it with nothing to plot.
    if categorical_cols:
        print("Non-numeric columns skipped: " + ", ".join(categorical_cols))
    if numeric_cols:
        pd.plotting.scatter_matrix(df[numeric_cols], figsize=(8, 8))
        plt.suptitle("Scatter Matrix of Selected Columns")
    else:
        print("No numeric columns selected; nothing to plot.")

# Show plots
plt.show()


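# Output: ModuleNotFoundError: No module named 'pandas'
# (pandas is not installed in this environment; install it, e.g. "pip install pandas", and re-run.)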