In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Banner shown once, before the main menu loop starts.
print("\n============= Data Analysis & Visualization Program =============")

# Dataset currently being worked on. Stays None until "Load Dataset"
# (option 1) succeeds; the other menu options check for None before using it.
dataframe = None

while True:
    print("\nPlease Select an option:")
    print("   1. Load Dataset")
    print("   2. Explore Data")
    print("   3. Perform Dataframe Operations")
    print("   4. Handle Missing Data")
    print("   5. Generate Descriptive Statistics")
    print("   6. Data Visualization")
    print("   7. Save Changed Dataframe to CSV")
    print("   8. Exit")

    try:
        main_choice = int(input("Enter your choice: "))
    except ValueError:
        print("Invalid choice. Please enter a number between 1-8.")
        continue

    match main_choice:
        case 1:  # Load dataset
            print("\n== Load Dataset ==")
            file = input("Enter the path of the dataset (CSV file): ")
            try:
                dataframe = pd.read_csv(file)
                print("Dataset Loaded Successfully!\n")
            except Exception as e:
                print(f"Error loading dataset: {e}")

        case 2:  # Explore dataset
            if dataframe is None or dataframe.empty:
                print("\nPlease load a dataset first (Option 1).\n")
                continue
            while True:
                print("\n== Explore Dataset ==")
                print("   1. Display the first 5 rows")
                print("   2. Display the last 5 rows")
                print("   3. Display column names")
                print("   4. Display data types")
                print("   5. Display basic info")
                print("   6. Exit")

                exp_choice = input("Enter your choice: ")
                if exp_choice == '1':
                    print("\nFirst 5 rows of dataset:\n", dataframe.head().to_string())
                elif exp_choice == '2':
                    print("\nLast 5 rows of dataset:\n", dataframe.tail().to_string())
                elif exp_choice == '3':
                    print("\nColumns of dataset:", list(dataframe.columns))
                elif exp_choice == '4':
                    print("\nData types of columns:\n", dataframe.dtypes)
                elif exp_choice == '5':
                    print("\nDataset Info:\n")
                    print(dataframe.info())
                elif exp_choice == '6':
                    break
                else:
                    print("Invalid choice...")

        case 3:  # Dataframe Operations
            if dataframe is None:
                print("\nPlease load a dataset first (Option 1).\n")
                continue

            class DataframeOperation:
                """Read-only summary sub-menu for a dataframe."""

                def operations(self, df):
                    """Run the "Dataframe Operations" menu until the user exits.

                    Args:
                        df: Dataframe to summarise; only ``describe()`` and
                            ``shape`` are read from it.

                    Returns:
                        None. All results are printed.
                    """
                    menu_lines = (
                        "\n== Dataframe Operations ==",
                        "   1. Describe the dataset",
                        "   2. Find shape of dataset",
                        "   3. Exit",
                    )
                    while True:
                        print(*menu_lines, sep="\n")

                        selected = input("Enter your choice: ")
                        if selected == '3':
                            # Nothing follows the loop, so returning ends the menu.
                            return
                        if selected == '1':
                            summary_text = df.describe().to_string()
                            print("\n== Dataset Description ==\n", summary_text)
                        elif selected == '2':
                            print("\n== Shape of Dataset ==\n", df.shape, "(Rows: Columns)")
                        else:
                            print("\nInvalid choice...\n")

            DataframeOperation().operations(dataframe)

        case 4:  # Handle Missing Data
            if dataframe is None:
                print("\nPlease load a dataset first (Option 1).\n")
                continue

            class MissingFilling:
                """Interactive sub-menu for inspecting and repairing missing values."""

                def handle_missing_data(self, df):
                    """Run the "Handle Missing Data" menu until the user exits.

                    Options 2 and 3 modify ``df`` itself, so the caller's
                    dataframe reflects the changes once this method returns.

                    Args:
                        df: Dataframe to inspect and repair in place.

                    Returns:
                        None. Results and confirmations are printed.
                    """
                    while True:
                        print("\n== Handle Missing Data ==")
                        print("   1. Display rows with missing values")
                        print("   2. Fill missing values with mean")
                        print("   3. Drop rows with missing values")
                        print("   4. Exit")

                        # Strip whitespace so " 2" is accepted like "2".
                        choice = input("Enter your choice: ").strip()
                        if choice == '1':
                            missing_mask = df.isnull()
                            # Printed as a plain list of names rather than an
                            # Index repr.
                            print("\nMissing Value Columns:", list(df.columns[missing_mask.any()]))
                            print("\nRows with missing values:\n", df[missing_mask.any(axis=1)])
                        elif choice == '2':
                            for col in df.select_dtypes(include=['int64', 'float64']).columns:
                                # Assign the filled column back instead of calling
                                # fillna(inplace=True) on df[col]: the in-place form
                                # acts on a temporary Series and does not update df
                                # when pandas Copy-on-Write is active.
                                df[col] = df[col].fillna(df[col].mean())
                            print("\nFilled missing values with column means.\n")
                        elif choice == '3':
                            df.dropna(inplace=True)
                            print("\nDropped rows with missing values.\n")
                        elif choice == '4':
                            break
                        else:
                            print("Invalid choice...")

            MissingFilling().handle_missing_data(dataframe)

        case 5:  # Generate Descriptive Statistics
            if dataframe is None:
                print("\nPlease load a dataset first (Option 1).\n")
                continue

            print("\n== Descriptive Statistics ==")
            print(dataframe.describe().to_string())


        case 6:  # Data Visualization
            if dataframe is None:
                print("\nPlease load a dataset first (Option 1).\n")
                continue

            while True:
                print("\n== Data Visualization ==")
                print("   1. Histogram")
                print("   2. Box Plot")
                print("   3. Scatter Plot")
                print("   4. Correlation Heatmap")
                print("   5. Bar Plot")
                print("   6. Pie Chart")
                print("   7. Pair Plot")
                print("   8. Line Plot")
                print("   9. Exit")

                vis_choice = input("Enter your choice: ")

                if vis_choice == '1':  # Histogram
                    num_column = input("Enter column name for histogram: ")
                    if num_column in dataframe.columns:
                        plt.figure(figsize=(8, 5))
                        sns.histplot(dataframe[num_column], kde=True, bins=20)
                        plt.title(f'Histogram of {num_column}')
                        plt.show()
                    else:
                        print("Invalid column name.")

                elif vis_choice == '2':  # Box Plot
                    num_column = input("Enter column name for box plot: ")
                    if num_column in dataframe.columns:
                        plt.figure(figsize=(6, 5))
                        sns.boxplot(y=dataframe[num_column])
                        plt.title(f'Box Plot of {num_column}')
                        plt.show()
                    else:
                        print("Invalid column name.")

                elif vis_choice == '3':  # Scatter Plot
                    x_col = input("Enter X-axis column: ")
                    y_col = input("Enter Y-axis column: ")
                    if x_col in dataframe.columns and y_col in dataframe.columns:
                        plt.figure(figsize=(7, 5))
                        sns.scatterplot(x=dataframe[x_col], y=dataframe[y_col])
                        plt.title(f'Scatter Plot: {x_col} vs {y_col}')
                        plt.show()
                    else:
                        print("Invalid column names.")

                elif vis_choice == '4':  # Correlation Heatmap
                    plt.figure(figsize=(10, 6))
                    sns.heatmap(dataframe.corr(), annot=True, cmap='coolwarm', fmt=".2f")
                    plt.title("Correlation Heatmap")
                    plt.show()

                elif vis_choice == '5':  # Bar Plot
                    cat_column = input("Enter categorical column: ")
                    num_column = input("Enter numerical column: ")
                    if cat_column in dataframe.columns and num_column in dataframe.columns:
                        plt.figure(figsize=(10, 6))
                        sns.barplot(x=dataframe[cat_column], y=dataframe[num_column])
                        plt.xticks(rotation=45)
                        plt.title(f'Bar Plot: {cat_column} vs {num_column}')
                        plt.show()
                    else:
                        print("Invalid column names.")

                elif vis_choice == '6':  # Pie Chart
                    cat_column = input("Enter categorical column for Pie Chart: ")
                    if cat_column in dataframe.columns:
                        plt.figure(figsize=(7, 7))
                        dataframe[cat_column].value_counts().plot.pie(autopct='%1.1f%%', startangle=90)
                        plt.title(f'Distribution of {cat_column}')
                        plt.show()
                    else:
                        print("Invalid column name.")

                elif vis_choice == '7':  # Pair Plot
                    sns.pairplot(dataframe)
                    plt.title("Pair Plot of Numerical Columns")
                    plt.show()


                elif vis_choice == '8':  # Line Plot
                    x_col = input("Enter X-axis column: ")
                    y_col = input("Enter Y-axis column: ")
                    if x_col in dataframe.columns and y_col in dataframe.columns:
                        plt.figure(figsize=(10, 5))
                        sns.lineplot(x=dataframe[x_col], y=dataframe[y_col])
                        plt.title(f'Line Plot: {x_col} vs {y_col}')
                        plt.show()
                    else:
                        print("Invalid column names.")

                elif vis_choice == '9':
                    break

                else:
                    print("Invalid choice. Please enter a number between 1-10.")


            print("\nExiting Data Visualization...\n")

        case 7:  # Save Dataframe
            if dataframe is None:
                print("\nPlease load a dataset first (Option 1).\n")
                continue

            filename = input("Enter filename to save (with .csv extension): ")
            dataframe.to_csv(filename, index=False)
            print(f"Data saved successfully as {filename}.")

        case 8:  # Exit
            print("\nExiting program...")
            # Leaves the enclosing `while True` main-menu loop.
            break

        case _:
            # Any integer other than 1-8 ends up here; non-numeric input was
            # already rejected when the choice was parsed with int().
            print("\nInvalid choice. Please enter a number between 1-8.\n")




Please Select an option:
   1. Load Dataset
   2. Explore Data
   3. Perform Dataframe Operations
   4. Handle Missing Data
   5. Generate Descriptive Statistics
   6. Data Visualization
   7. Save Changed Dataframe to CSV
   8. Exit


KeyboardInterrupt: Interrupted by user