# Project Title: Command-Line Data Analytics Tool 

# In [2]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt

# path - D:/NexGen/Week_03/06a01a7e-2517-46d5-9192-631395602684.xlsx.csv

# In [None]:
class Analytix():
    """Menu-driven command-line explorer for a single CSV dataset.

    The tool keeps one pandas DataFrame in memory and offers interactive
    actions on it: preview, per-column numeric statistics, missing-value
    counts, a text histogram, and two plots (histogram and correlation
    heatmap).  All interaction happens through ``input``/``print``; invalid
    input is reported and the user is returned to the menu instead of the
    program crashing.
    """

    # Numeric columns excluded from the correlation heatmap because they are
    # ID-like rather than measurements.
    ID_LIKE_COLUMNS = ("Row ID", "Postal Code")

    def __init__(self):
        self.df = None            # Loaded DataFrame; None until load_file succeeds
        self.numeric_cols = []    # Numeric column names found by analyze_numeric_columns
        self.selected_col = None  # Column targeted by the numeric submenu

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _has_data(self):
        """Return True if a dataset is loaded; otherwise tell the user and return False."""
        if self.df is None:
            print("No data loaded. Please load a CSV file first.")
            return False
        return True

    def _numeric_column_names(self):
        """Return the names of all numeric columns of the loaded dataset."""
        return self.df.select_dtypes(include=["number"]).columns.tolist()

    @staticmethod
    def _prompt_int(prompt):
        """Ask for a whole number; return it, or None (after a message) if the input is not one."""
        raw = input(prompt)
        try:
            return int(raw)
        except ValueError:
            print("Invalid input. Please enter a whole number.")
            return None

    def _choose_column(self, columns, prompt, heading="Numeric columns:"):
        """Print a numbered list of *columns* and return the one the user picks.

        Returns None (after printing a message) when the input is not a
        number or is outside ``1..len(columns)``.
        """
        print(heading)
        for position, name in enumerate(columns, start=1):
            print(f"{position}. {name}")
        choice = self._prompt_int(prompt)
        if choice is None:
            return None
        if 1 <= choice <= len(columns):
            return columns[choice - 1]
        print("Invalid option selected.")
        return None

    def _print_stat(self, label, stat):
        """Print ``"<label> <column>: <value>"`` where value is ``Series.<stat>()`` of the selected column."""
        if not self._has_data():
            return
        if self.selected_col is None:
            print("No column selected.")
            return
        value = getattr(self.df[self.selected_col], stat)()
        print(f"{label} {self.selected_col}: {value}")

    @staticmethod
    def _histogram_counts(col_data, bin_size):
        """Count the values of *col_data* in consecutive bins of width *bin_size*.

        Bins are left-closed ``[a, b)`` and start at the floor of the minimum,
        and enough bins are created that the last edge lies strictly above the
        maximum, so every non-missing value lands in exactly one bin.

        Args:
            col_data: numeric pandas Series (missing values are ignored).
            bin_size: positive integer bin width.

        Returns:
            Series of counts indexed by interval, in ascending bin order.

        Raises:
            ValueError: if *bin_size* is not positive or there is no data.
        """
        if bin_size <= 0:
            raise ValueError("bin size must be a positive integer")
        values = col_data.dropna()
        if values.empty:
            raise ValueError("column has no non-missing values")
        # floor (not int()) so that negative minima are still inside the first bin.
        low = int(np.floor(float(values.min())))
        span = float(values.max()) - low
        n_bins = int(span // bin_size) + 1      # guarantees last edge > max
        edges = [low + i * bin_size for i in range(n_bins + 1)]
        return pd.cut(values, bins=edges, right=False).value_counts().sort_index()

    # ------------------------------------------------------------------
    # Menu actions
    # ------------------------------------------------------------------

    def load_file(self, path=None):
        """Load a CSV file into ``self.df``.

        Args:
            path: file to load.  When omitted the user is prompted for it.

        A missing or unreadable file is reported and leaves any previously
        loaded dataset untouched; the program is not terminated.
        """
        if path is None:
            path = input("Enter the file path: ").strip()
        if not os.path.isfile(path):
            print(f"File not found at: {path}")
            return
        try:
            frame = pd.read_csv(path)
        except (pd.errors.EmptyDataError, pd.errors.ParserError,
                UnicodeDecodeError, OSError) as err:
            print(f"Could not read {path}: {err}")
            return
        self.df = frame
        # Selections made on a previous dataset no longer apply.
        self.numeric_cols = []
        self.selected_col = None
        print(f"Loaded {frame.shape[0]} rows and {frame.shape[1]} columns.")

    def preview_data(self):
        """Print the dataset's shape, its column names and its first 15 rows."""
        if not self._has_data():
            return
        print(f"The dataset contains {self.df.shape[0]} rows and {self.df.shape[1]} columns.")
        print("Column names:")
        print(self.df.columns.tolist())
        print("First 15 rows of the dataset:")
        print(self.df.head(15))

    def analyze_numeric_columns(self):
        """Let the user pick a numeric column, then open the statistics submenu for it."""
        if not self._has_data():
            return
        self.numeric_cols = self._numeric_column_names()
        if not self.numeric_cols:
            print("No numeric columns found.")
            return
        chosen = self._choose_column(
            self.numeric_cols,
            "Enter the number of the column you want to select: ",
        )
        if chosen is None:
            return
        self.selected_col = chosen
        print(f"You selected: {self.selected_col}")
        self.numeric_submenu()

    def numeric_submenu(self):
        """Loop over the statistics menu for ``self.selected_col`` until the user goes back."""
        actions = {
            "1": self.total,
            "2": self.average,
            "3": self.maximum,
            "4": self.minimum,
            "5": self.counts,
        }
        while True:
            print("\n--- Numeric Column Analysis Menu ---")
            print("1. Total (Sum)")
            print("2. Average (Mean)")
            print("3. Maximum value")
            print("4. Minimum value")
            print("5. Count of values")
            print("6. Back to Main Menu")
            choice = input("Enter your choice (1-6): ").strip()
            if choice == "6":
                break
            action = actions.get(choice)
            if action is None:
                print("Invalid choice.")
            else:
                action()

    def total(self):
        """Print the sum of the selected column."""
        self._print_stat("Total of", "sum")

    def average(self):
        """Print the mean of the selected column."""
        self._print_stat("Average of", "mean")

    def maximum(self):
        """Print the largest value of the selected column."""
        self._print_stat("Maximum value of", "max")

    def minimum(self):
        """Print the smallest value of the selected column."""
        self._print_stat("Minimum value of", "min")

    def counts(self):
        """Print the number of non-missing values in the selected column."""
        self._print_stat("Count of", "count")

    def Check_Missing_Values(self):
        """Let the user pick any column and print how many of its entries are missing."""
        if not self._has_data():
            return
        # All columns are offered here, not only numeric ones.
        select_col = self._choose_column(
            self.df.columns.tolist(),
            "Enter the number of the column you want to select: ",
            heading="Columns:",
        )
        if select_col is None:
            return
        print(f"Selected column: {select_col}")
        missing_count = self.df[select_col].isnull().sum()
        print(f"Count of missing entries: {missing_count}")

    def text_histogram(self):
        """Print a text histogram (interval: count) of a numeric column chosen by the user."""
        if not self._has_data():
            return
        numeric_cols = self._numeric_column_names()
        if not numeric_cols:
            print("No numeric columns found.")
            return
        col_name = self._choose_column(
            numeric_cols, "Enter the number of the column for histogram: "
        )
        if col_name is None:
            return
        bin_size = self._prompt_int("Enter bin size: ")
        if bin_size is None:
            return
        if bin_size <= 0:
            print("Bin size must be a positive whole number.")
            return
        col_data = self.df[col_name].dropna()
        if col_data.empty:
            print(f"Column {col_name} has no non-missing values.")
            return
        hist = self._histogram_counts(col_data, bin_size)
        print(f"\nText Histogram for {col_name}:")
        for interval, count in hist.items():
            print(f"{interval}: {count}")

    def plot_histogram(self):
        """Plot histogram of a selected numeric column"""
        if not self._has_data():
            return
        numeric_cols = self._numeric_column_names()
        if not numeric_cols:
            print("No numeric columns available for histogram.")
            return
        col_name = self._choose_column(
            numeric_cols, "Enter the number of the column for histogram: "
        )
        if col_name is None:
            return
        plt.figure(figsize=(8, 5))
        sns.histplot(self.df[col_name].dropna(), bins=20, kde=True, color="skyblue")
        plt.title(f"Histogram of {col_name}")
        plt.xlabel(col_name)
        plt.ylabel("Frequency")
        plt.show()

    def plot_correlation_heatmap(self):
        """Plot heatmap of correlation between numeric columns (excluding ID-like columns)"""
        if not self._has_data():
            return
        numeric = self.df.select_dtypes(include=["number"])
        # Only drop the ID-like columns that actually exist in this dataset.
        numeric = numeric.drop(
            columns=[col for col in self.ID_LIKE_COLUMNS if col in numeric.columns]
        )
        if numeric.empty:
            print("No valid numeric columns available for correlation heatmap.")
            return

        plt.figure(figsize=(10, 6))
        corr = numeric.corr()
        sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f")
        plt.title("Correlation Heatmap of Numeric Columns")
        plt.show()

    def menu(self):
        """Run the main menu loop until the user chooses to exit."""
        actions = {
            "1": self.load_file,
            "2": self.preview_data,
            "3": self.analyze_numeric_columns,
            "4": self.Check_Missing_Values,
            "5": self.text_histogram,
            "6": self.plot_histogram,
            "7": self.plot_correlation_heatmap,
        }
        while True:
            print("\nMain Menu:")
            print("1. Load CSV File")
            print("2. Preview Data")
            print("3. Analyze Numeric Columns")
            print("4. Check Missing Values")
            print("5. Text Histogram")
            print("6. Plot Histogram")
            print("7. Correlation Heatmap")
            print("8. Exit")
            choice = input("Enter your choice: ").strip()

            if choice == "8":
                print("Exiting program.")
                break
            action = actions.get(choice)
            if action is None:
                print("Invalid choice.")
            else:
                action()

if __name__ == "__main__":
    # Script entry point: start the interactive menu only when this file is
    # executed directly, not when it is imported.
    Analytix().menu()
