<a href="https://colab.research.google.com/github/codebyted/AI_Programming_Project/blob/main/AI_data_analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import files # Import files for Colab
import io # Import io for reading uploaded files

def format_number(n):
    """Format a number for display by dropping decimals that are not needed.

    Whole-valued numbers are returned as ``int``; other numbers are rounded
    to two decimal places.

    Args:
        n: A scalar value, typically a cell of a DataFrame.

    Returns:
        ``n`` unchanged when it is missing (as reported by ``pd.isna``) or
        cannot be interpreted as a number (e.g. text or a timestamp);
        otherwise an ``int`` or a ``float`` rounded to two decimals.
    """
    if pd.isna(n):
        return n
    try:
        value = float(n)
        # int(n) rather than int(value): keeps full precision for large ints.
        return int(n) if value.is_integer() else round(value, 2)
    except (TypeError, ValueError, OverflowError):
        # Not a number (text, timestamp, ...) or too large for a float:
        # leave the value as it is instead of aborting the whole operation.
        return n

def load_dataset():
    """Prompt the user to upload a CSV or Excel file and load it.

    Opens the Colab upload dialog via ``files.upload()``. Only the first
    uploaded file is read. The file type is chosen from the file extension,
    compared case-insensitively so names such as ``DATA.CSV`` are accepted.

    Returns:
        The loaded DataFrame, or ``None`` when nothing was uploaded, the
        extension is not supported, or reading the file failed.
    """
    print("\nSelect a dataset (CSV or Excel)...")

    uploaded = files.upload()

    if not uploaded:
        print("No file selected.")
        return None

    file_name = next(iter(uploaded))
    # Compare extensions on a lower-cased copy; keep the original name for messages.
    normalized_name = file_name.lower()
    try:
        if normalized_name.endswith(".csv"):
            df = pd.read_csv(io.BytesIO(uploaded[file_name]))
        elif normalized_name.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(io.BytesIO(uploaded[file_name]))
        else:
            print("Unsupported file format. Please upload a CSV or Excel file.")
            return None
        print(f"Dataset loaded: {file_name}")
        return df
    except Exception as e:
        # Boundary with user-supplied files: report any parse error and keep the menu alive.
        print(f"Failed to load dataset: {e}")
        return None

def show_basic_info(df):
    """Print a quick overview of the dataset: first rows, column names and shape."""
    print("\nBASIC INFO")

    preview = df.head()
    print(f"{preview} \n")

    column_names = df.columns.tolist()
    print(f"Columns: {column_names}")

    dimensions = df.shape
    print(f"Shape: {dimensions}")

def clean_data(df):
    """Apply ``format_number`` to every value of each numeric column, in place."""
    print("\nCleaning numeric data...")
    numeric_frame = df.select_dtypes(include='number')
    for column in numeric_frame.columns:
        cleaned = df[column].apply(format_number)
        df[column] = cleaned
    print("Data cleaned.")

def show_statistics(df):
    """Print summary statistics for the numeric columns of the dataset.

    Only numeric columns are summarized. Without this restriction,
    ``describe()`` would also summarize datetime columns, or fall back to
    object columns when no numeric ones exist, producing timestamps or text
    that the number formatter cannot handle.

    Args:
        df: The DataFrame to summarize.
    """
    print("\nSUMMARY STATISTICS")
    numeric = df.select_dtypes(include='number')
    if numeric.shape[1] == 0:
        print("No numeric columns to summarize.")
        return
    stats = numeric.describe()
    formatted = stats.map(format_number)  # DataFrame.map replaces the deprecated applymap
    print(formatted)

def visualize_data(df):
    """Ask the user for a numeric column and draw it as a line plot.

    Lists the numeric columns, reads a 1-based column number from standard
    input and plots the chosen column against the DataFrame index.

    Args:
        df: The DataFrame whose numeric columns can be plotted.
    """
    num_cols = df.select_dtypes(include='number').columns.tolist()
    if not num_cols:
        print("No numeric columns to visualize.")
        return

    print("\nChoose a numeric column to plot:")
    for i, col in enumerate(num_cols, 1):
        print(f"{i}. {col}")

    # Guard only the conversion: a plotting failure must not be reported
    # as a bad number, and Ctrl-C must not be swallowed.
    try:
        choice = int(input("Column number: ")) - 1
    except ValueError:
        print("Input must be a number")
        return

    if not 0 <= choice < len(num_cols):
        print("Invalid choice")
        return

    col_name = num_cols[choice]
    df[col_name].plot(kind="line")
    plt.title(f"{col_name} Trend")
    plt.xlabel("Index")
    plt.ylabel(col_name)
    plt.show()

def menu():
    """Run the interactive text menu until the user chooses to exit.

    Option 1 loads a dataset, options 2-5 operate on the loaded dataset and
    option 6 leaves the loop. Any other input, or a dataset option chosen
    before a dataset is loaded, prints a hint and shows the menu again.
    """
    # Options that need a loaded dataset, keyed by the text the user types.
    dataset_actions = {
        "2": show_basic_info,
        "3": clean_data,
        "4": show_statistics,
        "5": visualize_data,
    }
    menu_text = "\n".join((
        "\n====== DATA ANALYZER ======",
        "1. Load Dataset",
        "2. Show Basic Info",
        "3. Clean Data",
        "4. Show Summary Stats",
        "5. Visualize Data",
        "6. Exit",
    ))

    dataset = None
    while True:
        print(menu_text)

        selection = input("Choose option: ")

        if selection == "6":
            print("Goodbye!")
            break

        if selection == "1":
            dataset = load_dataset()
            continue

        action = dataset_actions.get(selection)
        if action is None or dataset is None:
            print("Load dataset first or choose a valid option!")
            continue

        action(dataset)

# Start the interactive menu only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    menu()