In [65]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")


class TitanicEDA:
    """Interactive exploratory-data-analysis helper for a Titanic passenger CSV.

    Every public method reports its result by printing or by showing a
    matplotlib figure; none of them returns data. Methods that need the
    dataset print a reminder and do nothing when it has not been loaded yet.
    """

    def __init__(self) -> None:
        # DataFrame read by load_data(); stays None until a load succeeds.
        self.df = None
        # Location of the CSV file read by load_data().
        self.path = "data/train.csv"
        # Figure created by the most recent plot_* call, kept so that
        # save_recent_plot() can write it to disk.
        self.last_plot_fig = None

    def load_data(self) -> None:
        """Read the CSV at ``self.path`` into ``self.df`` and print its shape.

        Any failure is reported on stdout instead of being raised, and
        ``self.df`` is left untouched in that case.
        """
        try:
            self.df = pd.read_csv(self.path)
        except Exception as e:  # UI boundary: report the problem, keep the menu alive
            print("Error loading dataset:", e)
            return
        # Report the path actually used rather than a hard-coded copy of it.
        print(f"Dataset loaded successfully from {self.path}")
        print("Shape:", self.df.shape)

    def check_data(self) -> bool:
        """Return True when a dataset is loaded; otherwise print a hint and return False."""
        if self.df is None:
            print("Please load dataset first.")
            return False
        return True

    def preview_data(self) -> None:
        """Print the first rows of the dataset (``DataFrame.head()``)."""
        if self.check_data():
            print(self.df.head())

    def dataset_info(self) -> None:
        """Print the frame summary followed by the per-column missing-value counts."""
        if not self.check_data():
            return
        # DataFrame.info() writes its report itself and returns None, so it
        # must not be wrapped in print() (that would emit a stray "None").
        self.df.info()
        print("\nMissing Values:\n", self.df.isnull().sum())

    def clean_data(self) -> None:
        """Fill missing ``Age`` with the median and missing ``Embarked`` with the mode."""
        if not self.check_data():
            return
        # Assign the filled column back instead of calling
        # fillna(inplace=True) on a column selection: the chained in-place
        # form is deprecated and does not update the frame under Copy-on-Write.
        age = self.df["Age"]
        self.df["Age"] = age.fillna(age.median())

        embarked = self.df["Embarked"]
        modes = embarked.mode()
        # mode() is empty when every value is missing; nothing to fill with.
        if not modes.empty:
            self.df["Embarked"] = embarked.fillna(modes.iloc[0])
        print("Missing values handled successfully")

    def survival_summary(self) -> None:
        """Print absolute and percentage counts of the ``Survived`` column."""
        if self.check_data():
            print("Survival Count:\n", self.df["Survived"].value_counts())
            print("\nSurvival Percentage:\n",
                  self.df["Survived"].value_counts(normalize=True) * 100)

    def survival_by_column(self) -> None:
        """Prompt for a column name and print the survival rate (%) per group."""
        if not self.check_data():
            return
        # Tolerate accidental leading/trailing whitespace in the typed name.
        col = input("Enter column (Sex / Pclass / Embarked): ").strip()
        if col in self.df.columns:
            print(self.df.groupby(col)["Survived"].mean() * 100)
        else:
            print("Invalid column")

    def _show_plot(self, title, draw) -> None:
        """Create a figure, run ``draw()`` on it, add ``title`` and show it.

        The new figure is stored in ``self.last_plot_fig``. Does nothing
        beyond the check_data() hint when no dataset is loaded.
        """
        if not self.check_data():
            return
        self.last_plot_fig = plt.figure()
        draw()
        plt.title(title)
        plt.show()

    def plot_survival_count(self) -> None:
        """Show a count plot of the ``Survived`` column."""
        self._show_plot(
            "Survival Count",
            lambda: sns.countplot(x="Survived", data=self.df),
        )

    def plot_survival_by_gender(self) -> None:
        """Show a count plot of ``Sex`` split by ``Survived``."""
        self._show_plot(
            "Survival by Gender",
            lambda: sns.countplot(x="Sex", hue="Survived", data=self.df),
        )

    def plot_survival_by_class(self) -> None:
        """Show a count plot of ``Pclass`` split by ``Survived``."""
        self._show_plot(
            "Survival by Passenger Class",
            lambda: sns.countplot(x="Pclass", hue="Survived", data=self.df),
        )

    def plot_age_distribution(self) -> None:
        """Show a 30-bin histogram of ``Age`` with a KDE overlay."""
        self._show_plot(
            "Age Distribution",
            lambda: sns.histplot(self.df["Age"], bins=30, kde=True),
        )

    def plot_fare_vs_survival(self) -> None:
        """Show a box plot of ``Fare`` grouped by ``Survived``."""
        self._show_plot(
            "Fare vs Survival",
            lambda: sns.boxplot(x="Survived", y="Fare", data=self.df),
        )

    def statistical_analysis(self) -> None:
        """Show a sub-menu of descriptive statistics over the numeric columns.

        Reads one choice from stdin and prints the selected statistic.
        Non-numeric or out-of-range input prints "Invalid choice" instead
        of raising.
        """
        if not self.check_data():
            return

        print("\n== Statistical Analysis ==")
        print("1. Describe Numerical Columns")
        print("2. Mean")
        print("3. Median")
        print("4. Mode")
        print("5. Standard Deviation")
        print("6. Variance")
        print("7. Min & Max")
        print("8. Quartiles")
        print("9. Go Back")

        try:
            choice = int(input("Enter your choice: "))
        except ValueError:
            # Anything that is not an integer is treated like an unknown option.
            print("Invalid choice")
            return

        if choice == 9:
            return

        numeric = self.df[self.df.select_dtypes(include="number").columns]
        if numeric.shape[1] == 0:
            # describe() and friends have nothing to work on in this case.
            print("No numeric columns available.")
            return

        # Options that print a single labelled result; values are computed
        # lazily so only the selected statistic is evaluated.
        single_reports = {
            2: ("Mean:\n", numeric.mean),
            3: ("Median:\n", numeric.median),
            4: ("Mode:\n", lambda: numeric.mode().iloc[0]),
            5: ("Standard Deviation:\n", numeric.std),
            6: ("Variance:\n", numeric.var),
            8: ("Quartiles:\n", lambda: numeric.quantile([0.25, 0.5, 0.75])),
        }

        if choice == 1:
            print(numeric.describe())
        elif choice == 7:
            print("Minimum Values:\n", numeric.min())
            print("\nMaximum Values:\n", numeric.max())
        elif choice in single_reports:
            label, compute = single_reports[choice]
            print(label, compute())
        else:
            print("Invalid choice")

    def save_recent_plot(self) -> None:
        """Prompt for a file name and save the most recent figure to it.

        Prints a message and returns when no plot has been generated yet,
        when the name is empty, or when saving fails.
        """
        if self.last_plot_fig is None:
            print("No plot available. Please generate a plot first.")
            return

        filename = input("Enter file name (e.g., plot.png): ").strip()
        if not filename:
            print("No file name given. Plot not saved.")
            return
        try:
            self.last_plot_fig.savefig(filename)
        except (OSError, ValueError) as e:
            # OSError: unwritable path; ValueError: unsupported file format.
            print("Error saving plot:", e)
            return
        print(f"Plot saved successfully as {filename}")



eda = TitanicEDA()

# Menu entries in display order. An entry's position (starting at 1) is the
# number the user types; the callable is what runs for it. ``None`` marks
# the exit entry. Keeping labels and actions in one table means the printed
# menu and the dispatch can never drift apart.
menu = [
    ("Load Dataset", eda.load_data),
    ("Preview Data", eda.preview_data),
    ("Dataset Info", eda.dataset_info),
    ("Clean Data", eda.clean_data),
    ("Survival Summary", eda.survival_summary),
    ("Survival by Column", eda.survival_by_column),
    ("Plot Survival Count", eda.plot_survival_count),
    ("Plot Survival by Gender", eda.plot_survival_by_gender),
    ("Plot Survival by Class", eda.plot_survival_by_class),
    ("Age Distribution", eda.plot_age_distribution),
    ("Fare vs Survival", eda.plot_fare_vs_survival),
    ("Statistical Analysis", eda.statistical_analysis),
    ("Save recent plot", eda.save_recent_plot),
    ("Exit", None),
]

# Main interactive loop: print the menu, read one choice, run it, repeat
# until the exit entry is selected.
while True:
    print("\n===== Titanic Survival Analysis =====")
    for number, (label, _) in enumerate(menu, start=1):
        print(f"{number}. {label}")

    try:
        choice = int(input("Enter your choice: "))
    except ValueError:
        # Non-numeric input is handled like an unknown option instead of
        # terminating the program with a traceback.
        print("Invalid choice")
        continue

    if not 1 <= choice <= len(menu):
        print("Invalid choice")
        continue

    action = menu[choice - 1][1]
    if action is None:
        print("Exiting program...")
        break
    action()





===== Titanic Survival Analysis =====
1. Load Dataset
2. Preview Data
3. Dataset Info
4. Clean Data
5. Survival Summary
6. Survival by Column
7. Plot Survival Count
8. Plot Survival by Gender
9. Plot Survival by Class
10. Age Distribution
11. Fare vs Survival
12. Statistical Analysis
13. Save recent plot
14. Exit


Enter your choice:  14


Exiting program...
