In [1]:
import pandas as pd
# Load the sleep-efficiency dataset (the path is relative to the notebook's
# working directory) and preview the first five rows.
df = pd.read_csv('../datasets/Sleep_Efficiency.csv')
df.head()

Unnamed: 0,ID,Age,Gender,Bedtime,Wakeup time,Sleep duration,Sleep efficiency,REM sleep percentage,Deep sleep percentage,Light sleep percentage,Awakenings,Caffeine consumption,Alcohol consumption,Smoking status,Exercise frequency
0,1,65,Female,2021-03-06 01:00:00,2021-03-06 07:00:00,6.0,0.88,18,70,12,0.0,0.0,0.0,Yes,3.0
1,2,69,Male,2021-12-05 02:00:00,2021-12-05 09:00:00,7.0,0.66,19,28,53,3.0,0.0,3.0,Yes,3.0
2,3,40,Female,2021-05-25 21:30:00,2021-05-25 05:30:00,8.0,0.89,20,70,10,1.0,0.0,0.0,No,3.0
3,4,40,Female,2021-11-03 02:30:00,2021-11-03 08:30:00,6.0,0.51,23,25,52,3.0,50.0,5.0,Yes,1.0
4,5,57,Male,2021-03-13 01:00:00,2021-03-13 09:00:00,8.0,0.76,27,55,18,3.0,0.0,3.0,No,3.0


### Program

In [None]:
class DescriptiveAnalyzer:
    """Print descriptive statistics for selected columns of a CSV dataset.

    Typical use: construct with a CSV path and a list of column names, call
    ``read_data()`` once, then call any reporting method. Each reporting
    method prints one line (or one diagnostic) per requested column and
    returns ``None``.

    Diagnostics printed instead of a result:
      * ``Data not loaded. Call read_data() first.`` when no data is loaded;
      * ``Column '<name>' not found in dataset.`` for unknown columns;
      * ``Column '<name>' contains non-numeric data.`` when a numeric
        statistic is requested for a non-numeric column.

    Missing values (NaN) are ignored by every statistic.
    """

    def __init__(self, file_path, columns):
        """Store the CSV path and the column names; no file is read yet."""
        self.file_path = file_path      # CSV path
        self.columns = columns          # List of columns to analyze
        self.df = None                  # Placeholder for the loaded DataFrame

    def read_data(self):
        """Load the CSV at ``self.file_path`` into ``self.df``."""
        self.df = pd.read_csv(self.file_path)

    def _report(self, describe, numeric_only=True):
        """Print ``describe(col, series)`` for every requested column.

        Args:
            describe: callable taking the column name and its Series and
                returning the text to print for that column.
            numeric_only: when True, columns whose dtype is not numeric get
                the non-numeric diagnostic instead of being passed to
                ``describe``.
        """
        if self.df is None:
            print("Data not loaded. Call read_data() first.")
            return

        for col in self.columns:
            if col not in self.df.columns:
                print(f"Column '{col}' not found in dataset.")
                continue

            series = self.df[col]
            # An explicit dtype check gives every statistic the same behaviour
            # for text columns, instead of relying on which exception a given
            # pandas reduction happens to raise.
            if numeric_only and not pd.api.types.is_numeric_dtype(series):
                print(f"Column '{col}' contains non-numeric data.")
                continue

            try:
                message = describe(col, series)
            except TypeError:
                # Safety net for dtypes that pass the numeric check but still
                # reject the arithmetic (e.g. subtracting boolean values).
                print(f"Column '{col}' contains non-numeric data.")
                continue
            print(message)

    @staticmethod
    def _midpoint_quartiles(series):
        """Return ``(q1, q3)`` of ``series`` using midpoint interpolation."""
        q1 = series.quantile(0.25, interpolation="midpoint")  # 25%
        q3 = series.quantile(0.75, interpolation="midpoint")  # 75%
        return q1, q3

    def get_mean(self):
        """Print the arithmetic mean of each requested numeric column."""
        self._report(lambda col, s: f"Mean of '{col}': {s.mean()}")

    def get_median(self):
        """Print the median of each requested numeric column."""
        self._report(lambda col, s: f"Median of '{col}': {s.median()}")

    def get_mode(self):
        """Print the mode(s) of each requested column as a list.

        Works for non-numeric columns too. A list is printed because a column
        may have several equally frequent values.
        """
        self._report(
            lambda col, s: f"Mode of '{col}': {s.mode().tolist()}",
            numeric_only=False,
        )

    def get_range(self):
        """Print ``max - min`` of each requested numeric column."""
        # Series.max()/min() skip NaN; the builtin max()/min() give
        # order-dependent results when NaN is present.
        self._report(lambda col, s: f"Range of '{col}': {s.max() - s.min()}")

    def get_std(self):
        """Print the sample standard deviation of each requested numeric column."""
        self._report(lambda col, s: f"Standard Deviation of '{col}': {s.std()}")

    def get_variance(self):
        """Print the sample variance of each requested numeric column."""
        self._report(lambda col, s: f"Variance of '{col}': {s.var()}")

    def calculate_IQR_exclusive(self):
        """Print the interquartile range computed by ``scipy.stats.iqr``.

        NOTE(review): ``scipy.stats.iqr`` uses linear interpolation by
        default; confirm that this is the intended "exclusive" method. The
        label is kept unchanged so existing output stays recognisable.
        """
        def describe(col, series):
            # Imported lazily so scipy is only required when this statistic
            # is actually computed.
            from scipy import stats
            # Drop NaN first: scipy propagates NaN by default, whereas every
            # other statistic in this class ignores missing values.
            exclusive_IQR = stats.iqr(series.dropna())
            return f"exclusive IQR of '{col}': {exclusive_IQR}"

        self._report(describe)

    def calculate_IQR_inclusive(self):
        """Print ``Q3 - Q1`` using midpoint-interpolated quartiles."""
        def describe(col, series):
            q1, q3 = self._midpoint_quartiles(series)
            return f"inclusive IQR of '{col}': {q3 - q1}"

        self._report(describe)

    def determine_outliers_iqr_inclusive(self):
        """Print the rows whose value lies outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]``.

        Quartiles use the same midpoint interpolation as
        ``calculate_IQR_inclusive``.
        """
        def describe(col, series):
            q1, q3 = self._midpoint_quartiles(series)
            iqr = q3 - q1

            # Bounds at 1.5 * IQR beyond each quartile
            lower = q1 - 1.5 * iqr
            upper = q3 + 1.5 * iqr

            outliers = self.df.loc[(series < lower) | (series > upper), [col]]
            return f"Outliers in '{col}' using IQR (inclusive):\n{outliers}"

        self._report(describe)
        

# Ask which dataset column to analyze. input() already returns a str, and
# surrounding whitespace is stripped so a stray space in the typed name does
# not make the column lookup fail.
user_input = input("Type a column in the data you want to use: ").strip()

def main_program(user_input, file_path='../datasets/Sleep_Efficiency.csv'):
    """Run an interactive menu of descriptive statistics for one column.

    Loads the CSV, then repeatedly prints the menu, reads a choice from
    standard input and runs the matching ``DescriptiveAnalyzer`` method until
    the user enters ``0``.

    Args:
        user_input: name of the dataset column to analyze.
        file_path: CSV file to load; defaults to the sleep-efficiency dataset.
    """
    DA = DescriptiveAnalyzer(file_path, [user_input])
    DA.read_data()

    # Single source of truth for the menu: key -> (label, action). The printed
    # menu and the dispatch below are both derived from it, so they cannot
    # drift apart.
    operations = {
        '1': ("Mean", DA.get_mean),
        '2': ("Median", DA.get_median),
        '3': ("Mode", DA.get_mode),
        '4': ("Range", DA.get_range),
        '5': ("Standard Deviation", DA.get_std),
        '6': ("Variance", DA.get_variance),
        '7': ("IQR (Exclusive method)", DA.calculate_IQR_exclusive),
        '8': ("IQR (Inclusive method)", DA.calculate_IQR_inclusive),
        '9': ("Detect Outliers (IQR Inclusive method)", DA.determine_outliers_iqr_inclusive),
    }

    while True:
        print("\nChoose an operation:")
        for key, (label, _action) in operations.items():
            print(f"{key}. {label}")
        print("0. Exit")

        # Strip whitespace so an input such as " 9" is still accepted.
        choice = input("Enter your choice (0–9): ").strip()

        if choice == '0':
            print("Exiting program.")
            break

        entry = operations.get(choice)
        if entry is None:
            print("Invalid choice. Please enter a number from 0 to 9.")
            continue

        _label, action = entry
        action()

# Start the interactive menu for the chosen column; it loops until the user enters 0.
main_program(user_input)


Type a column in the data you want to use:  Age



Choose an operation:
1. Mean
2. Median
3. Mode
4. Range
5. Standard Deviation
6. Variance
7. IQR (Exclusive method)
8. IQR (Inclusive method)
9. Detect Outliers (IQR Inclusive method)
0. Exit


Enter your choice (0–9):  9


Outliers in 'Age' using IQR (inclusive):
Empty DataFrame
Columns: [Age]
Index: []

Choose an operation:
1. Mean
2. Median
3. Mode
4. Range
5. Standard Deviation
6. Variance
7. IQR (Exclusive method)
8. IQR (Inclusive method)
9. Detect Outliers (IQR Inclusive method)
0. Exit


Enter your choice (0–9):  8


inclusive IQR of 'Age': 23.0

Choose an operation:
1. Mean
2. Median
3. Mode
4. Range
5. Standard Deviation
6. Variance
7. IQR (Exclusive method)
8. IQR (Inclusive method)
9. Detect Outliers (IQR Inclusive method)
0. Exit


Enter your choice (0–9):  7


exclusive IQR of 'Age': 23.0

Choose an operation:
1. Mean
2. Median
3. Mode
4. Range
5. Standard Deviation
6. Variance
7. IQR (Exclusive method)
8. IQR (Inclusive method)
9. Detect Outliers (IQR Inclusive method)
0. Exit
