In [13]:
import pandas as pd
import numpy as np

# Konfigurasi Pandas agar tampilan float lebih rapi (4 angka desimal)
pd.options.display.float_format = '{:.4f}'.format 

# 1. MEMUAT DAN MEMBERSIHKAN DATA
# ----------------------------------------------------------------------
file_path = 'titanic.csv'

try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"ERROR: File '{file_path}' tidak ditemukan.")
    exit() 

# Mengisi nilai "Age" yang hilang dengan nilai median
median_age = df['age'].median()
df['age'].fillna(median_age, inplace=True)


# TUGAS 1: RATA-RATA (AVERAGES)
# ----------------------------------------------------------------------
# Menghitung rata-rata (mean) dari kolom yang diminta
averages = df[['age', 'fare', 'survived', 'pclass']].mean().to_frame(name='Rata-rata')

print("### 1. RATA-RATA (AVERAGES) ###")
print(averages)
print("-" * 30)
print("\n")


# TUGAS 2: NILAI MAKSIMUM DAN MINIMUM
# ----------------------------------------------------------------------
# Menghitung nilai maksimum dan minimum dari 'age' dan 'fare'
max_min_data = pd.DataFrame({
    'Min Value': [df['age'].min(), df['fare'].min()],
    'Max Value': [df['age'].max(), df['fare'].max()]
}, index=['Age', 'Fare'])

print("### 2. MAX dan MIN Usia (Age) dan Fare ###")
print(max_min_data)
print("-" * 30)
print("\n")


# TUGAS 3: IDENTIFIKASI OUTLIER FARE (Hanya Data Record)
# ----------------------------------------------------------------------

# Hitung Kuartil dan IQR
Q1 = df['fare'].quantile(0.25)
Q3 = df['fare'].quantile(0.75)
IQR = Q3 - Q1
upper_bound = Q3 + 1.5 * IQR

# Filter record data outlier (Fare di atas batas atas)
fare_outliers = df[df['fare'] > upper_bound]

print("### 3. OUTLIER FARE (DATA RECORD) ###")
print(f"Batas Atas Outlier (1.5 * IQR Rule): {upper_bound:.4f}")
print(f"Total Record Outlier Fare: {len(fare_outliers)}")

print("\nContoh 10 Data Record Outlier Fare (Name, Age, Fare):")
# Tampilkan kolom yang relevan dari 10 data outlier pertama
print(fare_outliers[['name', 'age', 'fare']].head(10).to_string(index=False))
print("-" * 30)

# Catatan: Visual Data (Box Plot) dihilangkan untuk menghindari error.

### 1. RATA-RATA (AVERAGES) ###
          Rata-rata
age         29.6991
fare        34.6945
survived     0.4062
pclass       2.2367
------------------------------


### 2. MAX dan MIN Usia (Age) dan Fare ###
      Min Value  Max Value
Age      0.4200    80.0000
Fare     0.0000   512.3292
------------------------------


### 3. OUTLIER FARE (DATA RECORD) ###
Batas Atas Outlier (1.5 * IQR Rule): 71.3625
Total Record Outlier Fare: 94

Contoh 10 Data Record Outlier Fare (Name, Age, Fare):
                                    name     age     fare
          Fortune, Mr. Charles Alexander 19.0000 263.0000
                 Meyer, Mr. Edgar Joseph 28.0000  82.1708
Harper, Mrs. Henry Sleeper (Myna Haxtun) 49.0000  76.7292
                     Icard, Miss. Amelie 38.0000  80.0000
             Harris, Mr. Henry Birkhardt 45.0000  83.4750
                    Hood, Mr. Ambrose Jr 21.0000  73.5000
              Fortune, Miss. Mabel Helen 23.0000 263.0000
               White, Mr. Richard Frasar 21.00

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['age'].fillna(median_age, inplace=True)
