### Import Libraries & Read CSV

In [1]:
import pandas as pd

In [2]:
# Read the Titanic passenger CSV into the notebook-wide DataFrame `df`
df = pd.read_csv(filepath_or_buffer="data/Titanic-Dataset.csv")

# Reaching this line means read_csv returned without raising
print("Data loaded successfully!")

Data loaded successfully!


### Inspect the Data

In [3]:
# Preview the first five rows
print("Head of DataFrame:")
print(df.head())

# Preview the last five rows
print("\nTail of DataFrame:")
print(df.tail())

# Columns, dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare: wrapping it in print() would append a stray "None" line.
print("\nDataFrame Info:")
df.info()

# Column labels
print("\nColumns:")
print(df.columns)

Head of DataFrame:
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500 

### Summary Statistics

In [4]:
# Descriptive statistics for the numeric columns
print("\nSummary Statistics (Numeric):")
numeric_summary = df.describe()
print(numeric_summary)

# Individual figures for Age and Fare, followed by the passenger count
age_col = df['Age']
fare_col = df['Fare']

print("\nMean Age:", round(age_col.mean(), 2))
print("Median Fare:", round(fare_col.median(), 2))
print("Min Age:", age_col.min())
print("Max Fare:", fare_col.max())
# count() tallies the non-null PassengerId entries
print("Total Passengers:", df['PassengerId'].count())


Summary Statistics (Numeric):
       PassengerId    Survived      Pclass         Age       SibSp  \
count   891.000000  891.000000  891.000000  714.000000  891.000000   
mean    446.000000    0.383838    2.308642   29.699118    0.523008   
std     257.353842    0.486592    0.836071   14.526497    1.102743   
min       1.000000    0.000000    1.000000    0.420000    0.000000   
25%     223.500000    0.000000    2.000000   20.125000    0.000000   
50%     446.000000    0.000000    3.000000   28.000000    0.000000   
75%     668.500000    1.000000    3.000000   38.000000    1.000000   
max     891.000000    1.000000    3.000000   80.000000    8.000000   

            Parch        Fare  
count  891.000000  891.000000  
mean     0.381594   32.204208  
std      0.806057   49.693429  
min      0.000000    0.000000  
25%      0.000000    7.910400  
50%      0.000000   14.454200  
75%      0.000000   31.000000  
max      6.000000  512.329200  

Mean Age: 29.7
Median Fare: 14.45
Min Age: 0.42
Max Fare: 512.3292
Total Passengers: 891

### Filter Rows & Select Columns

In [5]:
# Normalise the Sex labels (trim surrounding whitespace, then lower-case) so
# the equality test below does not depend on the raw formatting
df['Sex'] = df['Sex'].str.strip().str.lower()

# Boolean row masks shared by the filters below
over_50 = df['Age'] > 50
is_female = df['Sex'] == 'female'

# Columns kept in the reduced views
keep_cols = ['Name', 'Sex', 'Age', 'Fare']

# Every female passenger
females = df[is_female]

# Every passenger older than 50
older_passengers = df[over_50]

# All rows, restricted to the selected columns
subset = df[keep_cols]

# Females older than 50, restricted to the selected columns
older_females = df.loc[over_50 & is_female, keep_cols]

print("\nOlder females sample:")
print(older_females.head())


Older females sample:
                                              Name     Sex   Age      Fare
11                        Bonnell, Miss. Elizabeth  female  58.0   26.5500
15                Hewlett, Mrs. (Mary D Kingcome)   female  55.0   16.0000
195                           Lurette, Miss. Elise  female  58.0  146.5208
268  Graham, Mrs. William Thompson (Edith Junkins)  female  58.0  153.4625
275              Andrews, Miss. Kornelia Theodosia  female  63.0   77.9583


### Save Filtered Results

In [6]:
# Write the older-females selection to a CSV file in the working directory,
# leaving the DataFrame index out of the file
older_females.to_csv(path_or_buf="older_females.csv", index=False)

In [7]:
# List the distinct labels present in the Sex column
sex_labels = df['Sex'].unique()
print(sex_labels)


['male' 'female']
