In [1]:
import pandas as pd

# Load the passenger table from the CSV file into a DataFrame
titanic = pd.read_csv('Titanic_Dataset.csv')

# Sanity check after loading: the (rows, columns) shape on the first line,
# followed by a preview of the first few rows
print(titanic.shape, titanic.head(), sep='\n')

(1309, 14)
   pclass  survived                                             name     sex  \
0       1         1                    Allen, Miss. Elisabeth Walton  female   
1       1         1                   Allison, Master. Hudson Trevor    male   
2       1         0                     Allison, Miss. Helen Loraine  female   
3       1         0             Allison, Mr. Hudson Joshua Creighton    male   
4       1         0  Allison, Mrs. Hudson J C (Bessie Waldo Daniels)  female   

     age  sibsp  parch  ticket      fare    cabin embarked boat   body  \
0  29.00      0      0   24160  211.3375       B5        S    2    NaN   
1   0.92      1      2  113781  151.5500  C22 C26        S   11    NaN   
2   2.00      1      2  113781  151.5500  C22 C26        S  NaN    NaN   
3  30.00      1      2  113781  151.5500  C22 C26        S  NaN  135.0   
4  25.00      1      2  113781  151.5500  C22 C26        S  NaN    NaN   

                         home.dest  
0                     St L

In [2]:
# Indexing with a single column label hands back a Series
# (one column of values together with the row index)
names = titanic['name']

# Preview the first five names
print(names.head(n=5))

0                      Allen, Miss. Elisabeth Walton
1                     Allison, Master. Hudson Trevor
2                       Allison, Miss. Helen Loraine
3               Allison, Mr. Hudson Joshua Creighton
4    Allison, Mrs. Hudson J C (Bessie Waldo Daniels)
Name: name, dtype: object


In [3]:
# Indexing with a *list* of labels returns a DataFrame with just those columns.
# That is why two pairs of brackets appear:
#   outer [ ]  -> the indexing operation on titanic
#   inner [ ]  -> the Python list naming the columns to keep
subset = titanic[
    ['name', 'age', 'sex', 'survived']
]
print(subset.head(n=5))

                                              name    age     sex  survived
0                    Allen, Miss. Elisabeth Walton  29.00  female         1
1                   Allison, Master. Hudson Trevor   0.92    male         1
2                     Allison, Miss. Helen Loraine   2.00  female         0
3             Allison, Mr. Hudson Joshua Creighton  30.00    male         0
4  Allison, Mrs. Hudson J C (Bessie Waldo Daniels)  25.00  female         0


In [4]:
# A bare expression on the last line of a cell is rendered by the notebook
# as that cell's output -- here, the `subset` DataFrame built in the previous cell.
subset

Unnamed: 0,name,age,sex,survived
0,"Allen, Miss. Elisabeth Walton",29.00,female,1
1,"Allison, Master. Hudson Trevor",0.92,male,1
2,"Allison, Miss. Helen Loraine",2.00,female,0
3,"Allison, Mr. Hudson Joshua Creighton",30.00,male,0
4,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",25.00,female,0
...,...,...,...,...
1304,"Zabour, Miss. Hileni",14.50,female,0
1305,"Zabour, Miss. Thamine",,female,0
1306,"Zakarian, Mr. Mapriededer",26.50,male,0
1307,"Zakarian, Mr. Ortin",27.00,male,0


In [5]:
# Passenger details only
passenger_info = titanic[['name', 'sex', 'age']]

# Survival outcome together with the ticket class
survival_data = titanic[['name', 'survived', 'pclass']]

# The smaller DataFrames can be inspected just like the full one.
# info() writes its report straight to stdout and returns None, so it is
# called on its own: wrapping it in print() would emit a stray "None" line.
passenger_info.info()

# describe() returns a DataFrame of summary statistics, so it is printed
print(survival_data.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   name    1309 non-null   object 
 1   sex     1309 non-null   object 
 2   age     1046 non-null   float64
dtypes: float64(1), object(2)
memory usage: 30.8+ KB
None
          survived       pclass
count  1309.000000  1309.000000
mean      0.381971     2.294882
std       0.486055     0.837836
min       0.000000     1.000000
25%       0.000000     2.000000
50%       0.000000     3.000000
75%       1.000000     3.000000
max       1.000000     3.000000


In [6]:
# Boolean Series with one entry per row: True where survived equals 1
survived_condition = titanic['survived'].eq(1)

# Indexing the frame with that Series keeps only the True rows
survivors = titanic[survived_condition]

# The same filter written inline, without the intermediate variable
survivors = titanic[titanic['survived'].eq(1)]

# Compare the size of the full table with the filtered one
print(
    f"Total passengers: {len(titanic)}",
    f"Survivors: {len(survivors)}",
    sep="\n",
)

Total passengers: 1309
Survivors: 500


In [7]:
# Step 1: build a boolean mask -- True where age is greater than 30.
# Rows whose age is missing compare as False, so the filter drops them too.
mask = titanic['age'].gt(30)
print(mask.head())  # One True/False value per row (first five shown)

# Step 2: index the frame with the mask; only rows marked True are kept
older_passengers = titanic[mask]

# Preview the name and age of the passengers that passed the filter
print(older_passengers.head()[['name', 'age']])

0    False
1    False
2    False
3    False
4    False
Name: age, dtype: bool
                                            name   age
5                            Anderson, Mr. Harry  48.0
6              Andrews, Miss. Kornelia Theodosia  63.0
7                         Andrews, Mr. Thomas Jr  39.0
8  Appleton, Mrs. Edward Dale (Charlotte Lamson)  53.0
9                        Artagaveytia, Mr. Ramon  71.0


In [8]:
# AND (&): a row is kept only when every condition is True
female_survivors = titanic[
    titanic['sex'].eq('female') & titanic['survived'].eq(1)
]

# OR (|): a row is kept when at least one condition is True
upper_class = titanic[
    titanic['pclass'].eq(1) | titanic['pclass'].eq(2)
]

# Several conditions can be chained with & -- all three must hold here
young_first_survivors = titanic[
    titanic['age'].lt(18)
    & titanic['pclass'].eq(1)
    & titanic['survived'].eq(1)
]

In [9]:
# Ascending sort on age: youngest passengers come first
sorted_by_age = titanic.sort_values(by='age')
print(sorted_by_age.head()[['name', 'age']])

# Descending sort on age: oldest passengers come first
sorted_by_age_desc = titanic.sort_values(by='age', ascending=False)
print(sorted_by_age_desc.head()[['name', 'age']])

# Two sort keys, each with its own direction:
# pclass ascending (low to high), then fare descending (high to low)
sorted_multiple = titanic.sort_values(
    by=['pclass', 'fare'],
    ascending=[True, False],
)

                                         name   age
763   Dean, Miss. Elizabeth Gladys "Millvina"  0.17
747   Danbom, Master. Gilbert Sigvard Emanuel  0.33
1240          Thomas, Master. Assad Alexander  0.42
427                 Hamalainen, Master. Viljo  0.67
658             Baclini, Miss. Helene Barbara  0.75
                                                   name   age
14                 Barkworth, Mr. Algernon Henry Wilson  80.0
61    Cavendish, Mrs. Tyrell William (Julia Florence...  76.0
1235                                Svensson, Mr. Johan  74.0
135                           Goldschmidt, Mr. George B  71.0
9                               Artagaveytia, Mr. Ramon  71.0


In [10]:
# Pull out the two columns being summarised
ages = titanic['age']
fares = titanic['fare']

# Simple aggregates computed on a single column
print("Average age:", ages.mean())
print("Minimum fare:", fares.min())
print("Maximum fare:", fares.max())
print("Total passengers:", len(titanic))

# count() tallies only the non-missing entries of the column
print("Passengers with known age:", ages.count())

Average age: 29.881137667304014
Minimum fare: 0.0
Maximum fare: 512.3292
Total passengers: 1309
Passengers with known age: 1046


In [11]:
# Average of the survived column within each pclass group,
# scaled by 100 so the result reads as a percentage
survival_by_class = 100 * titanic.groupby(by='pclass')['survived'].mean()
print(survival_by_class)

pclass
1    61.919505
2    42.960289
3    25.528914
Name: survived, dtype: float64
