In [None]:
import pandas as pd

# Read the Titanic passenger CSV into a DataFrame.
# NOTE(review): this is an absolute path (it looks like a Colab Drive mount);
# it must exist in the runtime before this cell is executed.
TITANIC_CSV_PATH = "/content/drive/MyDrive/dataset/Copy of Titanic-Dataset.csv"
titanic_df = pd.read_csv(TITANIC_CSV_PATH)

# Summarise column names, dtypes and non-null counts to reveal missing data
titanic_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [None]:
# One-column DataFrame holding only the ticket fare
# (the list selector keeps it a DataFrame rather than a Series)
fare = titanic_df.loc[:, ['Fare']]
print(fare.iloc[:5])


      Fare
0   7.2500
1  71.2833
2   7.9250
3  53.1000
4   8.0500


In [None]:
# Two-column view of the data: passenger class alongside age
class_age = titanic_df.loc[:, ['Pclass', 'Age']]
print(class_age.iloc[:5])


   Pclass   Age
0       3  22.0
1       1  38.0
2       3  26.0
3       1  35.0
4       3  35.0


In [None]:
# Survival flag paired with the passenger's sex
survived_gender = titanic_df.loc[:, ['Survived', 'Sex']]
print(survived_gender.iloc[:5])


   Survived     Sex
0         0    male
1         1  female
2         1  female
3         1  female
4         0    male


In [None]:
# Keep only the rows whose fare is strictly greater than 100
fare_gt_100 = titanic_df.loc[titanic_df['Fare'] > 100]
print(fare_gt_100)


     PassengerId  Survived  Pclass  \
27            28         0       1   
31            32         1       1   
88            89         1       1   
118          119         0       1   
195          196         1       1   
215          216         1       1   
258          259         1       1   
268          269         1       1   
269          270         1       1   
297          298         0       1   
299          300         1       1   
305          306         1       1   
306          307         1       1   
307          308         1       1   
311          312         1       1   
318          319         1       1   
319          320         1       1   
325          326         1       1   
332          333         0       1   
334          335         1       1   
337          338         1       1   
341          342         1       1   
373          374         0       1   
377          378         0       1   
380          381         1       1   
390         

In [None]:
# Rows for first-class passengers only (Pclass equal to 1)
first_class = titanic_df.loc[titanic_df['Pclass'] == 1]
print(first_class)


In [None]:
# Female passengers younger than 18.
# Age has not been imputed at this point in the notebook, so rows with a
# missing age compare False against 18 and are left out of this subset.
female_under_18 = titanic_df.loc[
    (titanic_df['Sex'] == 'female') & (titanic_df['Age'] < 18)
]
print(female_under_18)


In [None]:
# Passengers whose port of embarkation is 'C' (Cherbourg) or 'S' (Southampton).
# Rows with a missing Embarked value do not match either code and are excluded.
embarked_c_or_s = titanic_df.loc[titanic_df['Embarked'].isin(['C', 'S'])]
print(embarked_c_or_s)


In [None]:
# Passengers travelling in first or second class (Pclass is 1 or 2)
first_second_class = titanic_df.loc[titanic_df['Pclass'].isin([1, 2])]
print(first_second_class)


In [None]:
# Impute missing ages with the median age.
# The filled column is assigned back to the DataFrame instead of calling
# fillna(inplace=True) on the `titanic_df['Age']` selection: an in-place call
# on a column selection is chained assignment, which emits a FutureWarning in
# pandas 2.x and does not update `titanic_df` once Copy-on-Write is enabled.
titanic_df['Age'] = titanic_df['Age'].fillna(titanic_df['Age'].median())

# Fare divided by age, computed on the imputed Age column so every row gets
# a value. Note this mutates titanic_df: later cells see the imputed ages.
titanic_df['fare_per_year'] = titanic_df['Fare'] / titanic_df['Age']


In [None]:
# Rows whose fare-to-age ratio is strictly above 5
high_fare_age = titanic_df.loc[titanic_df['fare_per_year'] > 5]
print(high_fare_age)


In [None]:
# Order the high fare-per-year subset from largest ratio to smallest
high_fare_age_srt = high_fare_age.sort_values(
    'fare_per_year',
    ascending=False,
)
print(high_fare_age_srt)


In [None]:
# Reduce the sorted subset to the passenger name and the fare-per-year ratio
result = high_fare_age_srt.loc[:, ['Name', 'fare_per_year']]
print(result)


In [None]:
# Add a column with the fare divided by the passenger class number
# (this mutates titanic_df, so later cells see the new column)
titanic_df['fare_per_class'] = (
    titanic_df.loc[:, 'Fare'] / titanic_df.loc[:, 'Pclass']
)


In [None]:
# Male passengers aged 18 or older.
# Age was median-imputed by an earlier cell, so passengers whose age was
# originally missing are judged on the imputed value here.
adult_males = titanic_df.loc[
    (titanic_df['Age'] >= 18) & (titanic_df['Sex'] == 'male')
]
print(adult_males)


In [None]:
# Order adult males from the highest fare-per-class value to the lowest
adult_males_srt = adult_males.sort_values(
    'fare_per_class',
    ascending=False,
)
print(adult_males_srt)


In [None]:
# Keep just name, age and fare-per-class for the sorted adult males.
# Note: this rebinds `result`, replacing the value set by the earlier
# fare_per_year cell.
result = adult_males_srt.loc[:, ['Name', 'Age', 'fare_per_class']]
print(result)


In [None]:
# Sum of the Fare column across every passenger
total_fare = titanic_df.loc[:, 'Fare'].sum()
print(total_fare)


In [None]:
# Fare revenue per passenger class: for each class number, select the fares
# of that class and sum them. Unpacked in class order 1, 2, 3.
fare_first_class, fare_second_class, fare_third_class = (
    titanic_df.loc[titanic_df['Pclass'] == pclass, 'Fare'].sum()
    for pclass in (1, 2, 3)
)

print(fare_first_class, fare_second_class, fare_third_class)


In [None]:
# Collect the per-class fare totals in class order (first, second, third)
class_fare_totals = [
    fare_first_class,
    fare_second_class,
    fare_third_class,
]
print(class_fare_totals)


In [None]:
# Share of total fare revenue contributed by each class, as fractions of 1,
# in the same order as class_fare_totals
class_fare_proportion = [
    class_total / total_fare for class_total in class_fare_totals
]
print(class_fare_proportion)


In [None]:
# Categorize passengers by age into child / adult / senior.
# Bins are left-closed (right=False): child = [0, 18), adult = [18, 65),
# senior = [65, inf). This keeps an 18-year-old in 'adult', matching the
# `Age < 18` and `Age >= 18` cut-offs used by the earlier filter cells; the
# default right-closed bins labelled age 18 as 'child'. For whole-number ages
# the senior cut-off is unchanged (older than 64); fractional ages between
# 64 and 65 now fall in 'adult'.
titanic_df['age_group'] = pd.cut(
    titanic_df['Age'],
    bins=[0, 18, 65, float('inf')],
    labels=['child', 'adult', 'senior'],
    right=False,
)


In [None]:
# Passenger count = number of rows in the DataFrame
total_passengers = titanic_df.shape[0]


In [None]:
# Number of passengers per age_group label, most frequent first
age_group_counts = titanic_df.loc[:, 'age_group'].value_counts()
print(age_group_counts)


In [None]:
# Share of passengers in each age group.
# Despite the variable name, the values are percentages (0-100), not
# fractions: each count is divided by the passenger total and scaled by 100.
age_group_proportion = (age_group_counts / total_passengers) * 100
print(age_group_proportion)
