In [38]:
import pandas as pd

# Load the Titanic training set used by the summary statistics in this cell.
# NOTE(review): absolute, machine-specific Windows path; the notebook only
# runs where this directory layout exists.
train_data = pd.read_csv(r"D:\Projects\Titanic-ML\train.csv")

def calculate_survival_rate(data, condition):
    """Count the rows of ``data`` selected by ``condition`` and their share of all rows.

    Despite the name, the function does not inspect any particular column:
    what is counted depends entirely on the mask that is passed in.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to select rows from.
    condition : pandas.Series of bool
        Boolean mask handed to ``data.loc``.

    Returns
    -------
    tuple of (int, float)
        The number of selected rows, and that number divided by ``len(data)``.
        The rate is relative to *all* rows of ``data`` (not to a subgroup such
        as "all women"). It is ``0.0`` when ``data`` has no rows.
    """
    matched_count = len(data.loc[condition])

    # Use the frame's own length as the denominator. Looking the rows up again
    # by the mask's index labels copies the whole frame just to count it and
    # can over-count when the index contains duplicate labels.
    population = len(data)
    if population == 0:
        # Nothing to take a share of; avoid ZeroDivisionError on an empty frame.
        return matched_count, 0.0

    return matched_count, matched_count / population

# Boolean mask of passengers who did not survive; every breakdown below reuses it.
perished = train_data.Survived == 0

# Breakdown by sex. The percentages are shares of all rows in train_data,
# because calculate_survival_rate divides by the size of the full frame.
women_count, women_rate = calculate_survival_rate(train_data, (train_data.Sex == 'female') & perished)
print(
    f"Total women who did not survive: {women_count}",
    f"% of women who did not survive: {women_rate * 100:.3f} %\n",
    sep="\n",
)

men_count, men_rate = calculate_survival_rate(train_data, (train_data.Sex == 'male') & perished)
print(
    f"Total men who did not survive: {men_count}",
    f"% of men who did not survive: {men_rate * 100:.3f} %\n",
    sep="\n",
)

# Breakdown by passenger class (Pclass values 1, 2 and 3).
for i in (1, 2, 3):
    class_count, class_rate = calculate_survival_rate(train_data, (train_data.Pclass == i) & perished)
    print(
        f"Total people in {i} Class who did not survive: {class_count}",
        f"% of people in {i} Class who did not survive: {class_rate * 100:.3f} %\n",
        sep="\n",
    )

# Overall figure across every passenger in the training set.
total_count, overall_rate = calculate_survival_rate(train_data, perished)
print(
    f"Total who did not survive: {total_count}",
    f"% of all people who did not survive: {overall_rate * 100:.3f} %\n",
    sep="\n",
)

Total women who did not survive: 81
% of women who did not survive: 9.091 %

Total men who did not survive: 468
% of men who did not survive: 52.525 %

Total people in 1 Class who did not survive: 80
% of people in 1 Class who did not survive: 8.979 %

Total people in 2 Class who did not survive: 97
% of people in 2 Class who did not survive: 10.887 %

Total people in 3 Class who did not survive: 372
% of people in 3 Class who did not survive: 41.751 %

Total who did not survive: 549
% of all people who did not survive: 61.616 %



In [39]:
from sklearn.ensemble import RandomForestClassifier

# Load the labelled training set and the unlabelled test set.
train_data = pd.read_csv(r"D:\Projects\Titanic-ML\train.csv")
test_data = pd.read_csv(r"D:\Projects\Titanic-ML\test.csv")

# Target column to predict.
y = train_data["Survived"]

# One-hot encode the string-valued columns among the selected features.
features = ["Pclass", "Sex", "SibSp", "Parch","Fare", "Embarked", "Age"]
X = pd.get_dummies(train_data[features])
# The two frames are encoded independently, so a category that appears in only
# one of them would give X_test different (or differently ordered) columns than
# the model was fitted on. Force the test matrix onto the training columns:
# dummies missing from the test set become all-zero columns, extras are dropped.
X_test = pd.get_dummies(test_data[features]).reindex(columns=X.columns, fill_value=0)

# NOTE(review): Age has missing values in train.csv (see the data preview in
# this notebook) and nothing here imputes them; this presumably relies on the
# installed scikit-learn accepting NaN in RandomForestClassifier - confirm, or
# impute explicitly.
model = RandomForestClassifier(n_estimators=1000, max_depth=5, random_state=0)
model.fit(X, y)
predictions = model.predict(X_test)

# Submission-shaped frame: one predicted label per test passenger.
output = pd.DataFrame({'PassengerId': test_data.PassengerId, 'Survived': predictions})
output.head(10)


Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1
5,897,0
6,898,1
7,899,0
8,900,1
9,901,0


In [40]:
# Write the predictions frame to disk without the DataFrame index column.
output.to_csv(path_or_buf=r"D:\Projects\Titanic-ML\predictions.csv", index=False)

In [41]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import pandas as pd

# Re-read both CSVs; only train_data is actually used in this cell.
train_data = pd.read_csv(r"D:\Projects\Titanic-ML\train.csv")
test_data = pd.read_csv(r"D:\Projects\Titanic-ML\test.csv")

# Feature matrix (one-hot encoded) and target for the comparison.
features = ["Pclass", "Sex", "SibSp", "Parch"]
y = train_data["Survived"]
X = pd.get_dummies(train_data[features])

# Hold out 33% of the rows for validation; the fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0,
)

# Candidate classifiers, keyed by the label printed in the report.
models = dict([
    ("Logistic Regression", LogisticRegression(max_iter=2000, random_state=0)),
    ("Random Forest", RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0)),
    ("Gradient Boosting", GradientBoostingClassifier(n_estimators=1000, max_depth=10, random_state=0)),
    ("Support Vector Machine", SVC(kernel='linear', random_state=0)),
])

# Fit each candidate on the training split and report its validation accuracy.
for name in models:
    model = models[name]
    score = model.fit(X_train, y_train).score(X_val, y_val)
    print(f"{name}: {score:.4f}")


Logistic Regression: 0.7932
Random Forest: 0.7864
Gradient Boosting: 0.7627
Support Vector Machine: 0.7831


In [42]:
# Preview the first rows of the training set.
# This file is train.csv (the preview includes the Survived label), so bind it
# to train_data: assigning it to test_data would silently replace the real test
# frame for any cell executed afterwards.
train_data = pd.read_csv(r"D:\Projects\Titanic-ML\train.csv")
train_data.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C
