In [1]:
import pandas as pd

In [2]:
# Columns kept for the rest of the notebook; everything else in the CSV is discarded.
selected_columns = [
    'PassengerId', 'Survived', 'Pclass', 'Sex', 'Age',
    'SibSp', 'Parch', 'Fare', 'Embarked',
]
# Read the CSV and immediately narrow it to the selected columns.
data = pd.read_csv("Titanic-Dataset.csv")[selected_columns]
# Cell output: number of rows loaded.
len(data)

891

In [3]:
# Discard every row that has a missing value in any of the selected columns.
# The surviving rows keep their original row labels, so the index has gaps
# afterwards (in the output below, label 887 is followed directly by 889).
data = data.dropna()
# Cell output: display the cleaned frame.
data

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,male,22.0,1,0,7.2500,S
1,2,1,1,female,38.0,1,0,71.2833,C
2,3,1,3,female,26.0,0,0,7.9250,S
3,4,1,1,female,35.0,1,0,53.1000,S
4,5,0,3,male,35.0,0,0,8.0500,S
...,...,...,...,...,...,...,...,...,...
885,886,0,3,female,39.0,0,5,29.1250,Q
886,887,0,2,male,27.0,0,0,13.0000,S
887,888,1,1,female,19.0,0,0,30.0000,S
889,890,1,1,male,26.0,0,0,30.0000,C


In [4]:
# Cell output: list the column labels currently present in `data`.
data.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch',
       'Fare', 'Embarked'],
      dtype='object')

In [5]:
# Same column list as the one defined in the loading cell; re-declared here unchanged.
selected_columns = ['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']

In [6]:
# Re-apply the column selection. `data` already holds exactly these columns
# (see the `data.columns` output above), so the set of columns is unchanged.
data = data[selected_columns]

In [7]:
# Replace each age by its integer part (int() drops the fraction);
# a missing age would be mapped to pd.NA instead of being converted.
data.loc[:, "Age"] = data["Age"].map(lambda age: pd.NA if pd.isna(age) else int(age))

## Categorize features

In [8]:
def set_age(x):
    """Classify an age as ``"minor"`` or ``"adult"``.

    Parameters
    ----------
    x : number or missing value
        The age to classify.

    Returns
    -------
    str or pandas.NA
        ``pd.NA`` when ``x`` is missing, ``"minor"`` when ``x`` is below 18,
        and ``"adult"`` otherwise (18 itself counts as adult).
    """
    # Missing ages stay missing rather than being forced into a category.
    if pd.isna(x):
        return pd.NA
    return "minor" if x < 18 else "adult"
    
# Label every passenger via set_age and store the labels in a new "AgeCat" column.
# Series.map already returns a fresh Series, so no extra copy is taken.
age_cat = data["Age"].map(set_age)
data.loc[:, "AgeCat"] = age_cat

# Lower-case the embarkation codes (presumably so they are valid lower-case
# atoms in the facts generated further down — confirm against the consumer).
embarked = data["Embarked"].map(lambda code: code.lower())
data.loc[:, "Embarked"] = embarked

In [9]:
# Remove the numeric "Age" column (the "AgeCat" column was derived from it above).
# Rebind `data` to the frame returned by drop() instead of mutating it in place:
# `inplace=True` brings no benefit and hides the state change from the reader.
data = data.drop(columns=["Age"])

In [10]:
# Cell output: how many passengers fall into each age category.
data["AgeCat"].value_counts()

AgeCat
adult    599
minor    113
Name: count, dtype: int64

## Create background knowledge

In [11]:
# Open the "properties" file for writing (created, or truncated if it exists);
# the cells below write their facts to it through `f`.
# NOTE(review): no visible cell closes this handle explicitly, so buffered facts
# only reach disk once the handle is closed or garbage-collected — consider an
# explicit f.close() after the last fact has been written.
f = open("properties", "w")

In [12]:
# Emit one `passenger(Id).` fact per line for every passenger id in `data`.
f.writelines(f"passenger({pid}).\n" for pid in data["PassengerId"])

In [13]:
# Emit one `class(Id, Pclass).` fact per passenger.
# The id and the class are read side by side from the same rows, so the output
# no longer relies on each row label being exactly PassengerId - 1, and avoids
# a separate label lookup for every row.
for pid, pclass in zip(data["PassengerId"], data["Pclass"]):
    f.write(f"class({pid}, {pclass}).\n")

In [14]:
# Emit one `sex(Id, Sex).` fact per passenger.
# The id and the sex are read side by side from the same rows, so the output
# no longer relies on each row label being exactly PassengerId - 1, and avoids
# a separate label lookup for every row.
for pid, sex in zip(data["PassengerId"], data["Sex"]):
    f.write(f"sex({pid}, {sex}).\n")

In [15]:
# Emit one `age(Id, AgeCat).` fact per passenger.
# The id and the age category are read side by side from the same rows, so the
# output no longer relies on each row label being exactly PassengerId - 1, and
# avoids a separate label lookup for every row.
for pid, age in zip(data["PassengerId"], data["AgeCat"]):
    f.write(f"age({pid}, {age}).\n")

In [16]:
# Emit one `embarked(Id, Port).` fact per passenger.
# The id and the port code are read side by side from the same rows, so the
# output no longer relies on each row label being exactly PassengerId - 1, and
# avoids a separate label lookup for every row.
# Note: the loop variable rebinds the name `embarked`, as the original loop did.
for pid, embarked in zip(data["PassengerId"], data["Embarked"]):
    f.write(f"embarked({pid}, {embarked}).\n")

In [17]:
# Write a `survived(Id).` fact to the "survived" file for every passenger whose
# Survived flag is 1.
# The `with` block closes the file when the cell finishes, so all facts are
# flushed to disk; the flag is read alongside the id from the same row instead
# of being looked up through the row label PassengerId - 1.
with open("survived", "w") as f:
    for pid, survived_flag in zip(data["PassengerId"], data["Survived"]):
        if survived_flag == 1:
            f.write(f"survived({pid}).\n")

In [18]:
# Write one fact to the "dead" file for every passenger whose Survived flag is 0.
# NOTE(review): the facts use the `survived(Id)` predicate, exactly as in the
# "survived" file — presumably this file lists negative examples of survived/1;
# confirm that this is intended and not a copy-paste slip.
# The `with` block closes the file when the cell finishes, so all facts are
# flushed to disk; the flag is read alongside the id from the same row instead
# of being looked up through the row label PassengerId - 1.
with open("dead", "w") as f:
    for pid, survived_flag in zip(data["PassengerId"], data["Survived"]):
        if survived_flag == 0:
            f.write(f"survived({pid}).\n")

## Create Popper examples

In [19]:
# Write the "examples" file: first a `pos(survived(Id)).` line for every
# passenger whose Survived flag is 1, then a `neg(survived(Id)).` line for every
# passenger whose flag is 0.
# A single `with` block replaces the previous open-for-write followed by a second
# open-for-append: the file is opened once and is closed (and therefore flushed)
# when the cell finishes, instead of leaving the last handle open.
with open("examples", "w") as f:
    # Pair each id with its flag from the same row rather than looking the flag
    # up through the row label PassengerId - 1.
    outcomes = list(zip(data["PassengerId"], data["Survived"]))
    f.writelines(f"pos(survived({pid})).\n" for pid, flag in outcomes if flag == 1)
    f.writelines(f"neg(survived({pid})).\n" for pid, flag in outcomes if flag == 0)