In [1]:
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
data_raw = pd.read_csv('./train.csv')
data_raw.loc[data_raw['Sex'] == 'male', 'Sex'] = 1
data_raw.loc[data_raw['Sex'] == 'female', 'Sex'] = 0

data_raw.drop(index=data_raw[data_raw['Age'].isnull()].index, inplace=True)


data_X = data_raw[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']]
data_y = data_raw['Survived']

scal = StandardScaler()
scal.fit_transform(data_X)

train_X, val_X, train_y, val_y = train_test_split(data_X, data_y, test_size=0.2)

In [3]:
ros = RandomOverSampler(random_state=0)

train_X, train_y = ros.fit_resample(train_X, train_y)

In [4]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

In [5]:
svc = SVC()
svc.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [6]:
params = {
    'C': [0.5, 1.0, 10.0, 100.0, 1000.0],
    'gamma': [1,0.1,0.01,0.001]
}
gs = GridSearchCV(svc, params, scoring='accuracy')

In [7]:
svc_fitted = gs.fit(train_X, train_y, )

In [8]:
pred = svc_fitted.predict(val_X)
print(classification_report(val_y, pred))

              precision    recall  f1-score   support

           0       0.74      0.84      0.79        83
           1       0.73      0.60      0.66        60

    accuracy                           0.74       143
   macro avg       0.74      0.72      0.73       143
weighted avg       0.74      0.74      0.74       143



In [9]:
from sklearn.metrics import mean_absolute_error

print(mean_absolute_error(val_y, pred))

0.25874125874125875


In [10]:
test = pd.read_csv('./test.csv')
pas = test["PassengerId"]

test.loc[test["Sex"] == 'male', "Sex"] = 1
test.loc[test["Sex"] == 'female', "Sex"] = 0

test.loc[test["Age"].isnull(), "Age"] = test['Age'].median()
test.loc[test["Fare"].isnull(), "Fare"] = test['Fare'].median()

test_X = test[["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare"]]

scal.fit_transform(test_X)

pred = svc_fitted.predict(test_X)

print(pred)

pd.concat([pas, pd.DataFrame(pred, columns=["Survived"])], ignore_index=True, axis=1).to_csv('final.csv', index=False)

[0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 1 0 1 1 1 0 1 0 0 0 0 0 0 0 1 0 1
 1 0 1 0 1 0 1 1 0 0 0 1 0 1 0 1 1 0 0 0 0 0 1 0 0 0 1 0 1 1 1 1 1 1 0 1 1
 0 0 0 1 0 1 1 1 0 0 0 0 1 1 1 1 1 0 1 0 1 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 0
 1 1 1 1 0 0 1 0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0
 1 0 1 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 1 1 0 1 0 0 0
 0 1 0 0 0 0 1 1 0 1 0 1 1 0 1 1 1 1 1 0 1 0 0 1 0 0 0 0 1 0 0 1 1 1 0 1 0
 1 0 0 1 0 1 0 0 1 1 0 0 1 0 1 0 1 0 1 0 1 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 1
 0 0 0 1 1 0 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
 1 0 1 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 1 0 1 1 0 1 1 0
 0 1 0 0 1 1 1 0 0 1 0 0 1 0 0 1 0 0 0 0 1 1 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0
 1 1 1 1 1 1 0 1 0 0 0]
