In [0]:
!pip install kaggle

In [0]:
from googleapiclient.discovery import build
import io, os
from googleapiclient.http import MediaIoBaseDownload
from google.colab import auth

# Authenticate the Colab user; the Drive client built below presumably relies
# on the credentials established here.
auth.authenticate_user()

# Search Google Drive for a file named 'kaggle.json', requesting only file ids.
drive_service = build('drive', 'v3')
results = drive_service.files().list(
        q="name = 'kaggle.json'", fields="files(id)").execute()
kaggle_api_key = results.get('files', [])
if not kaggle_api_key:
    # Fail with a clear message instead of an IndexError on kaggle_api_key[0].
    raise FileNotFoundError("No file named 'kaggle.json' was found in Google Drive.")

filename = "/root/.kaggle/kaggle.json"
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Stream the first matching file to disk chunk by chunk, reporting progress.
request = drive_service.files().get_media(fileId=kaggle_api_key[0]['id'])
# The context manager closes the file handle even if a chunk download fails.
with io.FileIO(filename, 'wb') as fh:
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))

# Restrict the file to owner read/write. The mode must be the octal literal
# 0o600; the decimal literal 600 equals 0o1130, which is a different mode that
# does not even grant the owner read access.
os.chmod(filename, 0o600)


In [0]:
# Run the Kaggle CLI's `competitions list` command and show its output.
!kaggle competitions list

In [0]:
# Download the data for the `titanic` competition with the Kaggle CLI.
# NOTE(review): later cells read 'train.csv' and 'test.csv' by relative path,
# so the files are presumably expected in the working directory, unarchived.
# Confirm that the installed CLI version delivers them that way.
!kaggle competitions download -c titanic

In [0]:
import numpy as np
import pandas as pd

# Read train.csv from the current working directory and preview its first rows.
train_path = 'train.csv'
train_data = pd.read_csv(train_path)
train_data.head()

In [0]:
# Read test.csv from the current working directory and preview its first rows.
test_path = 'test.csv'
test_data = pd.read_csv(test_path)
test_data.head()

In [0]:
# 'Survived' values of the rows whose Sex is 'female'.
is_female = train_data['Sex'] == 'female'
women = train_data.loc[is_female, 'Survived']

# Sum of those values divided by their count: a ratio, not multiplied by 100.
rate_women = sum(women) / len(women)

print('% of women who survived:', rate_women)

In [0]:
# 'Survived' values of the rows whose Sex is 'male'.
is_male_row = train_data['Sex'] == 'male'
men = train_data.loc[is_male_row, 'Survived']

# Sum of those values divided by their count: a ratio, not multiplied by 100.
rate_men = sum(men) / len(men)

print('% of men who survived:', rate_men)

In [0]:
# Average of the training set's Age column (pandas' mean() skips missing
# values by default); reused below to fill the gaps in both splits.
mean_age = train_data.Age.mean()
print(mean_age)

In [0]:
# Replace missing ages in both splits with the training-set mean.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on `frame.Age`: that form is a chained in-place
# operation, which recent pandas versions warn about and which leaves the
# frame unchanged when Copy-on-Write is enabled.
train_data['Age'] = train_data['Age'].fillna(mean_age)
test_data['Age'] = test_data['Age'].fillna(mean_age)

In [0]:
# Encode Sex as an integer flag: 1 where the value equals 'male', 0 otherwise.
train_is_male = train_data['Sex'] == 'male'
train_data['Sex_bool'] = train_is_male.astype('int64')
train_data.head()

In [0]:
# Encode Sex as an integer flag: 1 where the value equals 'male', 0 otherwise.
test_is_male = test_data['Sex'] == 'male'
test_data['Sex_bool'] = test_is_male.astype('int64')
test_data.head()

In [0]:
# Target column the classifier is fitted against.
y = train_data['Survived']

# Model inputs: the same feature columns taken from each split.
features = ['Pclass', 'Sex_bool', 'SibSp', 'Parch', 'Age']
X, X_test = train_data[features], test_data[features]

In [0]:
from xgboost import XGBClassifier
from sklearn.metrics import mean_absolute_error

# Hyperparameters for the XGBoost classifier, gathered in one place.
xgb_params = {
    'n_estimators': 1000,
    'learning_rate': 0.5,
    'objective': 'binary:logistic',
    'max_depth': 5,
}

# Fit on the full training features and target, then predict the test rows.
my_model = XGBClassifier(**xgb_params)
my_model.fit(X, y)

predictions = my_model.predict(X_test)

In [0]:
# Pair each test PassengerId with its predicted label and write the result
# to my_submission.csv without the index column.
submission_columns = {
    'PassengerId': test_data.PassengerId,
    'Survived': predictions,
}
output = pd.DataFrame(submission_columns)
output.to_csv('my_submission.csv', index=False)

In [0]:
# Print the full contents of the submission file written above for a visual check.
!cat my_submission.csv

In [0]:
# Submit my_submission.csv to the `titanic` competition via the Kaggle CLI,
# with 'second submit' as the submission message.
!kaggle competitions submit -c titanic -f my_submission.csv -m 'second submit'