Skip to content

Commit

Permalink
feat: add Titanic survivor prediction model #5
Browse files Browse the repository at this point in the history
Uses only Pclass as the feature [training-set accuracy: 0.68539; not evaluated on the held-out test split]
  • Loading branch information
acio-o9 committed May 27, 2020
1 parent 5c87503 commit 712e96d
Show file tree
Hide file tree
Showing 2 changed files with 331 additions and 0 deletions.
301 changes: 301 additions & 0 deletions model_practice/TitanicSurviverPredict.ipynb
@@ -0,0 +1,301 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "TitanicSurviverPredict.ipynb",
"provenance": [],
"mount_file_id": "18XnKwoJOB9KTOZ3MX5HKvkqQqSgBbUqI",
"authorship_tag": "ABX9TyOUiSmOPBzbWY9au1FAceHZ",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/acio-o9/python-workspace/blob/feature%2FTitanic-5%2Fadd-titanic-surviver-predict-model/model_practice/TitanicSurviverPredict.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "utbUA1TKqFGo",
"colab_type": "code",
"colab": {}
},
"source": [
"import pandas as pd\n",
"import numpy as np"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "v96JX7Flle9E",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 272
},
"outputId": "61515dfc-9d12-4748-a25a-5f14c0fb660f"
},
"source": [
"train_data = pd.read_csv('drive/My Drive/train_data/titanic_train.csv')\n",
"train_data.head()"
],
"execution_count": 52,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>A/5 21171</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>PC 17599</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass ... Fare Cabin Embarked\n",
"0 1 0 3 ... 7.2500 NaN S\n",
"1 2 1 1 ... 71.2833 C85 C\n",
"2 3 1 3 ... 7.9250 NaN S\n",
"3 4 1 1 ... 53.1000 C123 S\n",
"4 5 0 3 ... 8.0500 NaN S\n",
"\n",
"[5 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 52
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "vp49nn2usonr",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "23381355-cc6e-4bac-d630-7224fa1e93ca"
},
"source": [
"X = pd.DataFrame(train_data.iloc[:, 2]) # Pclass \n",
"y = pd.DataFrame(train_data.iloc[:, 1]) # Survived\n",
"print(X.shape)\n",
"print(y.shape)"
],
"execution_count": 53,
"outputs": [
{
"output_type": "stream",
"text": [
"(891, 1)\n",
"(891, 1)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "avgsvmL9t_sn",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "6c87c837-ba27-449f-edcb-9ad746f6ef4d"
},
"source": [
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)\n",
"print(X_train.shape)\n",
"print(X_test.shape)"
],
"execution_count": 54,
"outputs": [
{
"output_type": "stream",
"text": [
"(623, 1)\n",
"(268, 1)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "kbX47pzmu5D-",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 88
},
"outputId": "398a99bb-ee87-409d-e3ef-ef137c5619ad"
},
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"model = LogisticRegression()\n",
"model.fit(X_train, y_train)\n",
"print(model.score(X_train, y_train))"
],
"execution_count": 55,
"outputs": [
{
"output_type": "stream",
"text": [
"0.6853932584269663\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py:760: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "WFqxHH6NvQ50",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}
30 changes: 30 additions & 0 deletions model_practice/titanicsurviverpredict.py
@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
"""TitanicSurviverPredict.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/18XnKwoJOB9KTOZ3MX5HKvkqQqSgBbUqI
"""

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# --- Configuration -----------------------------------------------------------
# Path of the training CSV, relative to the current working directory.
DATA_PATH = 'drive/My Drive/train_data/titanic_train.csv'
# Fraction of rows held out for evaluation.
TEST_SIZE = 0.3
# Fixed seed so the train/test split (and therefore the reported scores)
# is the same on every run.
SEED = 42

# --- Load data ---------------------------------------------------------------
train_data = pd.read_csv(DATA_PATH)
# A bare `train_data.head()` is silently discarded when this file runs as a
# script (it only displays inside a notebook), so print it explicitly.
print(train_data.head())

# --- Select feature and target -----------------------------------------------
# Select by column name rather than by position so the script keeps working
# if the column order of the CSV changes.
X = train_data[['Pclass']]   # 2-D feature matrix with a single column
# The target is kept 1-D: passing a one-column DataFrame to `fit` makes
# scikit-learn emit a DataConversionWarning.
y = train_data['Survived']
print(X.shape)
print(y.shape)

# --- Train / test split ------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)
print(X_train.shape)
print(X_test.shape)

# --- Fit and evaluate --------------------------------------------------------
model = LogisticRegression()
model.fit(X_train, y_train)
# Accuracy on the rows the model was fitted on (optimistic by construction).
print(f"train accuracy: {model.score(X_train, y_train)}")
# Accuracy on the held-out rows; this is the figure to report.
print(f"test accuracy: {model.score(X_test, y_test)}")

0 comments on commit 712e96d

Please sign in to comment.