# 0. 定義問題
- 請問這個題目是屬於 **迴歸** 還是 **分類** 問題？ → 目標欄位 `Survived` 只有 0 / 1 兩種取值，因此是 **分類** 問題。  
  
分類問題的模型: logistic regression, decision tree, SVM  
分類問題的評估指標: confusion matrix, accuracy, recall, precision, F1-score

# 1. Load data & check data

In [1]:
# 將我們會使用到的套件 import 進來

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
# Read the train and test CSV files into DataFrames, then stack them row-wise
# into one frame (each part keeps its own row labels).
df_train, df_test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))
df_all = pd.concat((df_train, df_test), axis=0)

In [3]:
# Number of training rows; used later to cut the combined frame back into train / test.
n_train = len(df_train)

In [4]:
# Display the combined train + test frame (Jupyter truncates the rendering).
df_all

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0.0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1.0,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1.0,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1.0,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0.0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
413,1305,,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


### 檢視資料
這個步驟我們關心的有:
- 資料的維度
- 資料欄位的型態
- 資料缺失值多寡

In [5]:
# Peek at the data: the first 5 rows here; the following cells show the last 5 rows
# and a random sample of 5 rows.
df_all.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0.0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1.0,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1.0,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1.0,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0.0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [6]:
# Last 5 rows of the combined frame (these come from the test file, so Survived is NaN).
df_all.tail(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
413,1305,,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.05,,S
414,1306,,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9,C105,C
415,1307,,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.25,,S
416,1308,,3,"Ware, Mr. Frederick",male,,0,0,359309,8.05,,S
417,1309,,3,"Peter, Master. Michael J",male,,1,1,2668,22.3583,,C


In [7]:
# Random sample of 5 rows; no random_state is given, so the rows differ on every run.
df_all.sample(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
649,650,1.0,3,"Stanley, Miss. Amy Zillah Elsie",female,23.0,0,0,CA. 2314,7.55,,S
81,973,,1,"Straus, Mr. Isidor",male,67.0,1,0,PC 17483,221.7792,C55 C57,S
343,1235,,1,"Cardeza, Mrs. James Warburton Martinez (Charlo...",female,58.0,0,1,PC 17755,512.3292,B51 B53 B55,C
862,863,1.0,1,"Swift, Mrs. Frederick Joel (Margaret Welles Ba...",female,48.0,0,0,17466,25.9292,D17,S
182,183,0.0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9.0,4,2,347077,31.3875,,S


In [8]:
# Inspect each column's non-null count and dtype
df_all.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1309 entries, 0 to 417
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  1309 non-null   int64  
 1   Survived     891 non-null    float64
 2   Pclass       1309 non-null   int64  
 3   Name         1309 non-null   object 
 4   Sex          1309 non-null   object 
 5   Age          1046 non-null   float64
 6   SibSp        1309 non-null   int64  
 7   Parch        1309 non-null   int64  
 8   Ticket       1309 non-null   object 
 9   Fare         1308 non-null   float64
 10  Cabin        295 non-null    object 
 11  Embarked     1307 non-null   object 
dtypes: float64(3), int64(4), object(5)
memory usage: 132.9+ KB


In [9]:
# Summary statistics of the numeric columns
df_all.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,1309.0,891.0,1309.0,1046.0,1309.0,1309.0,1308.0
mean,655.0,0.383838,2.294882,29.881138,0.498854,0.385027,33.295479
std,378.020061,0.486592,0.837836,14.413493,1.041658,0.86556,51.758668
min,1.0,0.0,1.0,0.17,0.0,0.0,0.0
25%,328.0,0.0,2.0,21.0,0.0,0.0,7.8958
50%,655.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,982.0,1.0,3.0,39.0,1.0,0.0,31.275
max,1309.0,1.0,3.0,80.0,8.0,9.0,512.3292


In [10]:
# Summary statistics of the categorical (object-dtype) columns
df_all.describe(include=['O'])

Unnamed: 0,Name,Sex,Ticket,Cabin,Embarked
count,1309,1309,1309,295,1307
unique,1307,2,929,186,3
top,"Connolly, Miss. Kate",male,CA. 2343,C23 C25 C27,S
freq,2,843,11,6,914


In [11]:
#


# 2. 資料探索 - EDA

# 3. 資料前處理 - Data Preprocessing

- 切訓練集與驗證集
- 補缺失值 - NA
- 資料編碼 - Encoding
- 標準化與正規化

In [12]:
# Work on an independent copy so preprocessing never modifies df_all.
df_pre = df_all.copy(deep=True)

In [13]:
# Display the working copy before any preprocessing is applied.
df_pre

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0.0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1.0,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1.0,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1.0,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0.0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
413,1305,,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


### 3-2 補缺失值

In [14]:
# Count the missing values in each column.
df_pre.isna().sum()

PassengerId       0
Survived        418
Pclass            0
Name              0
Sex               0
Age             263
SibSp             0
Parch             0
Ticket            0
Fare              1
Cabin          1014
Embarked          2
dtype: int64

In [15]:
# Start imputing: fill missing ages with the mean of the observed ages.
# NOTE(review): the mean is taken over df_pre, which holds the train and test rows
# together, so test-set ages influence the fill value -- confirm this is intended.
age_mean = df_pre['Age'].mean()
df_pre = df_pre.assign(Age=df_pre['Age'].fillna(age_mean))

In [16]:
# Re-count missing values to confirm Age has been filled.
df_pre.isna().sum()

PassengerId       0
Survived        418
Pclass            0
Name              0
Sex               0
Age               0
SibSp             0
Parch             0
Ticket            0
Fare              1
Cabin          1014
Embarked          2
dtype: int64

In [17]:
# Notes on Cabin, written as a string literal (so the cell echoes the text as its output):
# 1. The column is unimportant -> drop it.
# 2. A Cabin value is only present under certain conditions.
# 3. It has many distinct values.
'''
關於 Cabin:
1. 這個欄位不重要 -> 刪掉欄位
2. 在某個條件下才會有 Cabin 值
3. 它的種類有很多種
'''

'\n關於 Cabin:\n1. 這個欄位不重要 -> 刪掉欄位\n2. 在某個條件下才會有 Cabin 值\n3. 它的種類有很多種\n'

In [18]:
# Rows that HAVE a Cabin value: mean survival, age and fare.
df_pre.loc[df_pre['Cabin'].notna(), ['Survived', 'Age', 'Fare']].mean()

Survived     0.666667
Age         36.373512
Fare        81.928998
dtype: float64

In [19]:
# Rows WITHOUT a Cabin value: mean survival, age and fare.
df_pre.loc[df_pre['Cabin'].isna(), ['Survived', 'Age', 'Fare']].mean()

Survived     0.299854
Age         27.992330
Fare        19.132707
dtype: float64

In [20]:
# Binary indicator: 1 when a Cabin value is recorded, 0 when it is missing.
df_pre['Cabin_notna'] = df_pre['Cabin'].notna().astype(int)

In [21]:
# Display the frame to check the new Cabin_notna column.
df_pre

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Cabin_notna
0,1,0.0,3,"Braund, Mr. Owen Harris",male,22.000000,1,0,A/5 21171,7.2500,,S,0
1,2,1.0,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.000000,1,0,PC 17599,71.2833,C85,C,1
2,3,1.0,3,"Heikkinen, Miss. Laina",female,26.000000,0,0,STON/O2. 3101282,7.9250,,S,0
3,4,1.0,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.000000,1,0,113803,53.1000,C123,S,1
4,5,0.0,3,"Allen, Mr. William Henry",male,35.000000,0,0,373450,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
413,1305,,3,"Spector, Mr. Woolf",male,29.881138,0,0,A.5. 3236,8.0500,,S,0
414,1306,,1,"Oliva y Ocana, Dona. Fermina",female,39.000000,0,0,PC 17758,108.9000,C105,C,1
415,1307,,3,"Saether, Mr. Simon Sivertsen",male,38.500000,0,0,SOTON/O.Q. 3101262,7.2500,,S,0
416,1308,,3,"Ware, Mr. Frederick",male,29.881138,0,0,359309,8.0500,,S,0


In [22]:
# List the distinct Cabin values to see how many different labels there are.
df_pre['Cabin'].unique()

array([nan, 'C85', 'C123', 'E46', 'G6', 'C103', 'D56', 'A6',
       'C23 C25 C27', 'B78', 'D33', 'B30', 'C52', 'B28', 'C83', 'F33',
       'F G73', 'E31', 'A5', 'D10 D12', 'D26', 'C110', 'B58 B60', 'E101',
       'F E69', 'D47', 'B86', 'F2', 'C2', 'E33', 'B19', 'A7', 'C49', 'F4',
       'A32', 'B4', 'B80', 'A31', 'D36', 'D15', 'C93', 'C78', 'D35',
       'C87', 'B77', 'E67', 'B94', 'C125', 'C99', 'C118', 'D7', 'A19',
       'B49', 'D', 'C22 C26', 'C106', 'C65', 'E36', 'C54',
       'B57 B59 B63 B66', 'C7', 'E34', 'C32', 'B18', 'C124', 'C91', 'E40',
       'T', 'C128', 'D37', 'B35', 'E50', 'C82', 'B96 B98', 'E10', 'E44',
       'A34', 'C104', 'C111', 'C92', 'E38', 'D21', 'E12', 'E63', 'A14',
       'B37', 'C30', 'D20', 'B79', 'E25', 'D46', 'B73', 'C95', 'B38',
       'B39', 'B22', 'C86', 'C70', 'A16', 'C101', 'C68', 'A10', 'E68',
       'B41', 'A20', 'D19', 'D50', 'D9', 'A23', 'B50', 'A26', 'D48',
       'E58', 'C126', 'B71', 'B51 B53 B55', 'D49', 'B5', 'B20', 'F G63',
       'C62 C64',

In [23]:
# Remove the raw Cabin column. drop() returns a new frame, so the result is re-bound;
# drop(columns=['Cabin']) is an equivalent spelling of the axis=1 form used here.
df_pre = df_pre.drop('Cabin', axis=1)

In [24]:
# Check whether any missing values remain
df_pre.isna().sum()

PassengerId      0
Survived       418
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             1
Embarked         2
Cabin_notna      0
dtype: int64

In [25]:
# The '50%' entry of describe() is the median fare.
df_pre['Fare'].describe().loc['50%']

14.4542

In [26]:
# Fill the missing fare with the median of the observed fares.
fare_median = df_pre['Fare'].median()
df_pre = df_pre.assign(Fare=df_pre['Fare'].fillna(fare_median))

In [27]:
# Re-count missing values to confirm Fare has been filled.
df_pre.isna().sum()

PassengerId      0
Survived       418
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             0
Embarked         2
Cabin_notna      0
dtype: int64

In [28]:
# The 'top' entry of describe() is the most frequent embarkation port.
df_pre['Embarked'].describe().loc['top']

'S'

In [29]:
# Fill the missing ports with the most frequent port (first entry returned by mode()).
embarked_mode = df_pre['Embarked'].mode().iloc[0]
df_pre = df_pre.assign(Embarked=df_pre['Embarked'].fillna(embarked_mode))

In [30]:
# Final missing-value count: only Survived (unlabelled test rows) should still have NaN.
df_pre.isna().sum()

PassengerId      0
Survived       418
Pclass           0
Name             0
Sex              0
Age              0
SibSp            0
Parch            0
Ticket           0
Fare             0
Embarked         0
Cabin_notna      0
dtype: int64

### 3-3 資料編碼
- label encoding
- one hot encoding

需要處理的類別型欄位: Name, Sex, Ticket, Embarked

In [32]:
# Assume Ticket and Name are unimportant; look at how the Ticket values are distributed.
df_pre['Ticket'].value_counts()

CA. 2343        11
CA 2144          8
1601             8
PC 17608         7
S.O.C. 14879     7
                ..
113792           1
36209            1
323592           1
315089           1
359309           1
Name: Ticket, Length: 929, dtype: int64

In [33]:
# Remove the free-text Name and Ticket columns.
df_pre = df_pre.drop(['Name', 'Ticket'], axis=1)

In [34]:
# Encoding options for Sex:
# 1. Assign arbitrary 0 / 1 codes -> male: 0, female: 1
# 2. Compare the survival rate of each sex and give the higher-surviving sex the code 1
sex_codes = {'male': 0, 'female': 1}
# Series.map turns every value that is not a key of the dict into NaN, so re-running this
# cell on the already-encoded column would silently wipe it. Encode only while the column
# still holds the raw labels.
if df_pre['Sex'].isin(list(sex_codes)).all():
    df_pre['Sex'] = df_pre['Sex'].map(sex_codes)

In [35]:
# Survival rate per encoded sex (0 = male, 1 = female).
# Select 'Survived' before aggregating: averaging every column first is wasted work and
# raises TypeError on pandas >= 2.0 while 'Embarked' still holds string labels.
df_pre.groupby('Sex')['Survived'].mean()

Sex
0    0.188908
1    0.742038
Name: Survived, dtype: float64

In [36]:
# Encoding options for Embarked:
# 1. Assign arbitrary 0 / 1 / 2 codes -> S: 0, C: 1, Q: 2
# 2. Compare the survival rate of each port and give higher-surviving ports higher codes
# The mapping below follows option 2 (S < Q < C by the per-port survival rates).
embarked_codes = {'S': 0, 'C': 2, 'Q': 1}
# Series.map turns every value that is not a key of the dict into NaN, so re-running this
# cell on the already-encoded column would silently wipe it. Encode only while the column
# still holds the raw labels.
if df_pre['Embarked'].isin(list(embarked_codes)).all():
    df_pre['Embarked'] = df_pre['Embarked'].map(embarked_codes)

In [37]:
# Survival rate per encoded embarkation port.
df_pre.groupby('Embarked')['Survived'].mean()

Embarked
0    0.339009
1    0.389610
2    0.553571
Name: Survived, dtype: float64

In [31]:
## One-hot encoding alternative (shown for comparison only; the result is not stored) ##
# Built from the raw port labels in df_all, filled with the most frequent port, so the
# C / Q / S columns appear no matter whether df_pre['Embarked'] has already been
# label-encoded by an earlier cell. dtype=int keeps the 0 / 1 display on pandas versions
# whose get_dummies default is bool.
embarked_raw = df_all['Embarked'].fillna(df_all['Embarked'].mode()[0])
pd.get_dummies(embarked_raw, dtype=int)

Unnamed: 0,C,Q,S
0,0,0,1
1,1,0,0
2,0,0,1
3,0,0,1
4,0,0,1
...,...,...,...
413,0,0,1
414,1,0,0
415,0,0,1
416,0,0,1


In [38]:
# Check that every remaining column has been encoded to a numeric dtype
df_pre.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1309 entries, 0 to 417
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  1309 non-null   int64  
 1   Survived     891 non-null    float64
 2   Pclass       1309 non-null   int64  
 3   Sex          1309 non-null   int64  
 4   Age          1309 non-null   float64
 5   SibSp        1309 non-null   int64  
 6   Parch        1309 non-null   int64  
 7   Fare         1309 non-null   float64
 8   Embarked     1309 non-null   int64  
 9   Cabin_notna  1309 non-null   int32  
dtypes: float64(3), int32(1), int64(6)
memory usage: 107.4 KB


### 3-4 標準化與正規化

In [39]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# One instance of each scaler: standardization (StandardScaler) and min-max
# normalization (MinMaxScaler).
std_scaler, mm_scaler = StandardScaler(), MinMaxScaler()

In [40]:
# Standardize the listed feature columns, overwriting them in df_pre
# (PassengerId, Survived and Cabin_notna are left untouched).
# NOTE(review): the scaler is fitted on every row of df_pre, i.e. train and test rows
# together and before the train/validation split -- confirm this is intended.
scale_cols = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
df_pre[scale_cols] = std_scaler.fit_transform(df_pre[scale_cols])

In [41]:
# Display the frame to check the standardized columns.
df_pre

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Cabin_notna
0,1,0.0,0.841916,-0.743497,-0.611972,0.481288,-0.445000,-0.503291,-0.622279,0
1,2,1.0,-1.546098,1.344995,0.630431,0.481288,-0.445000,0.734744,1.834926,1
2,3,1.0,0.841916,1.344995,-0.301371,-0.479087,-0.445000,-0.490240,-0.622279,0
3,4,1.0,-1.546098,1.344995,0.397481,0.481288,-0.445000,0.383183,-0.622279,1
4,5,0.0,0.841916,-0.743497,0.397481,-0.479087,-0.445000,-0.487824,-0.622279,0
...,...,...,...,...,...,...,...,...,...,...
413,1305,,0.841916,-0.743497,0.000000,-0.479087,-0.445000,-0.487824,-0.622279,0
414,1306,,-1.546098,1.344995,0.708081,-0.479087,-0.445000,1.462034,1.834926,1
415,1307,,0.841916,-0.743497,0.669256,-0.479087,-0.445000,-0.503291,-0.622279,0
416,1308,,0.841916,-0.743497,0.000000,-0.479087,-0.445000,-0.487824,-0.622279,0


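### 3-1 切資料集

> 注意：此步驟編號為 3-1，但在本 notebook 中實際上是在 3-2～3-4 之後才執行。較嚴謹的流程應該先切分資料，再只用訓練集計算補值與標準化所需的統計量，以避免資料洩漏 (data leakage)。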

In [42]:
# Cut the preprocessed frame back apart by position: the first n_train rows are the
# training file, the remaining rows are the test file.
train_pre = df_pre.iloc[:n_train]
test_pre = df_pre.iloc[n_train:]

In [43]:
# Target vector and feature matrix; PassengerId is an identifier, not a feature.
y = train_pre.loc[:, 'Survived']
X = train_pre.drop(['PassengerId', 'Survived'], axis=1)

In [44]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows as a validation split. stratify=y keeps the class
# ratio of y the same in both parts; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=10
)


In [45]:
# Shapes of the four parts produced by the split.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((712, 8), (179, 8), (712,), (179,))

In [46]:
# Class proportions in the training part.
y_train.value_counts(normalize=True)

0.0    0.616573
1.0    0.383427
Name: Survived, dtype: float64

In [47]:
# Class proportions in the held-out part, for comparison with the training part.
y_test.value_counts(normalize=True)

0.0    0.614525
1.0    0.385475
Name: Survived, dtype: float64

# 4. 建立模型

### 4-1. 選擇模型

In [48]:
# 選擇模型，記得要選對是「迴歸」或是「分類」模型
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

### 4-2. 訓練模型

In [49]:
# Fit each candidate classifier on the training split (X_train / y_train).
lr = LogisticRegression()
lr.fit(X_train, y_train)

svc = SVC()
svc.fit(X_train, y_train)

# Seed the tree: scikit-learn permutes the candidate features at every split, so without
# random_state the fitted tree (and every score derived from it) can differ between runs.
dt = DecisionTreeClassifier(random_state=10)
dt.fit(X_train, y_train)

In [50]:
# Take a look at the predictions on the held-out split
lr.predict(X_test)

array([1., 1., 0., 1., 0., 0., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1.,
       0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0.,
       1., 0., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1., 0.,
       1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 1., 1.,
       1., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 0.,
       1., 1., 1., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1.,
       0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0.,
       1., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0.,
       1., 1., 0., 0., 1., 0., 0., 0., 0.])

In [51]:
# Quick first comparison using each estimator's built-in score() on the held-out split,
# printed in the order lr, svc, dt.
for fitted_model in (lr, svc, dt):
    print(fitted_model.score(X_test, y_test))

0.7988826815642458
0.8268156424581006
0.8268156424581006


# 5. 評估模型

#### 單純的看準確度

In [52]:
from sklearn import metrics

# Accuracy of the logistic-regression model on the held-out split.
metrics.accuracy_score(y_true=y_test, y_pred=lr.predict(X_test))

0.7988826815642458

In [53]:
# Recall of the logistic-regression model on the held-out split.
metrics.recall_score(y_test, lr.predict(X_test))

0.7101449275362319

#### 混淆矩陣

In [54]:
# Confusion matrix as a labelled frame: rows are the true classes, columns the predictions.
cfmx = metrics.confusion_matrix(y_test, lr.predict(X_test))
pd.DataFrame(cfmx, index=['true0', 'true1'], columns=['pred0', 'pred1'])

Unnamed: 0,pred0,pred1
true0,94,16
true1,20,49


In [55]:
# Per-class precision / recall / F1 for the logistic-regression model.
report_text = metrics.classification_report(y_test, lr.predict(X_test))
print(report_text)

              precision    recall  f1-score   support

         0.0       0.82      0.85      0.84       110
         1.0       0.75      0.71      0.73        69

    accuracy                           0.80       179
   macro avg       0.79      0.78      0.79       179
weighted avg       0.80      0.80      0.80       179



#### Cross Validation

In [56]:
from sklearn.model_selection import cross_val_score

# Mean cross-validated score of each estimator over the full labelled set (X, y),
# printed in the order lr, svc, dt.
for estimator in (lr, svc, dt):
    print(cross_val_score(estimator, X, y).mean())

0.7901198920343984
0.827154604230745
0.7946644906157806


### 模型調參
- random search / grid search

In [57]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform

# Base estimator whose C and penalty are tuned by the search.
logistic = LogisticRegression(
    solver='saga', tol=1e-2, max_iter=200, random_state=0,
)

# Search space: C sampled from uniform(loc=0, scale=4); penalty picked from l2 / l1.
distributions = {
    'C': uniform(loc=0, scale=4),
    'penalty': ['l2', 'l1'],
}

# Randomized search fitted on the training split only; show the best combination found.
clf = RandomizedSearchCV(logistic, distributions, random_state=0)
search = clf.fit(X_train, y_train)
search.best_params_

{'C': 0.22685190926977272, 'penalty': 'l2'}

In [58]:
# Rebuild the estimator with the best parameters found by the search, fit it on the
# training split and score it on the held-out split.
logistic = LogisticRegression(
    **search.best_params_,
    solver='saga', tol=1e-2, max_iter=200, random_state=0,
)
logistic.fit(X_train, y_train).score(X_test, y_test)

0.7988826815642458

# 6. 預測答案

- 讀取 test.csv
- 做與 train set 一樣的前處理方式
- 使用模型對 test 做預測

In [59]:
# Predict the unlabelled test rows with the default logistic-regression model, using the
# same feature columns as in training.
X_submit = test_pre.drop(columns=['PassengerId', 'Survived'])
test_pred = lr.predict(X_submit)

In [60]:
# Answer table: PassengerId next to the predicted Survived label.
# NOTE(review): the predictions are floats (0.0 / 1.0); cast to int if the consumer of
# this table expects integer labels.
ans = test_pre[['PassengerId']].assign(Survived=test_pred)
ans

Unnamed: 0,PassengerId,Survived
0,892,0.0
1,893,0.0
2,894,0.0
3,895,0.0
4,896,1.0
...,...,...
413,1305,0.0
414,1306,1.0
415,1307,0.0
416,1308,0.0
