# Logistic Regression

In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

In [2]:
# n selects which Titanic training subset file is read; the recorded output
# of this cell shows that the n=8 file contains 8 rows.
n = 8

# Read the training split, then separate the 'survived' label from the
# remaining columns, which are all used as features.
df = pd.read_csv(f'../data/titanic/titanic_{n}_train.csv')
y_train = df['survived']
x_train = df.drop(columns='survived')
print(len(x_train))


8


In [3]:
# Fit a logistic regression with default hyper-parameters on the Titanic
# training split. The fit call is left as the cell's last expression.
model = LogisticRegression()
model.fit(X=x_train, y=y_train)

In [6]:
# Read the Titanic test split that carries the same n suffix as the training
# file, and split it into features and the 'survived' label.
df = pd.read_csv(f'../data/titanic/titanic_{n}_test.csv')
y_test = df['survived']
x_test = df.drop(columns='survived')

In [7]:
# predict_proba returns one column per class; column 1 is the probability of
# the second class in model.classes_ (presumably label 1 — confirm against the data).
class_proba = model.predict_proba(x_test)
y_proba = class_proba[:, 1]

# Hard labels: 1 where that probability is strictly greater than 0.5, else 0.
above_threshold = y_proba > 0.5
y_pred = above_threshold.astype(int)

In [8]:
# Score the Titanic test predictions. Accuracy, precision, recall and F1 are
# computed from the hard class labels in y_pred; ROC-AUC is computed from the
# predicted probabilities in y_proba.

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Precision
precision = precision_score(y_test, y_pred)
print(f'Precision: {precision}')

# Recall
recall = recall_score(y_test, y_pred)
print(f'Recall: {recall}')

# F1 score
f1 = f1_score(y_test, y_pred)
print(f'F1 score: {f1}')

# ROC-AUC needs prediction scores rather than class predictions, so the
# probabilities in y_proba are passed here (not the thresholded y_pred).
roc_auc = roc_auc_score(y_test, y_proba)
print(f'ROC-AUC: {roc_auc}')

Accuracy: 0.45413363533408835
Precision: 0.3977272727272727
Recall: 0.8284023668639053
F1 score: 0.5374280230326295
ROC-AUC: 0.6312225177786221


In [9]:
from sklearn.metrics import classification_report

# Evaluate performance: per-class precision, recall, F1 score and support,
# followed by the accuracy and the macro / weighted averages.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.68      0.22      0.33       545
           1       0.40      0.83      0.54       338

    accuracy                           0.45       883
   macro avg       0.54      0.53      0.44       883
weighted avg       0.57      0.45      0.41       883



### Moon dataset

In [13]:
# n selects which moon training file is read (presumably its row count —
# verify against the data directory).
n = 600
df = pd.read_csv(f'../data/moon/moon_{n}_train.csv')

# 'Target' is the label column; every other column is used as a feature.
y_train = df['Target']
x_train = df.drop(columns='Target')

# Fit a logistic regression with default hyper-parameters. The fit call is
# left as the cell's last expression.
model = LogisticRegression()
model.fit(X=x_train, y=y_train)

In [14]:
# Read the moon test split that carries the same n suffix as the training
# file, and split it into features and the 'Target' label.
df = pd.read_csv(f'../data/moon/moon_{n}_test.csv')
x_test = df.drop(columns='Target')
y_test = df['Target']

# Column 1 of predict_proba is the probability of the second class in
# model.classes_ (presumably label 1 — confirm against the data). Hard labels
# are 1 where that probability is strictly greater than 0.5.
y_proba = model.predict_proba(x_test)[:, 1]
y_pred = (y_proba > 0.5).astype(int)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Precision
precision = precision_score(y_test, y_pred)
print(f'Precision: {precision}')

# Recall
recall = recall_score(y_test, y_pred)
print(f'Recall: {recall}')

# F1 score
f1 = f1_score(y_test, y_pred)
print(f'F1 score: {f1}')

# ROC-AUC needs prediction scores rather than class predictions, so the
# probabilities in y_proba are passed here (not the thresholded y_pred).
roc_auc = roc_auc_score(y_test, y_proba)
print(f'ROC-AUC: {roc_auc}')

Accuracy: 0.89
Precision: 0.920863309352518
Recall: 0.8533333333333334
F1 score: 0.8858131487889273
ROC-AUC: 0.9667555555555556


### Circle

In [11]:
# n selects which circle training file is read (presumably its row count —
# verify against the data directory).
n = 600
df = pd.read_csv(f'../data/circle/circle_{n}_train.csv')

# 'Target' is the label column; every other column is used as a feature.
y_train = df['Target']
x_train = df.drop(columns='Target')

# Fit a logistic regression with default hyper-parameters. The fit call is
# left as the cell's last expression.
model = LogisticRegression()
model.fit(X=x_train, y=y_train)

In [12]:
# Read the circle test split that carries the same n suffix as the training
# file, and split it into features and the 'Target' label.
df = pd.read_csv(f'../data/circle/circle_{n}_test.csv')
x_test = df.drop(columns='Target')
y_test = df['Target']

# Column 1 of predict_proba is the probability of the second class in
# model.classes_ (presumably label 1 — confirm against the data). Hard labels
# are 1 where that probability is strictly greater than 0.5.
y_proba = model.predict_proba(x_test)[:, 1]
y_pred = (y_proba > 0.5).astype(int)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Precision
precision = precision_score(y_test, y_pred)
print(f'Precision: {precision}')

# Recall
recall = recall_score(y_test, y_pred)
print(f'Recall: {recall}')

# F1 score
f1 = f1_score(y_test, y_pred)
print(f'F1 score: {f1}')

# ROC-AUC needs prediction scores rather than class predictions, so the
# probabilities in y_proba are passed here (not the thresholded y_pred).
roc_auc = roc_auc_score(y_test, y_proba)
print(f'ROC-AUC: {roc_auc}')

Accuracy: 0.49
Precision: 0.4906832298136646
Recall: 0.5266666666666666
F1 score: 0.5080385852090032
ROC-AUC: 0.4658666666666667


### Pseudodata

In [15]:
# These two values are interpolated into the pseudodata file name
# (presumably the feature count and the training-set size — verify against
# the data directory).
columns = 4
train_data = 300

file_name = f'pseudodata_{columns}f_{train_data}_train.csv'

# Read the training split and separate the 'target' label from the features.
df = pd.read_csv(f'../data/pseudodata/{file_name}')
y_train = df['target']
x_train = df.drop(columns='target')

# Fit a logistic regression with default hyper-parameters. The fit call is
# left as the cell's last expression.
model = LogisticRegression()
model.fit(X=x_train, y=y_train)

In [16]:
# These values are re-declared here and must match the ones used to pick the
# training file, otherwise the model is scored on a mismatched test split.
columns = 4
train_data = 300

file_name = f'pseudodata_{columns}f_{train_data}_test.csv'

# Read the test split and separate the 'target' label from the features.
df = pd.read_csv(f'../data/pseudodata/{file_name}')
x_test = df.drop(columns='target')
y_test = df['target']

# Column 1 of predict_proba is the probability of the second class in
# model.classes_ (presumably label 1 — confirm against the data). Hard labels
# are 1 where that probability is strictly greater than 0.5.
y_proba = model.predict_proba(x_test)[:, 1]
y_pred = (y_proba > 0.5).astype(int)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Precision
precision = precision_score(y_test, y_pred)
print(f'Precision: {precision}')

# Recall
recall = recall_score(y_test, y_pred)
print(f'Recall: {recall}')

# F1 score
f1 = f1_score(y_test, y_pred)
print(f'F1 score: {f1}')

# ROC-AUC needs prediction scores rather than class predictions, so the
# probabilities in y_proba are passed here (not the thresholded y_pred).
roc_auc = roc_auc_score(y_test, y_proba)
print(f'ROC-AUC: {roc_auc}')

Accuracy: 0.94
Precision: 0.9117647058823529
Recall: 0.9742857142857143
F1 score: 0.9419889502762431
ROC-AUC: 0.9666122448979592
