<a href="https://colab.research.google.com/github/daisuke08253649/DeepLearning/blob/main/Liver_disease_determination.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the later cells read their CSV files
# and read/write the saved model through ./drive/MyDrive/... paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve

from joblib import dump, load


# --- Load and clean the training data ---------------------------------------
df = pd.read_csv('./drive/MyDrive/DeepLearning/SIGNATE/Liver_disease_determination/train_2.csv')

# Fill missing AG_ratio values with Alb / (TP - Alb).
# Assign the result back instead of calling fillna(inplace=True) on the
# df['AG_ratio'] selection: the chained in-place form operates on an
# intermediate object and does not update df under pandas Copy-on-Write.
df['AG_ratio'] = df['AG_ratio'].fillna(df['Alb'] / (df['TP'] - df['Alb']))

# Remove exact duplicate rows and renumber the index from 0.
df.drop_duplicates(inplace=True)
df.reset_index(drop=True, inplace=True)

# Encode Gender as 1 for 'Male' and 0 for every other value.
df['Gender'] = (df['Gender'] == 'Male').astype(int)

# Features are every column except the target column 'disease'.
X = df.drop(['disease'], axis=1)
y = df['disease']

# --- One-hot encoded T_Bil bins ----------------------------------------------
# Bin T_Bil into (0, 0.5], (0.5, 1.0], (1.0, 100]; labels=False yields the
# integer bin index, which is then expanded into indicator columns.
# NOTE(review): X_binned is built here but the split below uses X, so the
# binned columns are not part of the fitted model — confirm that is intended.
bins_T_Bil = [0, 0.5, 1.0, 100]
X_cut, bin_indice = pd.cut(X['T_Bil'], bins=bins_T_Bil, retbins=True, labels=False)
X_dummies = pd.get_dummies(X_cut, prefix=X_cut.name)
X_binned = pd.concat([X, X_dummies], axis=1)

# --- Train / evaluate ---------------------------------------------------------
# Hold out 30% of the rows for evaluation. The seed is fixed so the printed
# AUC and the saved model are reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

lr = LogisticRegression()
lr.fit(X_train, y_train)

# Probability of the positive class (second column of predict_proba).
y_pred_prob = lr.predict_proba(X_test)[:, 1]

auc_score = roc_auc_score(y_test, y_pred_prob)
print(auc_score)

# ROC curve for the held-out split, drawn together with two reference lines.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
plt.plot(fpr, tpr, label='roc curve (area = %0.3f)' % auc_score)
for ref_x, ref_y, ref_label in (
    ([0, 1], [0, 1], 'random'),       # diagonal reference line
    ([0, 0, 1], [0, 1, 1], 'ideal'),  # reference line through the (0, 1) corner
):
    plt.plot(ref_x, ref_y, linestyle=':', label=ref_label)
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.legend()
plt.show()

# Persist the fitted model to Drive; the prediction cell loads it from this path.
dump(
    lr,
    './drive/MyDrive/DeepLearning/SIGNATE/Liver_disease_determination/model.joblib',
)

In [None]:
# Reload the model that the training cell saved to Drive.
model = load('./drive/MyDrive/DeepLearning/SIGNATE/Liver_disease_determination/model.joblib')

# Load the submission CSV that will receive the predictions.
submit_df = pd.read_csv('./drive/MyDrive/DeepLearning/SIGNATE/Liver_disease_determination/sample_submit_2.csv')

# Load the test set and apply the same preprocessing as the training data:
# fill missing AG_ratio with Alb / (TP - Alb) and encode Gender as 1 for
# 'Male', 0 otherwise. Without the fill, predict() would reject NaN inputs.
test_df = pd.read_csv('./drive/MyDrive/DeepLearning/SIGNATE/Liver_disease_determination/test_2.csv')
test_df['AG_ratio'] = test_df['AG_ratio'].fillna(
    test_df['Alb'] / (test_df['TP'] - test_df['Alb'])
)
test_df['Gender'] = (test_df['Gender'] == 'Male').astype(int)

# If the test data has more rows than the submission CSV, drop its last row.
# NOTE(review): a one-row difference may mean the submission CSV has no header
# line and read_csv consumed its first record as the header; in that case
# dropping the LAST test row misaligns predictions — confirm the file format.
if len(test_df) > len(submit_df):
    test_df.drop(test_df.index[-1], inplace=True)

# Validate that the row counts now match. A single explicit check replaces the
# former assert + duplicate check: the assert made the ValueError unreachable
# and would itself be skipped when Python runs with -O.
if len(test_df) != len(submit_df):
    print(f"Test data length: {len(test_df)}, Submission data length: {len(submit_df)}")
    raise ValueError("The number of test samples and submission samples should be the same")

# Predict on the test set and store the result in the second column of the
# submission frame.
# NOTE(review): predict() returns hard class labels, while the training cell
# scores the model by ROC AUC on predict_proba — confirm which one the
# submission is expected to contain.
predict = model.predict(test_df)
submit_df.iloc[:, 1] = predict

# Show the first five rows as a sanity check.
print(submit_df.head())

# Write the submission CSV.
# NOTE(review): this path is the same file that was read as the template
# above, so the template is overwritten; the original comment said a different
# file name was intended — confirm the desired output path.
submit_df.to_csv('./drive/MyDrive/DeepLearning/SIGNATE/Liver_disease_determination/sample_submit_2.csv', index=False)