## 참고: https://hleecaster.com/ml-linear-regression-example/

## 라이브러리 설치, 호출

!pip3 install -U scikit-learn<br>
!pip3 install pandas<br>
!pip3 install numpy<br>
!pip3 install matplotlib<br>
!pip3 install statsmodels<br>

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau

## 기본적인 데이터 처리 (정보 확인, 결측치 처리, Drop) 

In [2]:
import pandas as pd

# Load the stroke dataset from a machine-specific absolute path, then print
# the first five rows and the (rows, columns) shape as a quick sanity check.
df = pd.read_csv(filepath_or_buffer='/Users/krc/Downloads/healthcare-dataset-stroke-data.csv')
print(df.head(n=5))
print(df.shape)

      id  gender   age  hypertension  heart_disease ever_married  \
0   9046    Male  67.0             0              1          Yes   
1  51676  Female  61.0             0              0          Yes   
2  31112    Male  80.0             0              1          Yes   
3  60182  Female  49.0             0              0          Yes   
4   1665  Female  79.0             1              0          Yes   

       work_type Residence_type  avg_glucose_level   bmi   smoking_status  \
0        Private          Urban             228.69  36.6  formerly smoked   
1  Self-employed          Rural             202.21   NaN     never smoked   
2        Private          Rural             105.92  32.5     never smoked   
3        Private          Urban             171.23  34.4           smokes   
4  Self-employed          Rural             174.12  24.0     never smoked   

   stroke  
0       1  
1       1  
2       1  
3       1  
4       1  
(5110, 12)


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5110 entries, 0 to 5109
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 5110 non-null   int64  
 1   gender             5110 non-null   object 
 2   age                5110 non-null   float64
 3   hypertension       5110 non-null   int64  
 4   heart_disease      5110 non-null   int64  
 5   ever_married       5110 non-null   object 
 6   work_type          5110 non-null   object 
 7   Residence_type     5110 non-null   object 
 8   avg_glucose_level  5110 non-null   float64
 9   bmi                4909 non-null   float64
 10  smoking_status     5110 non-null   object 
 11  stroke             5110 non-null   int64  
dtypes: float64(3), int64(4), object(5)
memory usage: 479.2+ KB


In [4]:
# Names of the columns whose dtype is object (the categorical text columns)
df.dtypes[df.dtypes == 'object'].index

Index(['gender', 'ever_married', 'work_type', 'Residence_type',
       'smoking_status'],
      dtype='object')

In [5]:
df.describe()

Unnamed: 0,id,age,hypertension,heart_disease,avg_glucose_level,bmi,stroke
count,5110.0,5110.0,5110.0,5110.0,5110.0,4909.0,5110.0
mean,36517.829354,43.226614,0.097456,0.054012,106.147677,28.893237,0.048728
std,21161.721625,22.612647,0.296607,0.226063,45.28356,7.854067,0.21532
min,67.0,0.08,0.0,0.0,55.12,10.3,0.0
25%,17741.25,25.0,0.0,0.0,77.245,23.5,0.0
50%,36932.0,45.0,0.0,0.0,91.885,28.1,0.0
75%,54682.0,61.0,0.0,0.0,114.09,33.1,0.0
max,72940.0,82.0,1.0,1.0,271.74,97.6,1.0


In [6]:
# Count the missing values in each column
df.isna().sum()

id                     0
gender                 0
age                    0
hypertension           0
heart_disease          0
ever_married           0
work_type              0
Residence_type         0
avg_glucose_level      0
bmi                  201
smoking_status         0
stroke                 0
dtype: int64

In [7]:
# Discard every row that contains at least one missing value
df = df.dropna(axis=0, how='any')

In [8]:
# List the distinct gender values; this is where the 'Other' category shows up
df.gender.unique()

array(['Male', 'Female', 'Other'], dtype=object)

In [9]:
# Collect the index labels of the rows whose gender is 'Other',
# then remove those rows from df in place
idx = df.index[df['gender'] == 'Other']
df.drop(index=idx, inplace=True)

In [10]:
# Re-check the distinct gender values to confirm the 'Other' rows were removed
df.gender.unique()

array(['Male', 'Female'], dtype=object)

## 데이터 encoding 처리 

In [11]:
# Label encoding
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Create the encoder, learn the gender categories and replace the column
# with their integer codes in a single fit_transform step
en = LabelEncoder()
df['gender'] = en.fit_transform(df['gender'])

In [12]:
df.gender

0       1
2       1
3       0
4       0
5       1
       ..
5104    0
5106    0
5107    0
5108    1
5109    0
Name: gender, Length: 4908, dtype: int64

In [13]:
# Label-encode ever_married and smoking_status as integer codes.
#
# Each column gets its own LabelEncoder, so the encoder `en` that was fitted
# on gender is not re-fitted and keeps its gender mapping.
#
# The columns are replaced with `df[col] = ...` rather than
# `df.loc[:, cols] = ...`: recent pandas versions write `.loc[:, cols]`
# assignments into the existing object-dtype columns in place, which leaves
# them as object dtype even though they hold integers. Replacing the whole
# column gives it the integer dtype of the encoded array.
for col in ('ever_married', 'smoking_status'):
    df[col] = LabelEncoder().fit_transform(df[col])

In [14]:
df.loc[:,['ever_married', 'smoking_status']]

Unnamed: 0,ever_married,smoking_status
0,1,1
2,1,2
3,1,3
4,1,2
5,1,1
...,...,...
5104,0,0
5106,1,2
5107,1,2
5108,1,1


## Input, Feature 설정

In [15]:
# Model inputs: the eight feature columns, in the order the network expects.
InputFeature = df.loc[:, ["gender", "age", "hypertension", "heart_disease",
                          "ever_married", "avg_glucose_level", "bmi", "smoking_status"]]
# Prediction target: the stroke column.
Label = df.loc[:, 'stroke']

In [16]:
InputFeature.head()

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,avg_glucose_level,bmi,smoking_status
0,1,67.0,0,1,1,228.69,36.6,1
2,1,80.0,0,1,1,105.92,32.5,2
3,0,49.0,0,0,1,171.23,34.4,3
4,0,79.0,1,0,1,174.12,24.0,2
5,1,81.0,0,0,1,186.21,29.0,1


## Keras Logit 모델 fitting

In [17]:
# Three stacked Dense layers: 8 input features -> 5 -> 10 -> 1.
# The two hidden layers use a linear activation; the single output unit uses
# a sigmoid so the model emits a value between 0 and 1.
model = Sequential([
    Dense(5, activation='linear', input_shape=(8,)),
    Dense(10, activation='linear'),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy loss, Adam optimizer (learning rate 0.01), accuracy metric.
model.compile(optimizer=Adam(learning_rate=0.01),
              loss='binary_crossentropy',
              metrics=['accuracy'])

## Keras 모델 살펴보기

In [18]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 5)                 45        
                                                                 
 dense_1 (Dense)             (None, 10)                60        
                                                                 
 dense_2 (Dense)             (None, 1)                 11        
                                                                 
Total params: 116
Trainable params: 116
Non-trainable params: 0
_________________________________________________________________


## Call-back 함수
## 모델 학습

In [19]:
# Callbacks
# Checkpoint: after each epoch, save the model only when the monitored value
# has improved (save_best_only=True, lower is better). Note that monitor='loss'
# tracks the training loss, not a validation metric. The file name embeds the
# epoch number, loss and accuracy.
CP = ModelCheckpoint(filepath='Models/{epoch:03d}-{loss:.4f}-{accuracy:.4f}.hdf5',
            monitor='loss', verbose=1, save_best_only=True, mode='min')

# Learning-rate schedule: when the training loss has not improved for 3 epochs,
# multiply the learning rate by 0.8, never going below 1e-8.
LR = ReduceLROnPlateau(monitor='loss',factor=0.8,patience=3, verbose=1, min_lr=1e-8)

# Callback list handed to model.fit
CALLBACK = [CP, LR]

In [20]:
# Train for 100 epochs on all of InputFeature/Label, shuffled, in mini-batches
# of 3 rows. No validation data or validation split is passed, so only the
# training loss and accuracy are reported and monitored by the callbacks.
model.fit(x=InputFeature, y=Label, epochs=100, shuffle=True, batch_size=3, callbacks=CALLBACK)

Epoch 1/100
   1/1636 [..............................] - ETA: 3:57 - loss: 1.8003 - accuracy: 0.6667

2022-07-19 20:01:26.319170: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1: loss improved from inf to 0.28341, saving model to Models/001-0.2834-0.9489.hdf5
Epoch 2/100
Epoch 2: loss improved from 0.28341 to 0.18409, saving model to Models/002-0.1841-0.9533.hdf5
Epoch 3/100
Epoch 3: loss improved from 0.18409 to 0.17220, saving model to Models/003-0.1722-0.9546.hdf5
Epoch 4/100
Epoch 4: loss improved from 0.17220 to 0.16299, saving model to Models/004-0.1630-0.9556.hdf5
Epoch 5/100
Epoch 5: loss did not improve from 0.16299
Epoch 6/100
Epoch 6: loss improved from 0.16299 to 0.16235, saving model to Models/006-0.1623-0.9556.hdf5
Epoch 7/100
Epoch 7: loss did not improve from 0.16235
Epoch 8/100
Epoch 8: loss improved from 0.16235 to 0.16142, saving model to Models/008-0.1614-0.9568.hdf5
Epoch 9/100
Epoch 9: loss did not improve from 0.16142
Epoch 10/100
Epoch 10: loss did not improve from 0.16142
Epoch 11/100
Epoch 11: loss improved from 0.16142 to 0.15798, saving model to Models/011-0.1580-0.9558.hdf5
Epoch 12/100
Epoch 12: loss did not improve from 0

<keras.callbacks.History at 0x17d5a41c0>

# Model Load 하기 전, hdf5 파일 이름 꼭 확인하기

In [22]:
# Load saved weights into the model from a checkpoint file.
# NOTE(review): the file name is hard-coded; checkpoint names embed the epoch,
# loss and accuracy, so they presumably differ between training runs - confirm
# this exact file exists under Models/ before running the cell.
model.load_weights("Models/099-0.1405-0.9576.hdf5")

## FLASK 셋팅하기

In [35]:
from flask import Flask
from flask import render_template
from flask import request
import pandas as pd

In [36]:
app = Flask(__name__)

# FLASK API 구현부분

In [37]:
@app.route('/')
@app.route('/Stroke')
def StrokePrediction():
    """Serve the input form or, when all inputs are supplied, the prediction.

    The eight model features are read from the query string:
    gender, age, hypertension, heart_disease, ever_married,
    avg_glucose_level, bmi and smoking_status.

    Returns:
        * the rendered ``index.html`` with an empty ``Output`` when any of
          the eight parameters is absent from the request;
        * a plain-text message with HTTP status 400 when a supplied value
          cannot be parsed as a number;
        * otherwise the rendered ``Stroke.html`` with ``Output`` set to the
          model output multiplied by 100.
    """
    # Same order as the columns of the training feature table.
    feature_names = (
        'gender', 'age', 'hypertension', 'heart_disease',
        'ever_married', 'avg_glucose_level', 'bmi', 'smoking_status',
    )
    raw_values = {name: request.args.get(name) for name in feature_names}

    # Every feature is required. Checking only some of them would let a
    # missing value reach float(None) and fail with a server error.
    if any(value is None for value in raw_values.values()):
        return render_template('index.html', Output = '')

    try:
        Input = pd.DataFrame(
            {name: [float(raw_values[name])] for name in feature_names}
        )
    except ValueError:
        # Blank or non-numeric text is a client mistake, not a server fault.
        return 'All input values must be numeric.', 400

    # predict() returns one row per input row; take the single output value.
    ModelOutput = model.predict(Input)[0][0]

    return render_template('Stroke.html', Output = ModelOutput * 100 )

# Flask, port 5001로 실행

In [38]:
# Start Flask's built-in server on port 5001.
# NOTE: host='0.0.0.0' makes it listen on all network addresses, so it is
# reachable from other machines on the network, not only from localhost.
app.run(host='0.0.0.0', port=5001)

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5001
 * Running on http://10.186.24.41:5001 (Press CTRL+C to quit)


In [None]:
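# http://127.0.0.1:5001/Stroke?gender=1&age=67&hypertension=0&heart_disease=1&ever_married=1&avg_glucose_level=228.69&bmi=36.6&smoking_status=1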