# HeartAttack Prediction
### 속성 설명
1. Age : 나이
2. Sex : 성별 (1 = 남성, 0 = 여성)
3. cp : 가슴 통증 종류 (아래 번호는 데이터셋 설명 기준이며, 실제 heart.csv에는 0~3으로 인코딩되어 있음)
    - 1: 전형적인 협심증
    - 2: 비전형적인 협심증
    - 3: 협심증이 아닌 통증
    - 4: 무증상
4. trtbps : 혈압
5. chol : 콜레스테롤 수치
6. fbs : 공복 혈당 (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)
7. restecg : 안정 시 심전도 결과 (0 = 정상, 1 = ST-T파 이상, 2 = 좌심실 비대)
8. thalachh : 최대 심박수
9. exng : 운동으로 인한 협심증 (1 = 예; 0 = 아니요)
10. output : 0 = 심장마비 확률 적음, 1 = 심장마비 확률 높음
 
(angina/협심증 : 심장의 혈액 순환로가 되는 관상동맥이 좁아져 갑작스럽게 흉부 통증 또는 압박감을 느끼는 상태)

- 역할 분담 (예정)
    - 이예현 : 데이터 가공, 시각화, 주석 작성
    - 정해성 : 모델 학습, 예측, 평가

- 출처 : https://www.kaggle.com/rashikrahmanpritom/heart-attack-analysis-prediction-dataset

### 데이터 시각화
---


In [55]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import copy
# Load the heart-attack dataset and discard the columns this analysis does not use.
origin_df = pd.read_csv('./heart.csv').drop(
    columns=['oldpeak', 'slp', 'caa', 'restecg', 'thall'],
)
origin_df

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,thalachh,exng,output
0,63,1,3,145,233,1,150,0,1
1,37,1,2,130,250,0,187,0,1
2,41,0,1,130,204,0,172,0,1
3,56,1,1,120,236,0,178,0,1
4,57,0,0,120,354,0,163,1,1
...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,123,1,0
299,45,1,3,110,264,0,132,0,0
300,68,1,0,144,193,1,141,0,0
301,57,1,0,130,131,0,115,1,0


In [56]:
# Replace the dataset's abbreviated column names with descriptive ones (in place).
origin_df.rename(
    columns=dict(
        age='Age',
        sex='Sex',
        cp='PainType',
        trtbps='BloodPressure',
        chol='CholestoralDensity',
        fbs='BloodSugar',
        thalachh='MaxHeartRate',
        exng='IsExerciseInduced',
        output='Result',
    ),
    inplace=True,
)
origin_df

Unnamed: 0,Age,Sex,PainType,BloodPressure,CholestoralDensity,BloodSugar,MaxHeartRate,IsExerciseInduced,Result
0,63,1,3,145,233,1,150,0,1
1,37,1,2,130,250,0,187,0,1
2,41,0,1,130,204,0,172,0,1
3,56,1,1,120,236,0,178,0,1
4,57,0,0,120,354,0,163,1,1
...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,123,1,0
299,45,1,3,110,264,0,132,0,0
300,68,1,0,144,193,1,141,0,0
301,57,1,0,130,131,0,115,1,0


In [72]:
# One-hot encode the categorical columns: for every category code i of a
# column, add an indicator column named '<column>=<i>' that holds 1 where the
# row has that code and 0 otherwise. origin_df itself is left untouched.
medi_df = origin_df.copy()  # DataFrame.copy() is a deep copy by default
atomic_col_names = ('Sex', 'PainType', 'BloodSugar', 'IsExerciseInduced')
# Number of category codes (0 .. count - 1) for each column listed above.
atomic_col_counts = (2, 4, 2, 2)
for col_name, n_categories in zip(atomic_col_names, atomic_col_counts):
    for i in range(n_categories):
        # Vectorized comparison instead of the element-wise chained assignment
        # medi_df[newname][j] = ..., which writes through an intermediate
        # Series and is not guaranteed by pandas to modify medi_df.
        medi_df[f'{col_name}={i}'] = (origin_df[col_name] == i).astype('int64')
medi_df

Unnamed: 0,Age,Sex,PainType,BloodPressure,CholestoralDensity,BloodSugar,MaxHeartRate,IsExerciseInduced,Result,Sex=0,Sex=1,PainType=0,PainType=1,PainType=2,PainType=3,BloodSugar=0,BloodSugar=1,IsExerciseInduced=0,IsExerciseInduced=1
0,63,1,3,145,233,1,150,0,1,0,1,0,0,0,1,0,1,1,0
1,37,1,2,130,250,0,187,0,1,0,1,0,0,1,0,1,0,1,0
2,41,0,1,130,204,0,172,0,1,1,0,0,1,0,0,1,0,1,0
3,56,1,1,120,236,0,178,0,1,0,1,0,1,0,0,1,0,1,0
4,57,0,0,120,354,0,163,1,1,1,0,1,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,123,1,0,1,0,1,0,0,0,1,0,0,1
299,45,1,3,110,264,0,132,0,0,0,1,0,0,0,1,1,0,1,0
300,68,1,0,144,193,1,141,0,0,0,1,1,0,0,0,0,1,1,0
301,57,1,0,130,131,0,115,1,0,0,1,1,0,0,0,1,0,0,1


In [79]:
# Split the rows by label: Result == 0 goes to fine_df, Result == 1 to dan_df.
fine_df = medi_df.loc[medi_df['Result'].eq(0)]
dan_df = medi_df.loc[medi_df['Result'].eq(1)]
# Raw categorical columns (already expanded into '<name>=<i>' indicators) and
# the label itself are left out of the per-group summaries.
excluded_names = ['Sex', 'PainType', 'BloodSugar', 'IsExerciseInduced', 'Result']

In [100]:
# One-row summary of fine_df (rows with Result == 0).
#  - Numeric columns: the plain mean within the group.
#  - Indicator columns ('<name>=<i>'): the group's count for that category
#    divided by the category's overall frequency in medi_df; afterwards the
#    values of each categorical feature are rescaled to add up to 100.
sums = {}
fine_df_mean = pd.DataFrame()
for colname in fine_df.drop(columns=excluded_names).columns:
    # Strip the trailing '=<i>' (two characters) to recover the feature name.
    group = colname[:-2]
    if group not in atomic_col_names:
        fine_df_mean[colname] = pd.Series(fine_df[colname].mean())
        continue
    fine_df_mean[colname] = pd.Series(
        fine_df[colname].sum() / (medi_df[colname].sum() / len(medi_df.index))
    )
    # Running total per feature, used for the percentage rescaling below.
    sums[group] = sums.get(group, 0) + fine_df_mean[colname][0]
for colname in fine_df_mean.columns:
    group = colname[:-2]
    if group in sums:
        fine_df_mean[colname] = fine_df_mean[colname] / sums[group] * 100
fine_df_mean

Unnamed: 0,Age,BloodPressure,CholestoralDensity,MaxHeartRate,Sex=0,Sex=1,PainType=0,PainType=1,PainType=2,PainType=3,BloodSugar=0,BloodSugar=1,IsExerciseInduced=0,IsExerciseInduced=1
0,56.601449,134.398551,251.086957,139.101449,31.221719,68.778281,51.26993,12.689308,14.585411,21.455351,47.907489,52.092511,28.361519,71.638481


In [101]:
# One-row summary of dan_df (rows with Result == 1).
#  - Numeric columns: the plain mean within the group.
#  - Indicator columns ('<name>=<i>'): the group's count for that category
#    relative to the category's overall count in medi_df; afterwards the
#    values of each categorical feature are rescaled to add up to 100.
sums = {}
dan_df_mean = pd.DataFrame()
for colname in dan_df.drop(excluded_names, axis=1).columns:
    # Strip the trailing '=<i>' (two characters) to recover the feature name.
    group = colname[:-2]
    if group in atomic_col_names:
        # The len(...) factor is the same for every category of a feature, so
        # it cancels out in the percentage rescaling below.
        dan_df_mean[colname] = pd.Series(
            dan_df[colname].sum() / (medi_df[colname].sum() / len(dan_df.index))
        )
        sums[group] = sums.get(group, 0) + dan_df_mean[colname][0]
    else:
        dan_df_mean[colname] = pd.Series(dan_df[colname].mean())
# Iterate this frame's own columns (previously fine_df_mean.columns, which made
# the cell depend on another cell's result).
for colname in dan_df_mean.columns:
    group = colname[:-2]
    if group in sums:
        dan_df_mean[colname] = dan_df_mean[colname] / sums[group] * 100
dan_df_mean

Unnamed: 0,Age,BloodPressure,CholestoralDensity,MaxHeartRate,Sex=0,Sex=1,PainType=0,PainType=1,PainType=2,PainType=3,BloodSugar=0,BloodSugar=1,IsExerciseInduced=0,IsExerciseInduced=1
0,52.49697,129.30303,242.230303,158.466667,62.537764,37.462236,10.564752,31.764688,30.722785,26.947774,51.850049,48.149951,74.976,25.024


In [None]:
# NOTE(review): placeholder cell — this bare expression only references the
# matplotlib.pyplot module imported as plt; no figure is drawn yet.
plt

### 머신러닝 모델 학습
---

In [2]:
import sklearn