# 로지스틱 회귀 + 표준화
## #01. 작업준비
### 패키지 가져오기



In [1]:
import pandas as pd
from statsmodels.formula.api import logit
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score, accuracy_score, precision_score, f1_score,recall_score
import seaborn as sb
from matplotlib import pyplot as plt
import numpy as np
import sys
import os

sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))
from helper import my_logit, scailing

### 그래프 초기화

In [2]:
plt.rcParams["font.family"] = 'AppleGothic' if sys.platform == 'darwin' else 'Malgun Gothic'
plt.rcParams["font.size"] = 12
plt.rcParams["figure.figsize"] = (10, 5)
plt.rcParams["axes.unicode_minus"] = False

### 데이터 가져오기
방사선학 석사과정 대학원생 모집을 하였다. 이 때 지원한 방사선사의 대학원 합격 여부에 따른 주요 요인이 무엇인지 분석하라.

단, 독립변수는 정규분포를 따른다고 가정한다


|변수|	구분	|설명|
|--|--|--|
|합격여부|	범주형|	1=합격, 0=불합격|
|필기점수|	연속형|	800점 만점|
|학부성적|	연속형|	4.0 만점|
|병원경력|	범주형|	1: 10년이상, 2: 2~5년, 3: 1~5년, 4: 1년 미만|


In [3]:
df = pd.read_excel("https://data.hossam.kr/E05/gradeuate.xlsx")
df

Unnamed: 0,합격여부,필기점수,학부성적,병원경력
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.00,1
3,1,640,3.19,4
4,0,520,2.93,4
...,...,...,...,...
395,0,620,4.00,2
396,0,560,3.04,3
397,0,460,2.63,2
398,0,700,3.65,2


In [None]:
df.

## #02. 데이터 표준화




In [4]:
df_tmp = df.drop('합격여부',axis=1)
std_df = scailing(df_tmp)
std_df['합격여부']=df['합격여부']
std_df.head()

Unnamed: 0,필기점수,학부성적,병원경력,합격여부
0,-1.800263,0.579072,0.545968,0
1,0.626668,0.736929,0.545968,1
2,1.840134,1.605143,-1.574296,1
3,0.453316,-0.525927,1.6061,1
4,-0.586797,-1.209974,1.6061,0


## #03. 로지스틱 회귀분석 (모듈기능 활용)

In [5]:
logit_result = my_logit(std_df, y='합격여부', x=['필기점수','학부성적','병원경력'])
print(logit_result.summary)

Optimization terminated successfully.
         Current function value: 0.574302
         Iterations 5
<bound method BinaryResults.summary of <statsmodels.discrete.discrete_model.LogitResults object at 0x0000029B772E8890>>


In [6]:
logit_result.cmdf

Unnamed: 0,Positive,Negative
True,253,20
False,98,29


In [7]:
logit_result.odds_rate_df

Unnamed: 0,odds_rate
Intercept,0.423557
필기점수,1.302986
학부성적,1.343577
병원경력,0.589627


In [8]:
logit_result.prs

0.08107331586891475

In [None]:
logit_result.prs

0.08107331586891475

In [9]:
logit_result.result_df

Unnamed: 0,설명력(Pseudo-Rsqe),정확도(Accuracy),정밀도(Precision),"재현율(Recall, TPR)","위양성율(Fallout, FPR)","특이성(Specificity, TNR)",RAS,f1_score
0,0.081073,0.705,0.591837,0.228346,0.102837,0.897163,0.577543,0.329545
