# 6장 의사 결정을 하기 위한 절차 (의사결정) Quiz

In [1]:
import numpy as np
import pandas as pd
import pickle
from scipy import stats
from statsmodels.stats.weightstats import ttest_ind

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


# Lift the column cap so wide DataFrames are rendered with every column.
pd.options.display.max_columns = None

## 6-1. 데이터셋 불러오기


In [2]:
# Load the quiz dataset: the first CSV column is read as the index and then
# discarded in favour of a fresh 0..n-1 index; rows containing any missing
# value are dropped afterwards (so the surviving index labels may have gaps).
df = (
    pd.read_csv("val_for_quiz.csv", index_col=0)
    .reset_index(drop=True)
    .dropna()
)
df

Unnamed: 0,year,month,day,time,minute,stn_id,ws10_deg,ws10_ms,ta,re,hm,sun10,ts,vis1,class,is_fog,dew_point,dew_reached,temp_diff,ws10_dir,t_td
0,K,12,21,2,30,A,57.5,4.2,-0.7,0.0,60.6,0.00,-1.3,20000,4,0,-7.133154,0,0.6,NE,6.433154
2,I,11,8,18,40,E,324.7,3.9,14.7,0.0,26.5,0.00,12.2,50000,4,0,-3.753022,0,2.5,NW,18.453022
3,J,5,22,1,50,A,240.5,1.9,15.7,0.0,86.1,0.00,15.6,10650,4,0,13.456121,0,0.1,SW,2.243879
4,K,1,4,21,30,E,319.5,3.7,4.8,0.0,32.4,0.00,0.8,35330,4,0,-9.839557,0,4.0,NW,14.639557
5,I,3,5,13,20,B,240.1,3.7,1.5,0.0,42.6,0.50,-0.2,17635,4,0,-9.450206,0,1.7,SW,10.950206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78344,I,9,25,10,20,D,4.4,5.0,21.4,0.0,63.1,0.41,30.5,20000,4,0,14.314440,0,-9.1,N,7.085560
78345,J,7,13,9,0,B,352.5,0.5,25.5,0.0,74.0,0.15,30.9,10300,4,0,20.671669,0,-5.4,N,4.828331
78346,K,11,7,21,50,A,82.4,0.9,10.9,0.0,54.9,0.00,9.8,24650,4,0,2.473210,0,1.1,E,8.426790
78347,J,11,3,20,10,D,189.3,1.4,13.6,0.0,68.0,0.00,12.2,7894,4,0,7.992854,0,1.4,S,5.607146


## 6-2. 두 모집단 간의 차에 대한 검정

- `year` 컬럼이 J인 경우와 K인 경우, `ta` 컬럼의 평균값에 차이가 있을까요?

In [3]:
# Summary statistics (count, mean, std, quartiles, ...) of `ta` for each year.
df.groupby("year")["ta"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
I,25422.0,13.606261,9.400612,-15.4829,5.9,14.0,21.5,36.9
J,25242.0,14.29081,9.610955,-13.179149,6.9,15.0,22.2,36.2
K,24244.0,14.112155,10.185157,-14.4,5.9,15.1,22.8,37.2


In [17]:
# 연도에 따른 ta 컬럼을 시각화해주세요



### 6-2-1. 정규성 검사

In [18]:
# Normality check: pull out the `ta` values for year J and for year K.

ta_j = df.loc[df["year"] == "J", "ta"]
ta_k = df.loc[df["year"] == "K", "ta"]

# TODO: run an appropriate normality test on ta_j and ta_k.

In [19]:
# 정규성 검사 결과의 p-value를 표현해주세요


In [8]:
# Based on the normality-test results, replace each __ with O (yes) or X (no).
# The printed sentences read "The ta column of year J/K follows a normal
# distribution : __".
print("J년의 ta 컬럼은 정규분포를 따른다 : __")
print("K년의 ta 컬럼은 정규분포를 따른다 : __")

J년의 ta 컬럼은 정규분포를 따른다 : __
K년의 ta 컬럼은 정규분포를 따른다 : __


### 6-2-2. 등분산성 검사

In [9]:
# 정규성을 만족한다면, 등분산성 검사를 시행해주세요
# 정규성을 만족하지 못한다면, 해당 부분을 스킵하고 비모수적 검정을 진행해주세요

### 6-2-3. 두 모집단의 평균차 검정

In [20]:
# 앞선 정규성, 등분산성 검사의 결과에 따라, 두 모집단의 평균차를 검정해주세요

## 6-3. 짝으로 된 자료에 대한 검정
- I년의 `is_fog` 여부에 따라, 안개가 꼈을 때 `ts`가 내려갔을까요?

In [11]:
# Keep only the rows recorded in year I, then preview the first three.
is_year_i = df["year"] == "I"
df_i = df.loc[is_year_i]
df_i.head(3)

Unnamed: 0,year,month,day,time,minute,stn_id,ws10_deg,ws10_ms,ta,re,hm,sun10,ts,vis1,class,is_fog,dew_point,dew_reached,temp_diff,ws10_dir,t_td
2,I,11,8,18,40,E,324.7,3.9,14.7,0.0,26.5,0.0,12.2,50000,4,0,-3.753022,0,2.5,NW,18.453022
5,I,3,5,13,20,B,240.1,3.7,1.5,0.0,42.6,0.5,-0.2,17635,4,0,-9.450206,0,1.7,SW,10.950206
8,I,7,22,20,40,B,148.0,0.8,22.1,1.0,92.9,0.0,23.2,4963,4,0,20.93332,0,-1.1,SE,1.16668


In [21]:
# 안개 유무에 따른 ts 컬럼을 시각화해주세요


### 6-3-1. 정규성 검사

In [13]:
# Normality check: split the `ts` values in df_i by the fog flag
# (is_fog == 0 -> no fog, is_fog == 1 -> fog).
df_i_no_fog = df_i.loc[df_i["is_fog"] == 0, "ts"]
df_i_yes_fog = df_i.loc[df_i["is_fog"] == 1, "ts"]

# TODO: run an appropriate normality test on both samples.

In [22]:
# 정규성 검사 결과의 p-value를 표현해주세요

### 6-3-2. 등분산성 검사

In [15]:
# 정규성을 만족한다면, 등분산성 검사를 시행해주세요
# 정규성을 만족하지 못한다면, 해당 부분을 스킵하고 비모수적 검정을 진행해주세요

### 6-3-3. 두 모집단의 평균차 검정

In [23]:
# 앞선 정규성, 등분산성 검사의 결과에 따라, 단측검정을 시행해주세요