In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import koreanize_matplotlib

#### 데이터 준비

In [34]:
df = pd.read_csv('data/RocketLaunchDataCompleted2.csv', encoding='cp949')
df.head()

Unnamed: 0,Crewed or Uncrewed,Launched?,High Temp,Low Temp,Ave Temp,Hist High Temp,Hist Low Temp,Hist Ave Temp,Percipitation at Launch Time,Hist Ave Percipitation,Wind Direction,Max Wind Speed,Visibility,Condition,target
0,Uncrewed,N,75.0,68.0,71.0,75.0,55.0,65.0,0.0,0.08,E,16.0,15.0,Cloudy,0
1,Uncrewed,N,78.0,70.0,73.39,75.0,55.0,65.0,0.0,0.09,E,14.0,10.0,Cloudy,0
2,Uncrewed,Y,73.0,0.0,60.21,75.0,55.0,65.0,0.0,0.09,NE,15.0,10.0,Cloudy,1
3,Uncrewed,N,76.0,57.0,66.04,75.0,55.0,65.0,0.0,0.08,N,10.0,10.0,Partly Cloudy,0
4,Uncrewed,N,79.0,60.0,70.52,75.0,55.0,65.0,0.0,0.09,E,12.0,10.0,Partly Cloudy,0


In [35]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 15 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Crewed or Uncrewed            300 non-null    object 
 1   Launched?                     300 non-null    object 
 2   High Temp                     300 non-null    float64
 3   Low Temp                      300 non-null    float64
 4   Ave Temp                      300 non-null    float64
 5   Hist High Temp                300 non-null    float64
 6   Hist Low Temp                 300 non-null    float64
 7   Hist Ave Temp                 300 non-null    float64
 8   Percipitation at Launch Time  300 non-null    float64
 9   Hist Ave Percipitation        300 non-null    float64
 10  Wind Direction                300 non-null    object 
 11  Max Wind Speed                300 non-null    float64
 12  Visibility                    300 non-null    float64
 13  Condi

### 독립변수, 종속변수 지정

In [36]:
X = df.drop(['Launched?', 'target'], axis=1)
y = df['target']

In [37]:
# -----------------
# 데이터 유형 파악
# -----------------
cat_cols = X.select_dtypes(include='object').columns
num_cols = X.select_dtypes(include='number').columns

### 훈련/테스트세트 분할

In [39]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((225, 13), (75, 13), (225,), (75,))

### 데이터 전처리

In [None]:
# ------------
# 스케일링
# ------------
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

train_scaled = scaler.fit_transform(X_train[num_cols])
test_scaled = scaler.transform(X_test[num_cols])

X_train_scaled = pd.DataFrame(train_scaled, columns=num_cols)
X_test_scaled = pd.DataFrame(test_scaled, columns=num_cols)

In [43]:
# ------------
# 인코딩
# ------------
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False, drop='first')

train_encoded = encoder.fit_transform(X_train[cat_cols])
test_encoded = encoder.transform(X_test[cat_cols])

col_names = encoder.get_feature_names_out(cat_cols)
X_train_encoded = pd.DataFrame(train_encoded, columns=col_names)
X_test_encoded = pd.DataFrame(test_encoded, columns=col_names)



In [48]:
# ------------
# 최종
# ------------
X_train_preprocessed = pd.concat([X_train_scaled, X_train_encoded], axis=1)
X_test_preprocessed = pd.concat([X_test_scaled, X_test_encoded], axis=1)

X_train_preprocessed

Unnamed: 0,High Temp,Low Temp,Ave Temp,Hist High Temp,Hist Low Temp,Hist Ave Temp,Percipitation at Launch Time,Hist Ave Percipitation,Max Wind Speed,Visibility,...,Condition_Fail,Condition_Fair,Condition_Heavy T-Storm,Condition_Light Rain,Condition_Mostly Cloudy,Condition_Partly Cloudly,Condition_Partly Cloudy,Condition_Rain,Condition_T-Storm,Condition_Thunder
0,0.984930,-1.178614,-0.380270,0.768842,1.083569,0.995520,-0.296502,-0.043031,1.173852,-0.927043,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,-0.932935,0.239343,-0.927798,-1.075149,-1.287454,-1.195732,-0.096613,-0.078353,0.592097,-0.460758,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,-1.028828,-1.178614,-1.079133,-1.075149,-1.390542,-1.195732,-0.296502,-0.078353,-0.377494,-0.460758,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-2.083653,-0.002011,-1.772896,-1.075149,-1.287454,-1.195732,-0.296502,-0.076494,-0.183576,0.316383,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,-0.549362,0.872898,0.138352,-0.729401,-0.772014,-0.757482,-0.296502,-0.078353,-0.183576,0.316383,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220,0.505464,1.144422,0.992802,1.114590,1.083569,1.105082,-0.296502,-0.054185,-1.541004,1.093524,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
221,-0.261682,-1.178614,-1.245772,-0.729401,-0.978190,-0.867044,-0.296502,-0.072775,-0.183576,-0.460758,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
222,0.697250,-1.178614,0.575354,0.999341,1.083569,1.105082,-0.296502,-0.046749,0.592097,-0.460758,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
223,-0.837041,0.269512,-0.762009,-0.844650,-1.081278,-0.976607,-0.196557,-0.078353,-0.571412,1.093524,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


### 모델 생성

### 성능평가