<a href="https://colab.research.google.com/github/appletreeleaf/Project/blob/main/AI/LGAimers/%EC%9E%90%EC%9C%A8%EC%A3%BC%ED%96%89_%EC%84%BC%EC%84%9C%EC%9D%98_%EC%95%88%ED%85%8C%EB%82%98_%EC%84%B1%EB%8A%A5_%EC%98%88%EC%B8%A1_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

 # Project 1. 자율주행 센서의 안테나 성능 예측
### Background
- 생산 공정 데이터를 활용하여 Radar 센서의 안테나 성능 예측을 위한 AI 모델 개발 (데이콘 competition)

### Summary
1. Import Libraries / Packages
2. Data Manipulation
3. Define Model
4. Model Training
5. Validation
6. Prediction
7. Submission

## 1. Import Libraries / Packages

In [None]:
import pandas as pd
import random
import os
import numpy as np
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
import xgboost as xgb

from sklearn.model_selection import train_test_split
from sklearn.metrics import explained_variance_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error , r2_score
from sklearn.model_selection import cross_val_score

from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Fix the random seeds

def seed_everything(seed):
    """Seed the random-number sources used in this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable to ``str(seed)`` and
    seeds both Python's ``random`` module and NumPy's global generator.

    Args:
        seed: Value handed to ``random.seed`` and ``np.random.seed``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)


seed_everything(42)

## 2. Data Manipulation

In [None]:
# Load the raw data: the training table and the test table, in that order
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv')
)

In [None]:
# Split the training table into features and labels by column name:
# columns whose name matches 'X' are model inputs, those matching 'Y' are outputs.
X_train, y_train = (
    train_df.filter(regex=pattern) for pattern in ('X', 'Y')
)

In [None]:
# Split the training set into a training part and a 10% validation part
# (fixed random_state so the split is the same on every run)
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=156,
)

## 3. Define Model

In [None]:
# Define the base estimator: a random forest configured from one parameter table
rf_params = {
    'n_estimators': 800,
    'max_depth': 30,
    'min_samples_leaf': 8,
    'min_samples_split': 8,
    'n_jobs': -1,
    'random_state': 156,
}
estimator = RandomForestRegressor(**rf_params)

In [None]:
# Define the model: wrap the base estimator in a MultiOutputRegressor and fit it
# on the training split. n_jobs=-1 means "use every available process/thread".
my_model = MultiOutputRegressor(estimator=estimator, n_jobs=-1)
my_model = my_model.fit(X_tr, y_tr)

## 4. Model Training

In [None]:
# Prepare the test inputs.
# Select exactly the feature columns used for training (same names, same order)
# instead of "everything except ID", so the frame passed to predict() is
# guaranteed to line up with the frame the model was fitted on.
X_test = test_df[X_train.columns]

In [None]:
# First-pass prediction: run the model fitted on the training split
# against the held-out validation features
preds = my_model.predict(X_val)
print('Done.')

## 5. Validation

In [None]:
# First-pass validation on the held-out split
mse = mean_squared_error(y_val, preds)  # evaluation metric
# RMSE is the square root of the MSE (some scikit-learn versions can also return
# it directly from mean_squared_error via squared=False).
rmse = np.sqrt(mse)
# r2_score is the coefficient of determination (R^2). It is not the same metric
# as explained_variance_score, so it is labelled as R2 in the output.
r2 = r2_score(y_val, preds)

print('MSE : {0:.3f} , RMSE : {1:.3f}'.format(mse, rmse))
print('R2 score : {0:.3f}'.format(r2))

In [None]:
# Feature importances
# The template `estimator` is never fitted itself: MultiOutputRegressor fits its
# own clones, one per target column, and exposes them as `my_model.estimators_`.
# Reading `estimator.feature_importances_` therefore fails, so average the
# importances of the fitted per-target forests instead.
importances_values = np.mean(
    [fitted.feature_importances_ for fitted in my_model.estimators_],
    axis=0,
)
importances = pd.Series(importances_values, index=X_train.columns)
top20 = importances.sort_values(ascending=False)[:20]

# Horizontal bar chart of the 20 most important features
plt.figure(figsize=(8, 6))
plt.title('Feature importances Top 20')
sns.barplot(x=top20, y=top20.index)
plt.show()

## 6. Prediction

In [None]:
# Final prediction: fit a fresh multi-output model on the full training set,
# then predict the test inputs. n_jobs=-1 means "use every available
# process/thread".
my_fmodel = MultiOutputRegressor(estimator=estimator, n_jobs=-1)
my_fmodel = my_fmodel.fit(X_train, y_train)
preds = my_fmodel.predict(X_test)
print('Done.')

In [None]:
# Show the test-set predictions as the cell output
preds

## 7. Submission

In [None]:
# Load the sample submission file; its columns are filled with predictions below
submit = pd.read_csv('./sample_submission.csv')

In [None]:
# Copy the predictions into the submission frame, one target column at a time.
# 'ID' is excluded by name and the remaining columns are paired with the
# prediction columns by position, so the mapping does not depend on 'ID'
# being the first column of the template.
target_cols = [col for col in submit.columns if col != 'ID']

# Fail loudly on a width mismatch instead of silently dropping or misaligning
# prediction columns.
if preds.shape[1] != len(target_cols):
    raise ValueError(
        'preds has {} columns but the submission template has {} target columns'
        .format(preds.shape[1], len(target_cols))
    )

# preds.T iterates over prediction columns (one array per target)
for col, col_preds in zip(target_cols, preds.T):
    submit[col] = col_preds
print('Done.')


In [None]:
# Show the filled-in submission frame as the cell output
submit

In [None]:
# Write the submission to disk without the DataFrame index column
submit.to_csv('./submit_0826_1.csv', index=False)