In [27]:
import streamlit as st
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import r2_score, mean_squared_error
import plotly.graph_objects as go

# 데이터 불러오기
tips = sns.load_dataset('tips')
print(tips.info())

# 특성과 타겟 분리
y = tips['tip']
X = tips['total_bill']

# 훈련, 테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Hyperparameters
max_depth = st.select_slider("Select max depth", options=[i for i in range(2, 30)], value=(5, 10))
min_samples_leaf = st.slider("Minimum samples leaf", min_value=2, max_value=20)

st.write('max_depth:', max_depth, 'min_samples_leat:', min_samples_leaf)

random_search = {'max_depth': [i for i in range(max_depth[0], max_depth[1])],
                 'min_samples_leaf': [min_samples_leaf]}

clf = RandomForestRegressor()
model = RandomizedSearchCV(estimator = clf, param_distributions = random_search, n_iter = 10, 
                               cv = 4, verbose= 1, random_state= 101, n_jobs = -1)
model.fit(X_train,y_train)

# 예측
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# 성능 평가
test_rmse = mean_squared_error(y_test, y_test_pred, squared=False)
r2 = r2_score(y_test, y_test_pred)

# 그래프 그리기
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=X_test['total_bill'], y=y_test, mode='markers', name='test', marker=dict(color='red'))
)
fig.add_trace(
    go.Scatter(x=X_test['total_bill'], y=y_test_pred, mode='markers', name='prediction', marker=dict(color='green'))
)

fig.update_layout(
    title='Tip Prediction with Random Forest Regressor',
    xaxis_title='Total Bill',
    yaxis_title='Total',
    annotations=[go.layout.Annotation(x=0.05, y=0.9, 
                                        text=f'Test RMSE: {test_rmse:.3f}<br>R2 Score: {r2:.3f}',
                                        showarrow=False)]
)

st.plotly_chart(fig)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB
None
Fitting 4 folds for each of 5 candidates, totalling 20 fits



The total space of parameters 5 is smaller than n_iter=10. Running 5 iterations. For exhaustive searches, use GridSearchCV.



ValueError: 
All the 20 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\ensemble\_forest.py", line 345, in fit
    X, y = self._validate_data(
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\base.py", line 584, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\utils\validation.py", line 1106, in check_X_y
    X = check_array(
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\utils\validation.py", line 902, in check_array
    raise ValueError(
ValueError: Expected 2D array, got 1D array instead:
array=[32.9  28.97 34.83 50.81 12.66 21.01 16.31 27.05 13.81 15.77 12.43 13.39
 10.33 35.26 35.83 12.69 14.07 24.59 14.26 15.06 34.3  31.85 16.   11.02
  8.51 11.38 12.16 40.55 12.02 16.47 17.59 30.4  30.14 16.04 15.95 39.42
 45.35  9.6  16.66 31.27 20.53 32.4  22.75 20.69  9.68  8.35 15.69 23.68
 15.36  9.94 12.26 13.42 32.83 10.09 18.04 13.81 19.44 17.78 12.74 26.88
 15.81 14.   22.49 32.68 11.59  8.58 21.16 15.04 18.43 48.27 12.76 20.27
 16.29 20.76 26.86 11.61 18.26 13.51 13.42 18.29 25.56 25.21 12.54 24.01
 16.82 48.33 23.1  27.18 12.9  10.63 11.24 28.55 24.71 20.29 26.41 16.4
 21.5  12.6  30.46 19.81 22.82 16.93 25.   22.12 10.34 34.81  7.51 19.08
 13.13 22.42 12.46 29.93 18.28 13.   14.73 28.17 30.06 13.42 18.78 17.92
 18.15 17.07 20.49 14.83  5.75 34.63 44.3 ].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\ensemble\_forest.py", line 345, in fit
    X, y = self._validate_data(
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\base.py", line 584, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\utils\validation.py", line 1106, in check_X_y
    X = check_array(
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\utils\validation.py", line 902, in check_array
    raise ValueError(
ValueError: Expected 2D array, got 1D array instead:
array=[15.53 20.45 34.65 25.29 38.01 29.8   9.78 25.71 20.08 11.17 38.73 16.21
 18.35 15.42 24.06 21.7  13.94 29.03  8.52 16.32 17.51 10.29 40.17 17.47
 16.58 17.92 17.46 18.24 15.98 22.76 16.43 13.37 10.07 13.27 23.17 15.48
 16.99 21.01 27.2  22.23 11.35 14.15 11.87 20.69  9.68  8.35 15.69 23.68
 15.36  9.94 12.26 13.42 32.83 10.09 18.04 13.81 19.44 17.78 12.74 26.88
 15.81 14.   22.49 32.68 11.59  8.58 21.16 15.04 18.43 48.27 12.76 20.27
 16.29 20.76 26.86 11.61 18.26 13.51 13.42 18.29 25.56 25.21 12.54 24.01
 16.82 48.33 23.1  27.18 12.9  10.63 11.24 28.55 24.71 20.29 26.41 16.4
 21.5  12.6  30.46 19.81 22.82 16.93 25.   22.12 10.34 34.81  7.51 19.08
 13.13 22.42 12.46 29.93 18.28 13.   14.73 28.17 30.06 13.42 18.78 17.92
 18.15 17.07 20.49 14.83  5.75 34.63 44.3 ].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\ensemble\_forest.py", line 345, in fit
    X, y = self._validate_data(
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\base.py", line 584, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\utils\validation.py", line 1106, in check_X_y
    X = check_array(
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\utils\validation.py", line 902, in check_array
    raise ValueError(
ValueError: Expected 2D array, got 1D array instead:
array=[15.53 20.45 34.65 25.29 38.01 29.8   9.78 25.71 20.08 11.17 38.73 16.21
 18.35 15.42 24.06 21.7  13.94 29.03  8.52 16.32 17.51 10.29 40.17 17.47
 16.58 17.92 17.46 18.24 15.98 22.76 16.43 13.37 10.07 13.27 23.17 15.48
 16.99 21.01 27.2  22.23 11.35 14.15 11.87 32.9  28.97 34.83 50.81 12.66
 21.01 16.31 27.05 13.81 15.77 12.43 13.39 10.33 35.26 35.83 12.69 14.07
 24.59 14.26 15.06 34.3  31.85 16.   11.02  8.51 11.38 12.16 40.55 12.02
 16.47 17.59 30.4  30.14 16.04 15.95 39.42 45.35  9.6  16.66 31.27 20.53
 32.4  22.75 48.33 23.1  27.18 12.9  10.63 11.24 28.55 24.71 20.29 26.41
 16.4  21.5  12.6  30.46 19.81 22.82 16.93 25.   22.12 10.34 34.81  7.51
 19.08 13.13 22.42 12.46 29.93 18.28 13.   14.73 28.17 30.06 13.42 18.78
 17.92 18.15 17.07 20.49 14.83  5.75 34.63 44.3 ].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\ensemble\_forest.py", line 345, in fit
    X, y = self._validate_data(
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\base.py", line 584, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\utils\validation.py", line 1106, in check_X_y
    X = check_array(
  File "C:\Users\j2hoo\OneDrive\Documents\streamlitbook-evansarah\venv\lib\site-packages\sklearn\utils\validation.py", line 902, in check_array
    raise ValueError(
ValueError: Expected 2D array, got 1D array instead:
array=[15.53 20.45 34.65 25.29 38.01 29.8   9.78 25.71 20.08 11.17 38.73 16.21
 18.35 15.42 24.06 21.7  13.94 29.03  8.52 16.32 17.51 10.29 40.17 17.47
 16.58 17.92 17.46 18.24 15.98 22.76 16.43 13.37 10.07 13.27 23.17 15.48
 16.99 21.01 27.2  22.23 11.35 14.15 11.87 32.9  28.97 34.83 50.81 12.66
 21.01 16.31 27.05 13.81 15.77 12.43 13.39 10.33 35.26 35.83 12.69 14.07
 24.59 14.26 15.06 34.3  31.85 16.   11.02  8.51 11.38 12.16 40.55 12.02
 16.47 17.59 30.4  30.14 16.04 15.95 39.42 45.35  9.6  16.66 31.27 20.53
 32.4  22.75 20.69  9.68  8.35 15.69 23.68 15.36  9.94 12.26 13.42 32.83
 10.09 18.04 13.81 19.44 17.78 12.74 26.88 15.81 14.   22.49 32.68 11.59
  8.58 21.16 15.04 18.43 48.27 12.76 20.27 16.29 20.76 26.86 11.61 18.26
 13.51 13.42 18.29 25.56 25.21 12.54 24.01 16.82].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
