In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import r2_score

In [2]:
# Load the abalone table from a CSV file; the relative path is resolved
# against the notebook's working directory.
data = pd.read_csv('abalone.csv')

In [3]:
# Preview the first rows to check how the columns were parsed.
data.head()

Unnamed: 0,Sex,Length,Diameter,Height,WholeWeight,ShuckedWeight,VisceraWeight,ShellWeight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [4]:
# Encode the categorical Sex column as numbers: M -> 1, F -> -1, I -> 0.
# The result is assigned back to the frame instead of calling an in-place
# method on the `data['Sex']` selection: with pandas copy-on-write that chained
# in-place call operates on a temporary object and leaves `data` unchanged.
# Labels are matched exactly (no regex), and values that are not in the mapping
# are passed through untouched, so re-running this cell is harmless.
sex_codes = {'M': 1, 'F': -1, 'I': 0}
data['Sex'] = data['Sex'].map(lambda label: sex_codes.get(label, label))

In [5]:
# Preview again to confirm that the Sex column now holds numeric codes.
data.head()

Unnamed: 0,Sex,Length,Diameter,Height,WholeWeight,ShuckedWeight,VisceraWeight,ShellWeight,Rings
0,1,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,1,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,-1,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,1,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,0,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [6]:
# Features are every column except the last one; the target is the ring count.
X, y = data.iloc[:, :-1], data['Rings']

In [7]:
# Evaluate random forests with 1..50 trees using shuffled 5-fold
# cross-validation scored by R^2. Both the forest and the fold split are
# seeded so the search is reproducible.
model = RandomForestRegressor(random_state=42)
cv = KFold(n_splits=5, shuffle=True, random_state=42)
grid = {'n_estimators': range(1, 51)}
gs = GridSearchCV(estimator=model, param_grid=grid, scoring='r2', cv=cv)
gs.fit(X, y)

GridSearchCV(cv=KFold(n_splits=5, random_state=42, shuffle=True),
             estimator=RandomForestRegressor(random_state=42),
             param_grid={'n_estimators': range(1, 51)}, scoring='r2')

In [8]:
def min_n_to_score(n, scores, level):
    """Return the first value of ``n`` whose paired score exceeds ``level``.

    ``n`` and ``scores`` are walked in parallel (pairs are formed with ``zip``,
    so iteration stops at the shorter of the two).

    Parameters
    ----------
    n : iterable
        Candidate values, in the order they should be considered.
    scores : iterable
        Score associated with each candidate in ``n``.
    level : number
        Threshold that a score must strictly exceed (``score > level``).

    Returns
    -------
    The first candidate whose score is strictly greater than ``level``.
    If no score exceeds ``level``, the last paired candidate is returned
    (fallback kept for backward compatibility; callers that need to tell a
    real hit from the fallback should check the score themselves).
    ``None`` is returned when there are no pairs at all, instead of failing
    with ``UnboundLocalError``.
    """
    last_seen = None
    for candidate, score in zip(n, scores):
        if score > level:
            return candidate
        # Remember the most recent candidate for the "nothing qualified" fallback.
        last_seen = candidate
    return last_seen

In [9]:
# Inspect the raw cross-validation results of the grid search.
# NOTE(review): this displays every array in the results dict, which makes for
# a very long output; a narrower view may be easier to read.
gs.cv_results_

{'mean_fit_time': array([0.02763867, 0.04523706, 0.07055516, 0.08566937, 0.10030608,
        0.13810153, 0.13818979, 0.15406022, 0.17484965, 0.19208622,
        0.20958891, 0.24334531, 0.29914627, 0.33634653, 0.53455448,
        0.38792982, 0.3279201 , 0.34408073, 0.36156044, 0.37426949,
        0.3926888 , 0.40859189, 0.44581919, 0.64573789, 0.49701681,
        0.54931269, 0.55195313, 1.03505597, 0.57051802, 0.58144431,
        0.58634639, 0.59811788, 0.60883269, 0.61798358, 0.63876848,
        0.65430055, 0.67735586, 0.72593098, 1.16087823, 0.73752451,
        0.78541517, 0.93138819, 0.85208435, 1.09970403, 0.9517766 ,
        1.30896096, 0.9517488 , 0.90391712, 1.21980367, 1.14469509]),
 'mean_score_time': array([0.00426731, 0.00396686, 0.0061357 , 0.00508165, 0.00433073,
        0.00623012, 0.00488262, 0.00504241, 0.00543175, 0.00572958,
        0.00604806, 0.00686369, 0.00896039, 0.00868735, 0.01539893,
        0.01158886, 0.00785694, 0.00797925, 0.00828171, 0.00839834,
        0.

In [10]:
# Smallest number of trees whose mean cross-validated R^2 is above 0.52.
min_n = min_n_to_score(
    n=gs.cv_results_['param_n_estimators'],
    scores=gs.cv_results_['mean_test_score'],
    level=0.52,
)
min_n

27

In [11]:
# Save the answer as plain text.
with open('1.txt', 'w') as answer_file:
    answer_file.write(str(min_n))