In [3]:
import numpy as np
import pandas as pd
pd.set_option('display.max_row', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 10000)
pd.set_option('display.max_colwidth', None)
import math


In [4]:
abalone = pd.read_csv('../input/hwdata/abalone.csv')
concretecs = pd.read_csv('../input/hwdata/concretecs.csv')
parkinsons = pd.read_csv('../input/hwdata/parkinsons.csv')
skillcraft = pd.read_csv('../input/hwdata/skillcraft.csv')
wine = pd.read_csv('../input/hwdata/winequality-white.csv')

In [5]:
abalone.head(3)

Unnamed: 0,sex,length,diameter,height,wholeweight,shuckedweight,visceraweight,shellweight,rings
0,-1,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,-1,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,1,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9


In [6]:
concretecs.head(3)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Concrete compressive strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27


In [7]:
parkinsons.head(3)

Unnamed: 0,Jitter(%),Jitter(Abs),Jitter:RAP,Jitter:PPQ5,Jitter:DDP,Shimmer,Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,Shimmer:APQ11,Shimmer:DDA,NHR,HNR,RPDE,DFA,PPE,total_UPDRS
0,0.00662,3.4e-05,0.00401,0.00317,0.01204,0.02565,0.23,0.01438,0.01309,0.01662,0.04314,0.01429,21.64,0.41888,0.54842,0.16006,34.398
1,0.003,1.7e-05,0.00132,0.0015,0.00395,0.02024,0.179,0.00994,0.01072,0.01689,0.02982,0.011112,27.183,0.43493,0.56477,0.1081,34.894
2,0.00481,2.5e-05,0.00205,0.00208,0.00616,0.01675,0.181,0.00734,0.00844,0.01458,0.02202,0.02022,23.047,0.46222,0.54405,0.21014,35.389


In [8]:
skillcraft.head(3)

Unnamed: 0,Age,HoursPerWeek,TotalHours,APM,SelectByHotkeys,AssignToHotkeys,UniqueHotkeys,MinimapAttacks,MinimapRightClicks,NumberOfPACs,GapBetweenPACs,ActionLatency,ActionsInPAC,TotalMapExplored,WorkersMade,UniqueUnitsMade,ComplexUnitsMade,ComplexAbilitiesUsed
0,27,10,3000,143.718,0.003515,0.00022,7,0.00011,0.000392,0.004849,32.6677,40.8673,4.7508,28,0.001397,6,0.0,0.0
1,23,10,5000,129.2322,0.003304,0.000259,4,0.000294,0.000432,0.004307,32.9194,42.3454,4.8434,22,0.001194,5,0.0,0.000208
2,30,10,200,69.9612,0.001101,0.000336,4,0.000294,0.000461,0.002926,44.6475,75.3548,4.043,22,0.000745,6,0.0,0.000189


In [9]:
wine.head(3)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, GridSearchCV

pipe = Pipeline([('preprocessing',None),('regressor',SVR())])
params = [
    {'regressor':[SVR()],'preprocessing':[None,StandardScaler(),MinMaxScaler()],'regressor__gamma':[0.001,0.01,0.1,1,10,100],
    'regressor__C':[0.001,0.01,0.1,1,10,100]},
    {'regressor':[MLPRegressor(solver='adam',max_iter=10000)],'preprocessing':[StandardScaler(),MinMaxScaler()],'regressor__hidden_layer_sizes':[(10,),(100,),(10,10),(100,10)],
    'regressor__activation':['tanh','relu']},
    {'regressor':[RandomForestRegressor(n_estimators=200,n_jobs=-1)],'preprocessing':[None],'regressor__max_features':['auto','sqrt']},
]
kfold = KFold(n_splits=5,shuffle=True,random_state=1205)


In [13]:
for i,dataset in enumerate([abalone,concretecs,parkinsons,skillcraft,wine]):
    x = dataset.iloc[:,:-1]
    y = dataset.iloc[:,-1]
    
    x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=int(y.shape[0]*0.5),random_state=1205)
   
    grid = GridSearchCV(pipe,params,scoring='neg_mean_squared_error',refit=True,cv=kfold)
    grid.fit(x_train,y_train)
    print(f'{i}th dataset Best Params:\n{grid.best_params_}\n')
    print('{} Best CV score: {:.4f}'.format(dataset,math.sqrt(-grid.best_score_)))
    print('{} Test set score: {:.4f}'.format(dataset,math.sqrt(-grid.score(x_test,y_test))))
    pd.DataFrame(grid.cv_results_).to_csv("{}.csv".format(i), mode='w')

0th dataset Best Params:
{'preprocessing': StandardScaler(), 'regressor': MLPRegressor(activation='tanh', hidden_layer_sizes=(100, 10), max_iter=10000), 'regressor__activation': 'tanh', 'regressor__hidden_layer_sizes': (100, 10)}

      sex  length  diameter  height  wholeweight  shuckedweight  visceraweight  shellweight  rings
0      -1   0.455     0.365   0.095       0.5140         0.2245         0.1010       0.1500     15
1      -1   0.350     0.265   0.090       0.2255         0.0995         0.0485       0.0700      7
2       1   0.530     0.420   0.135       0.6770         0.2565         0.1415       0.2100      9
3      -1   0.440     0.365   0.125       0.5160         0.2155         0.1140       0.1550     10
4       0   0.330     0.255   0.080       0.2050         0.0895         0.0395       0.0550      7
...   ...     ...       ...     ...          ...            ...            ...          ...    ...
4172    1   0.565     0.450   0.165       0.8870         0.3700         0.23

      Jitter(%)  Jitter(Abs)  Jitter:RAP  Jitter:PPQ5  Jitter:DDP  Shimmer  Shimmer(dB)  Shimmer:APQ3  Shimmer:APQ5  Shimmer:APQ11  Shimmer:DDA       NHR     HNR     RPDE      DFA      PPE  total_UPDRS
0       0.00662     0.000034     0.00401      0.00317     0.01204  0.02565        0.230       0.01438       0.01309        0.01662      0.04314  0.014290  21.640  0.41888  0.54842  0.16006       34.398
1       0.00300     0.000017     0.00132      0.00150     0.00395  0.02024        0.179       0.00994       0.01072        0.01689      0.02982  0.011112  27.183  0.43493  0.56477  0.10810       34.894
2       0.00481     0.000025     0.00205      0.00208     0.00616  0.01675        0.181       0.00734       0.00844        0.01458      0.02202  0.020220  23.047  0.46222  0.54405  0.21014       35.389
3       0.00528     0.000027     0.00191      0.00264     0.00573  0.02309        0.327       0.01106       0.01265        0.01963      0.03317  0.027837  24.445  0.48730  0.57794  0.33277    

4th dataset Best Params:
{'preprocessing': None, 'regressor': RandomForestRegressor(max_features='sqrt', n_estimators=200, n_jobs=-1), 'regressor__max_features': 'sqrt'}

      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  alcohol  quality
0               7.0              0.27         0.36            20.7      0.045                 45.0                 170.0  1.00100  3.00       0.45      8.8        6
1               6.3              0.30         0.34             1.6      0.049                 14.0                 132.0  0.99400  3.30       0.49      9.5        6
2               8.1              0.28         0.40             6.9      0.050                 30.0                  97.0  0.99510  3.26       0.44     10.1        6
3               7.2              0.23         0.32             8.5      0.058                 47.0                 186.0  0.99560  3.19       0.40      9.9        6
4   

In [16]:
#abalone
result = pd.read_csv('0.csv')
result[['params','mean_test_score']].sort_values('mean_test_score')[::-1].head(3)

Unnamed: 0,params,mean_test_score
111,"{'preprocessing': StandardScaler(), 'regressor': MLPRegressor(activation='tanh', hidden_layer_sizes=(100, 10), max_iter=10000), 'regressor__activation': 'tanh', 'regressor__hidden_layer_sizes': (100, 10)}",-4.495591
110,"{'preprocessing': StandardScaler(), 'regressor': MLPRegressor(activation='tanh', hidden_layer_sizes=(100, 10), max_iter=10000), 'regressor__activation': 'tanh', 'regressor__hidden_layer_sizes': (10, 10)}",-4.507055
108,"{'preprocessing': StandardScaler(), 'regressor': MLPRegressor(activation='tanh', hidden_layer_sizes=(100, 10), max_iter=10000), 'regressor__activation': 'tanh', 'regressor__hidden_layer_sizes': (10,)}",-4.546357


In [17]:
#concretecs
result = pd.read_csv('1.csv')
result[['params','mean_test_score']].sort_values('mean_test_score')[::-1].head(3)

Unnamed: 0,params,mean_test_score
124,"{'preprocessing': None, 'regressor': RandomForestRegressor(n_estimators=200, n_jobs=-1), 'regressor__max_features': 'auto'}",-35.111051
125,"{'preprocessing': None, 'regressor': RandomForestRegressor(n_estimators=200, n_jobs=-1), 'regressor__max_features': 'sqrt'}",-39.235108
68,"{'preprocessing': StandardScaler(), 'regressor': SVR(), 'regressor__C': 100, 'regressor__gamma': 0.1}",-40.846079


In [18]:
# parkinsons
result = pd.read_csv('2.csv')
result[['params','mean_test_score']].sort_values('mean_test_score')[::-1].head(3)

Unnamed: 0,params,mean_test_score
106,"{'preprocessing': MinMaxScaler(), 'regressor': SVR(C=100, gamma=10), 'regressor__C': 100, 'regressor__gamma': 10}",-76.77219
63,"{'preprocessing': StandardScaler(), 'regressor': SVR(C=100, gamma=10), 'regressor__C': 10, 'regressor__gamma': 1}",-78.138502
68,"{'preprocessing': StandardScaler(), 'regressor': SVR(C=100, gamma=10), 'regressor__C': 100, 'regressor__gamma': 0.1}",-79.173733


In [19]:
#skillcraft
result = pd.read_csv('3.csv')
result[['params','mean_test_score']].sort_values('mean_test_score')[::-1].head(3)

Unnamed: 0,params,mean_test_score
125,"{'preprocessing': None, 'regressor': RandomForestRegressor(max_features='sqrt', n_estimators=200, n_jobs=-1), 'regressor__max_features': 'sqrt'}",-4.490054e-08
124,"{'preprocessing': None, 'regressor': RandomForestRegressor(max_features='sqrt', n_estimators=200, n_jobs=-1), 'regressor__max_features': 'auto'}",-4.742489e-08
60,"{'preprocessing': StandardScaler(), 'regressor': SVR(C=100, gamma=10), 'regressor__C': 10, 'regressor__gamma': 0.001}",-1.915525e-06


In [20]:
#wine
result = pd.read_csv('4.csv')
result[['params','mean_test_score']].sort_values('mean_test_score')[::-1].head(3)

Unnamed: 0,params,mean_test_score
125,"{'preprocessing': None, 'regressor': RandomForestRegressor(max_features='sqrt', n_estimators=200, n_jobs=-1), 'regressor__max_features': 'sqrt'}",-0.411735
124,"{'preprocessing': None, 'regressor': RandomForestRegressor(max_features='sqrt', n_estimators=200, n_jobs=-1), 'regressor__max_features': 'auto'}",-0.414997
56,"{'preprocessing': StandardScaler(), 'regressor': SVR(C=100, gamma=10), 'regressor__C': 1, 'regressor__gamma': 0.1}",-0.49784


In [31]:
result.sort_values('mean_test_score')[::-1]['params'].iloc[0]

"{'preprocessing': None, 'regressor': RandomForestRegressor(max_features='sqrt', n_estimators=200, n_jobs=-1), 'regressor__max_features': 'sqrt'}"

In [None]:
for i in range(5):
    result = pd.read_csv(f'{i}.csv')
    best_params = result.sort_values('mean_test_score')[::-1]['params'].iloc[0]

In [None]:
abalone_result

In [None]:
abalone_result[['params','mean_test_score']].sort_values('mean_test_score')[::-1]

In [None]:
from sklearn.metrics import mean_squared_error
import math


In [None]:
a = MLPRegressor(activation='tanh',hidden_layer_sizes=(10,10),max_iter=10000)
s = StandardScaler()
s.fit(x_train)
xtr = s.transform(x_train)
xte = s.transform(x_test)
a.fit(xtr,y_train)
print(r2_score(a.predict(xtr),y_train))
print(r2_score(a.predict(xte),y_test))
print(math.sqrt(mean_squared_error(a.predict(xtr),y_train)))
print(math.sqrt(mean_squared_error(a.predict(xte),y_test)))


In [None]:
from sklearn.metrics import r2_score
print(r2_score(grid.predict(x_train),y_train))
print(r2_score(grid.predict(x_test),y_test))
print(math.sqrt(mean_squared_error(grid.predict(x_train),y_train)))
print(math.sqrt(mean_squared_error(grid.predict(x_test),y_test)))
