In [61]:
import pandas as pd
import numpy as np
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoCV
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

In [15]:
# Load the training table; it supplies both the model features and the
# 'bayesaverage' target used by the modelling cells.
data = pd.read_csv('cleantraining.csv')

In [43]:
# Read the games to be scored and align their columns with the training frame,
# then remove the target and the non-feature columns to obtain the model inputs.
UnrankedGames = pd.read_csv('cleantesting.csv').reindex(columns=data.columns)
UnrankedX = UnrankedGames.drop(
    columns=['id', 'bayesaverage', 'name', 'descriptions', 'mechanics', 'categories']
)

In [24]:
# Feature matrix: every column except the target and the non-feature columns.
X = data.drop(
    columns=['id', 'bayesaverage', 'name', 'descriptions', 'mechanics', 'categories']
)
# Regression target.
y = data['bayesaverage']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=3585,
)

##### Lasso Regression

In [28]:
# Standardise the features, then fit a Lasso whose penalty is selected by cross-validation.
pipeLasso = Pipeline(
    steps=[('scale', StandardScaler()), ('model', LassoCV())]
)

In [29]:
# Fit the scaler and the cross-validated Lasso on the training split only.
pipeLasso.fit(X_train,y_train)

In [31]:
# Root-mean-squared error of the Lasso pipeline on the held-out test split.
yhat = pipeLasso.predict(X_test)
np.sqrt(mean_squared_error(y_true=y_test, y_pred=yhat))

0.4634214877221356

##### Random Forest

In [47]:
# Fit a Random Forest regressor on the training split (no scaling step is used
# for this model).
# random_state pins the forest's internal randomness (e.g. bootstrap sampling),
# so the reported RMSE and the exported predictions are reproducible after
# Restart & Run All. The seed is the same one passed to train_test_split.
RFmod = RandomForestRegressor(random_state=3585)
RFmod.fit(X_train, y_train)

In [None]:
# Root-mean-squared error of the Random Forest on the held-out test split.
yhat = RFmod.predict(X_test)
np.sqrt(mean_squared_error(y_true=y_test, y_pred=yhat))

0.4285098093850316

##### KNN

In [54]:
# Standardise the features before the distance-based KNN regressor (30 neighbours),
# then fit the whole pipeline on the training split.
pipeknn = Pipeline(
    steps=[
        ('scale', StandardScaler()),
        ('model', KNeighborsRegressor(n_neighbors=30)),
    ]
)
pipeknn.fit(X=X_train, y=y_train)

In [55]:
# Root-mean-squared error of the KNN pipeline on the held-out test split.
yhat = pipeknn.predict(X_test)
np.sqrt(mean_squared_error(y_true=y_test, y_pred=yhat))

0.6798263631261069

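The KNN model may be slightly underfit: with 30 neighbours its test RMSE (≈0.68) is noticeably higher than the Lasso (≈0.46) and Random Forest (≈0.43) results.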

##### SVM

In [62]:
# Standardise the features, then fit a support vector regressor with an RBF kernel
# on the training split.
pipesvm = Pipeline(
    steps=[
        ('scale', StandardScaler()),
        ('model', SVR(kernel='rbf')),
    ]
)
pipesvm.fit(X=X_train, y=y_train)

In [63]:
# Root-mean-squared error of the SVR pipeline on the held-out test split.
yhat = pipesvm.predict(X_test)
np.sqrt(mean_squared_error(y_true=y_test, y_pred=yhat))

0.5522500176259699

##### Create a table with Predictions

In [56]:
# Score the unranked games with the Random Forest and the KNN pipeline, in that order.
predRF, predKNN = (
    fitted.predict(UnrankedX) for fitted in (RFmod, pipeknn)
)

In [64]:
# Score the unranked games with the Lasso and the SVR pipelines, in that order.
predLasso, predSVM = (
    fitted.predict(UnrankedX) for fitted in (pipeLasso, pipesvm)
)

In [65]:
# Collect each game's name, the four model predictions and the 'bayesaverage'
# value already present in the test file into one table. Column order matters:
# the prediction columns sit contiguously between 'Name' and 'OldRating'.
prediction_columns = {
    'Name': UnrankedGames['name'],
    'PredLasso': predLasso,
    'PredRF': predRF,
    'PredKNN': predKNN,
    'PredSVM': predSVM,
    'OldRating': UnrankedGames['bayesaverage'],
}
predDF = pd.DataFrame(prediction_columns)

In [None]:
# Row-wise mean of the four model predictions.
model_pred_cols = ['PredLasso', 'PredRF', 'PredKNN', 'PredSVM']
predDF['AvgPred'] = predDF[model_pred_cols].mean(axis=1)

In [72]:
# Display the table ordered from highest to lowest average prediction.
# The result is not assigned, so predDF itself keeps its original row order.
predDF.sort_values(by='AvgPred', ascending=False)

Unnamed: 0,Name,PredLasso,PredRF,PredKNN,PredSVM,OldRating,AvgPred
194,To Take Washington: Jubal Early's Summer 1864 ...,7.692874,7.855165,6.969739,7.142941,5.57647,7.415180
449,Vietnam: 1965-1975 (2nd edition),7.475841,7.757145,6.965150,7.142916,5.66859,7.335263
138,Legacy of Thracks: The Awakening,7.383316,7.770061,6.709580,7.180343,5.51825,7.260825
15,Wild Gardens,7.802215,7.120847,6.619307,7.435253,5.62107,7.244406
392,Periorbis,7.325933,7.732180,6.689146,7.126318,5.52659,7.218395
...,...,...,...,...,...,...,...
93,Big Brother: The Game,5.892596,5.950126,5.993004,5.848536,5.43686,5.921065
114,Rock Paper Scissors,5.868866,5.284767,6.302632,6.150663,5.43849,5.901732
424,Tilt'n Tumble,5.823522,5.990794,6.120579,5.656010,5.49995,5.897726
421,Biljard,5.607678,6.303566,6.245016,5.433014,5.51144,5.897319


In [73]:
# Write the prediction table to disk; index=False leaves the row index out of the file.
predDF.to_csv('SupervisedPredictions.csv', index=False)

In [75]:
# Inspect the raw Random Forest predictions for the unranked games.
# NOTE(review): this echoes the entire array; consider a slice such as predRF[:10]
# to keep the saved output short.
predRF

array([7.3234752, 6.3608276, 6.0801131, 6.9168215, 7.0578009, 6.3679478,
       7.0907546, 7.4626199, 6.4206601, 6.3960619, 6.3988188, 7.1489794,
       6.7192623, 6.4111387, 6.267931 , 7.1208475, 6.8111282, 6.1559514,
       6.8073826, 6.1796497, 6.7023312, 6.4299552, 6.3159919, 6.7121045,
       6.0335206, 6.7231103, 6.53265  , 7.4504812, 6.7768655, 6.1200312,
       6.4417803, 6.7833175, 6.7220828, 7.0577779, 6.7291494, 5.509995 ,
       6.1306363, 7.5589469, 5.5063019, 7.2439717, 6.238817 , 6.4313539,
       7.6795535, 6.4695581, 6.6284426, 7.1554396, 6.2099196, 6.3464155,
       6.3246366, 6.0094095, 6.9178946, 6.2205036, 6.3672549, 6.2213291,
       6.6743356, 5.5287635, 7.0199483, 6.3957223, 6.8032348, 7.509778 ,
       6.9730809, 6.9553961, 6.4935495, 7.1702917, 7.1919309, 6.4263068,
       5.8484535, 6.4317035, 6.1830575, 6.773607 , 6.7570857, 6.7107509,
       6.1969249, 7.0483003, 6.1909978, 6.1765407, 6.3139695, 5.6258859,
       6.8042025, 6.9454149, 6.9156688, 6.3298296, 