In [45]:
import pandas as pd
from math import sqrt
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, BayesianRidge, SGDRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor, BaggingRegressor

In [49]:
df = pd.read_csv("csv preparado/train_ready.csv", index_col = 0)
df.head()

Unnamed: 0_level_0,carat,cut,color,clarity,depth,table,x,y,z,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,1.12,3,4,2,61.6,59.0,6.67,6.63,4.1,5363
1,1.14,4,1,3,60.0,54.0,6.74,6.97,4.11,5593
2,0.9,4,0,3,60.3,63.0,6.12,6.22,3.72,3534
3,0.71,2,1,4,61.9,54.0,5.74,5.76,3.56,3212
4,0.34,4,2,3,60.0,62.0,4.51,4.55,2.72,447


In [50]:
columnas = [i for i in df.columns if i not in ["price"]]
X = df[columnas]
y = df["price"]

In [51]:
pipeline = [
    StandardScaler(),
    Normalizer()
]

transformer = make_pipeline(*pipeline)

X_data = transformer.fit_transform(X)

In [52]:
X_train, X_test, y_train, y_test = train_test_split(X_data, y, test_size=0.2)

In [53]:
models = {
    "linear" : LinearRegression(),
    "ridge" : Ridge(),
    "lasso": Lasso(),
    "bayesian" : BayesianRidge(),
    "stochastic" : SGDRegressor(),
    "neighbors" : KNeighborsRegressor()
}

In [54]:
for name, model in models.items():
    print(f"Entrenando {name}")
    model.fit(X_train, y_train)
print("Fin")

Entrenando linear
Entrenando ridge
Entrenando lasso
Entrenando bayesian
Entrenando stochastic
Entrenando neighbors
Fin


In [56]:
for name, model in models.items():
    y_pred = model.predict(X_test)
    print(f"-------{name}-------")
    print ("RMSE: ", round(sqrt(mean_squared_error(y_pred, y_test)),3))

-------linear-------
RMSE:  1582.372
-------ridge-------
RMSE:  1582.653
-------lasso-------
RMSE:  1583.073
-------bayesian-------
RMSE:  1582.376
-------stochastic-------
RMSE:  1586.299
-------neighbors-------
RMSE:  1111.186


In [41]:
models2 = {
    "trees" : ExtraTreesRegressor(),
    "forest" : RandomForestRegressor(),
    "ada": AdaBoostRegressor(),
    "gradient" : GradientBoostingRegressor(),
    "bagging" : BaggingRegressor(random_state = 0)
}

In [42]:
for name, model2 in models2.items():
    print(f"Entrenando {name}")
    model2.fit(X_train, y_train)
print("Fin")

Entrenando trees
Entrenando forest
Entrenando ada
Entrenando gradient
Entrenando bagging
Fin


In [57]:
for name, model2 in models2.items():
    y_pred2 = model2.predict(X_test)
    print(f"-------{name}-------")
    print ("RMSE: ", round(sqrt(mean_squared_error(y_pred2, y_test)),3))

-------trees-------
RMSE:  291.266
-------forest-------
RMSE:  392.01
-------ada-------
RMSE:  1699.838
-------gradient-------
RMSE:  894.655
-------bagging-------
RMSE:  441.584
