In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression, Lasso, Ridge, LassoLarsCV, LassoLarsIC, HuberRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFECV, RFE
from sklearn import metrics
import feyn

In [None]:
# Load the red-wine quality data (semicolon-separated CSV).
# Original author's note: "I like red wine better."
df = pd.read_csv(
    'winequality-red.csv',
    sep=";",
)

In [None]:
# Per-column summary statistics; the bare name on the last line is what the cell displays.
summary_stats = df.describe()
summary_stats

In [None]:
# For each column: True if it contains at least one missing value.
missing_mask = df.isna()
has_missing = missing_mask.any()
has_missing

In [None]:
# Heatmap of the pairwise correlations between all columns of `df`.
ax = plt.gca()
heatmap = ax.imshow(df.corr(), cmap='seismic')
plt.colorbar(heatmap, ax=ax)

# One tick per column, labelled with the column name on both axes.
tick_positions = np.arange(len(df.columns))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(labels=df.columns)
ax.set_yticklabels(labels=df.columns)

# Slant the x labels so long column names do not overlap.
plt.setp(
    ax.get_xticklabels(),
    rotation=45,
    ha="right",
    rotation_mode="anchor",
)

plt.tight_layout()
plt.show()

In [None]:
# Split the frame into the target series and the feature matrix (everything else).
target_column = 'quality'
y = df[target_column]
X = df.drop(columns=[target_column])

In [None]:
# Hold out part of the data for evaluation (train_test_split's default proportions).
# The split is seeded so that Restart & Run All reproduces the same rows, and therefore
# the same metrics, on every run; 42 matches the seed used for the feyn model below.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Learn the min/max of each feature from the training rows only, then apply that
# same scaling to both partitions so no test-set statistics reach the scaler.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Accumulator for one metrics dict per fitted model; later cells append to it.
results = list()

In [None]:
# Baseline: ordinary least-squares linear regression.
name = 'lin_reg'
model = LinearRegression().fit(X_train, y_train)
y_prediction = model.predict(X_test)

# Test-set metrics. Note: a scikit-learn regressor's `score` is R^2
# (coefficient of determination), even though it is stored/printed as "r"/"R".
r = model.score(X_test, y_test)
mae = metrics.mean_absolute_error(y_test, y_prediction)
mse = metrics.mean_squared_error(y_test, y_prediction)
rmse = np.sqrt(mse)  # root of the MSE computed on the line above

entry = {"name": name, "r": r, "mae": mae, "mse": mse, "rmse": rmse}
results.append(entry)
for label, value in (('R:', r), ('MAE:', mae), ('MSE:', mse), ('RMSE:', rmse)):
    print(label, value)

In [None]:
# L1-regularised linear regression with a fixed penalty strength.
name = "lasso"
model = Lasso(alpha=0.01).fit(X_train, y_train)
y_prediction = model.predict(X_test)

# Test-set metrics. Note: a scikit-learn regressor's `score` is R^2
# (coefficient of determination), even though it is stored/printed as "r"/"R".
r = model.score(X_test, y_test)
mae = metrics.mean_absolute_error(y_test, y_prediction)
mse = metrics.mean_squared_error(y_test, y_prediction)
rmse = np.sqrt(mse)  # root of the MSE computed on the line above

entry = {"name": name, "r": r, "mae": mae, "mse": mse, "rmse": rmse}
results.append(entry)
for label, value in (('R:', r), ('MAE:', mae), ('MSE:', mse), ('RMSE:', rmse)):
    print(label, value)

In [None]:
# L2-regularised linear regression with a fixed penalty strength.
name = "ridge"
model = Ridge(alpha=0.1).fit(X_train, y_train)
y_prediction = model.predict(X_test)

# Test-set metrics. Note: a scikit-learn regressor's `score` is R^2
# (coefficient of determination), even though it is stored/printed as "r"/"R".
r = model.score(X_test, y_test)
mae = metrics.mean_absolute_error(y_test, y_prediction)
mse = metrics.mean_squared_error(y_test, y_prediction)
rmse = np.sqrt(mse)  # root of the MSE computed on the line above

entry = {"name": name, "r": r, "mae": mae, "mse": mse, "rmse": rmse}
results.append(entry)
for label, value in (('R:', r), ('MAE:', mae), ('MSE:', mse), ('RMSE:', rmse)):
    print(label, value)

In [None]:
# Recursive feature elimination around a linear regression; the number of
# features to keep is chosen by 10-fold cross-validation, dropping one per step.
name = "rfecv"
regressor = LinearRegression()
model = RFECV(regressor, step=1, cv=10).fit(X_train, y_train)
y_prediction = model.predict(X_test)

# Test-set metrics; "r" holds whatever the selector's `score` returns
# (it delegates to the wrapped estimator, i.e. R^2 for LinearRegression).
r = model.score(X_test, y_test)
mae = metrics.mean_absolute_error(y_test, y_prediction)
mse = metrics.mean_squared_error(y_test, y_prediction)
rmse = np.sqrt(mse)  # root of the MSE computed on the line above

entry = {"name": name, "r": r, "mae": mae, "mse": mse, "rmse": rmse}
results.append(entry)
for label, value in (('R:', r), ('MAE:', mae), ('MSE:', mse), ('RMSE:', rmse)):
    print(label, value)

In [None]:
# Recursive feature elimination around a linear regression, keeping a fixed
# number of features (5) and dropping one per step.
name = "rfe"
regressor = LinearRegression()
model = RFE(regressor, n_features_to_select=5, step=1).fit(X_train, y_train)
y_prediction = model.predict(X_test)

# Test-set metrics; "r" holds whatever the selector's `score` returns
# (it delegates to the wrapped estimator, i.e. R^2 for LinearRegression).
r = model.score(X_test, y_test)
mae = metrics.mean_absolute_error(y_test, y_prediction)
mse = metrics.mean_squared_error(y_test, y_prediction)
rmse = np.sqrt(mse)  # root of the MSE computed on the line above

entry = {"name": name, "r": r, "mae": mae, "mse": mse, "rmse": rmse}
results.append(entry)
for label, value in (('R:', r), ('MAE:', mae), ('MSE:', mse), ('RMSE:', rmse)):
    print(label, value)

In [None]:
# Lasso fitted with the LARS algorithm; the penalty is chosen by 5-fold cross-validation.
name = "lassolarscv"
# Fit on the training partition only. An earlier version first called `.fit(X, y)` on
# the full, unscaled dataset (test rows included) and then refitted here; that first
# fit was discarded by the refit, so it only cost time and obscured what was trained.
model = LassoLarsCV(cv=5)
model = model.fit(X_train, y_train)
y_prediction = model.predict(X_test)

# Test-set metrics. Note: a scikit-learn regressor's `score` is R^2
# (coefficient of determination), even though it is stored/printed as "r"/"R".
r = model.score(X_test, y_test)
mae = metrics.mean_absolute_error(y_test, y_prediction)
mse = metrics.mean_squared_error(y_test, y_prediction)
rmse = np.sqrt(mse)  # root of the MSE computed on the line above

results.append({"name": name, "r": r, "mae": mae, "mse": mse, "rmse": rmse})
print('R:', r)
print('MAE:', mae)
print('MSE:', mse)
print('RMSE:', rmse)

In [None]:
# Lasso fitted with LARS; the penalty is chosen with the AIC information criterion.
name = "lassolarsic"
model = LassoLarsIC(criterion='aic').fit(X_train, y_train)
y_prediction = model.predict(X_test)

# Test-set metrics. Note: a scikit-learn regressor's `score` is R^2
# (coefficient of determination), even though it is stored/printed as "r"/"R".
r = model.score(X_test, y_test)
mae = metrics.mean_absolute_error(y_test, y_prediction)
mse = metrics.mean_squared_error(y_test, y_prediction)
rmse = np.sqrt(mse)  # root of the MSE computed on the line above

entry = {"name": name, "r": r, "mae": mae, "mse": mse, "rmse": rmse}
results.append(entry)
for label, value in (('R:', r), ('MAE:', mae), ('MSE:', mse), ('RMSE:', rmse)):
    print(label, value)

In [None]:
# Symbolic regression with a feyn QLattice.
name = "SR"

ql = feyn.connect_qlattice()
ql.reset(random_seed=42)  # seed the QLattice so the model search is repeatable

# Reuse the rows chosen by the earlier train_test_split instead of drawing a second,
# independent split: the final cell ranks every model by RMSE, which is only
# meaningful if all of them are scored on the same held-out rows. `y_train`/`y_test`
# still carry the original row labels of `df`; the raw (unscaled) rows, including the
# 'quality' column, are passed to feyn exactly as before.
train = df.loc[y_train.index]
test = df.loc[y_test.index]

# No `stypes` are passed: every column has already gone through `df.corr()` and
# MinMaxScaler above, so they are numeric. The categorical declarations that used to be
# here ('neighbourhood_group', 'neighbourhood', 'room_type') appear to be left over from
# a different dataset.
models = ql.auto_run(
    train,
    output_name='quality',
)
best = models[0]  # first model in the list returned by auto_run
best.plot(train, test)

# Test-set metrics from the model's own helpers, stored under the same keys as the
# scikit-learn models ("r" holds the R^2 score).
r = best.r2_score(test)
mae = best.mae(test)
mse = best.mse(test)
rmse = best.rmse(test)
results.append({"name": name, "r": r, "mae": mae, "mse": mse, "rmse": rmse})
print('R:', r)
print('MAE:', mae)
print('MSE:', mse)
print('RMSE:', rmse)

In [None]:
# Pick the entry with the smallest test RMSE (the first one wins on ties) and report its name.
def _rmse_of(entry):
    """Return the RMSE stored in one results entry."""
    return entry['rmse']


best_result = min(results, key=_rmse_of)
print("best result:", best_result['name'])