In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Modelos de Machine Learning
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

# Load the delivery dataset, discarding every row that has a missing value.
df = pd.read_csv('dados_entrega.csv').dropna()

# One LabelEncoder per categorical column, kept under its own name so each
# fitted encoder remains available after this step.
encoder_transporte = LabelEncoder()
encoder_trafego = LabelEncoder()
encoder_regiao = LabelEncoder()

# Replace the three categorical columns with their integer codes.
df = df.assign(**{
    'Tipo de Transporte': encoder_transporte.fit_transform(df['Tipo de Transporte']),
    'Trafego': encoder_trafego.fit_transform(df['Trafego']),
    'Regiao': encoder_regiao.fit_transform(df['Regiao']),
})

# Feature matrix (encoded categories + distance) and regression target.
x = df[[
    'Tipo de Transporte',
    'Trafego',
    'Regiao',
    'Distancia (km)',
]]
y = df['Tempo de Entrega (min)']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit an ordinary linear regression on the training portion.
modelo = LinearRegression().fit(x_train, y_train)

# FILL IN THE VALUES TO PREDICT HERE
#
# Single-row input for an ad-hoc prediction. Categorical fields take the
# integer codes listed in the legend below; the last field is the distance.
predict = pd.DataFrame({
    'Tipo de Transporte': [0],
    'Trafego': [0],
    'Regiao': [0],
    'Distancia (km)': [0],
})

# Predictions for the held-out test rows (used by the metrics and the plot)
# and for the ad-hoc row above.
y_test_pred = modelo.predict(x_test)
y_pred = modelo.predict(predict)

#================== Code legend ==================
# Mapping as recorded by the original author; it depends on the categories
# present in the data -- TODO confirm against each encoder's `classes_`.

# Tipo de Transporte  =   | BICICLETA = 0 | CAMINHÃO = 1 | MOTO = 2 | VAN = 3 |

# Trafego  =  | LEVE = 0 | MODERADO = 1 | PESADO = 2 |

# Regiao  =  | CENTRO = 0 | SUBÚRBIO = 1 | ZONA RURAL = 2 |


#=================================================

# Evaluate the model on the held-out test rows: mean squared error and
# coefficient of determination of predictions versus actual delivery times.
mse = mean_squared_error(y_true=y_test, y_pred=y_test_pred)
r2 = r2_score(y_true=y_test, y_pred=y_test_pred)

# Report both metrics rounded to two decimals, one per line.
print(
    f"MSE: {mse:.2f}",
    f"R²: {r2:.2f}",
    sep="\n",
)

# Column totals per (encoded) traffic level. Not used further in this cell;
# kept because later cells may read it.
grupo = df.groupby('Trafego').sum()

# Show the predicted delivery time for the ad-hoc input row.
display(f'Tempo de Entrega: {y_pred[0]:.2f} Minutos')

# Scatter of actual vs. predicted delivery times on the test set.
# The blue series plots y_test against itself, i.e. the actual values on the
# identity line; the green series plots the model's predictions against the
# actual values. The legend labels must match that: blue = real values,
# green = predicted values.
plt.scatter(y_test, y_test, c='b', label="Valores Reais")
plt.scatter(y_test, y_test_pred, c='g', label="Valores Preditos")
plt.title('Comparação')
plt.legend()
plt.show()