In [None]:
"""
- Efetuar uma regressão linear utilizando a base de dados em anexo que trata do valor de imóveis baseado em suas características.
- Utilizar somente a feature area.
- Separar a base de dados usando validação cruzada em 5 rodadas (folds). Isto é, em cada rodada, selecionar aleatoriamente 90% dos dados para treinamento e 10% para teste (use o train_test_split do scikit-learn).
"""

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the housing dataset from the CSV file in the working directory.
data = pd.read_csv('Housing.csv')

# Keep only the 'area' feature and the 'price' target, as 1-D arrays.
X, y = data['area'].values, data['price'].values

# Standardize feature and target with one scaler each. StandardScaler expects
# 2-D input, so each vector is turned into a single column and flattened back.
# NOTE(review): both scalers are fit on the full dataset before any train/test
# split happens, so held-out rows influence the scaling statistics.
scaler_X, scaler_y = StandardScaler(), StandardScaler()
X_scaled = scaler_X.fit_transform(X[:, np.newaxis]).ravel()
y_scaled = scaler_y.fit_transform(y[:, np.newaxis]).ravel()

# Gradient-descent hyperparameters: step size and number of passes.
learning_rate, epochs = 0.1, 51

def compute_cost(X, y, m, b):
    """Return the mean squared error of the line ``m * X + b`` against ``y``.

    Parameters
    ----------
    X : array-like
        One-dimensional sequence of feature values.
    y : array-like
        One-dimensional sequence of target values, same length as ``X``.
    m : float
        Slope of the line.
    b : float
        Intercept of the line.

    Returns
    -------
    float
        ``mean((y - (m * X + b)) ** 2)`` taken over all samples.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` are not one-dimensional, differ in length, or are
        empty (the mean of zero samples is undefined).
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    # Validate explicitly: NumPy broadcasting would otherwise accept some
    # mismatched shapes silently and return a meaningless cost.
    if X.ndim != 1 or X.shape != y.shape:
        raise ValueError(
            "X and y must be one-dimensional and have the same length; "
            f"got shapes {X.shape} and {y.shape}"
        )
    if X.size == 0:
        raise ValueError("X and y must contain at least one sample")

    residuals = y - (m * X + b)
    return np.mean(residuals ** 2)

def gradient_descent(X, y, m, b, learning_rate, epochs):
    """Fit the line ``y = m * X + b`` by batch gradient descent on the MSE.

    Every epoch computes the gradient of the mean squared error over all
    samples, updates ``m`` and ``b`` once, and records the cost and the
    parameter values after that update.

    Parameters
    ----------
    X : array-like
        One-dimensional sequence of feature values.
    y : array-like
        One-dimensional sequence of target values, same length as ``X``.
    m : float
        Initial slope.
    b : float
        Initial intercept.
    learning_rate : float
        Step size applied to each gradient.
    epochs : int
        Number of full passes (parameter updates) to perform.

    Returns
    -------
    tuple
        ``(m, b, cost_history, m_values, b_values)``: the final slope and
        intercept, followed by per-epoch lists of the cost, slope and
        intercept recorded after each update.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` are not one-dimensional, differ in length, or are
        empty.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    # Validate explicitly: NumPy broadcasting would otherwise accept some
    # mismatched shapes silently and produce meaningless gradients.
    if X.ndim != 1 or X.shape != y.shape:
        raise ValueError(
            "X and y must be one-dimensional and have the same length; "
            f"got shapes {X.shape} and {y.shape}"
        )
    N = X.size
    if N == 0:
        raise ValueError("X and y must contain at least one sample")

    cost_history = []
    m_values = []
    b_values = []

    # d(MSE)/dm = -(2/N) * sum(X * residual); d(MSE)/db = -(2/N) * sum(residual).
    # The common factor does not change between epochs, so compute it once.
    gradient_scale = -2.0 / N

    for _ in range(epochs):
        residuals = y - (m * X + b)
        m_gradient = gradient_scale * np.dot(X, residuals)
        b_gradient = gradient_scale * residuals.sum()

        m -= learning_rate * m_gradient
        b -= learning_rate * b_gradient

        # Record the cost and parameters obtained after this epoch's update.
        cost_history.append(compute_cost(X, y, m, b))
        m_values.append(m)
        b_values.append(b)

    return m, b, cost_history, m_values, b_values

# Number of random 90/10 train/test rounds to run.
folds = 5

# Base seed for the random splits so a fresh "Restart & Run All" reproduces
# the same folds and therefore the same costs and figures.
random_seed = 42

# Initial parameters: m (slope) and b (intercept).
m = 0
b = 0

# Plot the untrained line over one training split as a baseline.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_scaled, test_size=0.1, random_state=random_seed
)
plt.scatter(X_train, y_train)
plt.plot(X_train, m * X_train + b, color='red')
plt.title('Before Training')
plt.show()

for fold in range(folds):
    # Draw a fresh random 90% train / 10% test split; a distinct seed per fold
    # keeps the rounds different from each other but repeatable.
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y_scaled, test_size=0.1, random_state=random_seed + fold
    )

    # Train from the initial parameters (0, 0) on every fold. Passing the
    # previous fold's m and b would warm-start the model on parameters learned
    # from other splits, so the rounds would no longer be independent.
    m, b, cost_history, m_values, b_values = gradient_descent(
        X_train, y_train, 0, 0, learning_rate, epochs
    )

    # Evaluate on the held-out 10%, which training never saw.
    test_cost = compute_cost(X_test, y_test, m, b)

    # Report the final training cost and the held-out cost for this fold.
    print(f'Fold {fold + 1}, Final Cost: {cost_history[-1]}')
    print(f'Fold {fold + 1}, Test Cost: {test_cost}')

    # Plot the fitted line over this fold's training data.
    plt.scatter(X_train, y_train)
    plt.plot(X_train, m * X_train + b, color='red')
    plt.title(f'Fold {fold + 1} - Linear Regression Result')
    plt.show()
