## Ejercicio de Tarea 05
Para los datos del ejercicio 1 de la Liga Nacional de Fútbol:

**a**. Usar el algoritmo de selección hacia adelante para seleccionar un modelo de regresión.

**b**. Usar el algoritmo de selección hacia atrás para seleccionar un modelo de regresión.

**c**. Usar el algoritmo de regresión por pasos para seleccionar un modelo de regresión.

**d**. Comenten los modelos finales obtenidos en cada uno de los casos anteriores. ¿Cuál tiene más sentido? ¿Qué modelo usarían?

## 0. Importar 

### 0.1. Importar Librerías

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import statsmodels.api as sm
import statsmodels.formula.api as smf

from scipy import stats

import SourcePython as src

### 0.2. Cargar Datos

In [2]:
# Load the league dataset from a CSV file addressed relative to the
# current working directory.
DatasetFutbol = pd.read_csv(
    './Liga_nacional_de_futbol.csv',
)

# The first column is used as the response label; every remaining column
# is collected (as a list) as a candidate predictor label.
TargetLabel , *FeatureLabels = DatasetFutbol.columns

## **a**. Modelo por Selección hacia Adelante

In [34]:
def GenerateModels(
        Dataset: pd.DataFrame,
        TargetLabel: str,
    ):
    """
    Build a model factory bound to one dataset and one response column.

    Parameters
    ----------
    Dataset : pd.DataFrame
        Data handed to ``smf.ols`` as ``data`` on every fit.
    TargetLabel : str
        Name placed on the left-hand side of every formula.

    Returns
    -------
    callable
        ``CreateModelInstance(FeaturesModel)``, which fits and returns the
        OLS model ``TargetLabel ~ f1 + f2 + ...``.
    """

    def CreateModelInstance(
            FeaturesModel: list[str]
        ):
        """
        Fit an OLS model of the bound response on the given features.

        Parameters
        ----------
        FeaturesModel : list[str]
            Labels joined with ``+`` to form the right-hand side. When it
            is empty the intercept-only model ``TargetLabel ~ 1`` is fitted.

        Returns
        -------
        The fitted results object returned by ``smf.ols(...).fit()``.
        """

        # An empty feature list would otherwise yield "y ~ ", which is not
        # a valid formula; fall back to the explicit intercept term.
        RightHandSide = ' + '.join(FeaturesModel) or '1'

        LinearModel = smf.ols(
            f"{TargetLabel} ~ {RightHandSide}",
            data = Dataset,
        ).fit()

        return LinearModel

    return CreateModelInstance

def EvaluateModel(
        LinearModel
    ) -> float:
    """
    Return the selection score of a fitted model.

    The score is the model's ``mse_resid`` attribute; the selection
    routines in this file treat a lower value as a better model.

    Parameters
    ----------
    LinearModel
        Any object exposing an ``mse_resid`` attribute.

    Returns
    -------
    float
        The value of ``LinearModel.mse_resid``.
    """

    residual_mse = LinearModel.mse_resid
    return residual_mse

# Module-level model factory bound to the loaded dataset and its response
# label; the result cells below use it to refit each selected subset.
CreateModelInstance = GenerateModels(DatasetFutbol,TargetLabel)

In [54]:
from copy import deepcopy

def ForwardSelection(
        Dataset: pd.DataFrame,
        FeatureLabels: list[str],
        TargetLabel: str,
    ) -> list[str]:
    """
    Select features greedily, adding one at a time.

    Starting from the intercept-only model, each round fits one model per
    unused feature (current selection plus that feature) and keeps the
    feature with the lowest ``EvaluateModel`` score. The search stops when
    the best addition does not strictly lower the current score.

    Parameters
    ----------
    Dataset : pd.DataFrame
        Data used for every fit.
    FeatureLabels : list[str]
        Candidate predictor labels; the caller's list is not modified.
    TargetLabel : str
        Response label.

    Returns
    -------
    list[str]
        Selected labels, in the order they were added.
    """

    fit_with = GenerateModels(Dataset, TargetLabel)

    # Baseline score comes from the intercept-only model.
    BestScore = EvaluateModel(
        smf.ols(f"{TargetLabel} ~ 1", data=Dataset).fit()
    )
    selected = []
    remaining = deepcopy(FeatureLabels)

    while True:
        round_score, round_feature = np.inf, ''
        for candidate in remaining:
            candidate_score = EvaluateModel(fit_with(selected + [candidate]))
            # Strict comparison: the first candidate wins ties.
            if candidate_score < round_score:
                round_score, round_feature = candidate_score, candidate

        # No candidate left, or none strictly improves: search is over.
        if not round_score < BestScore:
            return selected

        selected.append(round_feature)
        remaining.remove(round_feature)
        BestScore = round_score

In [55]:
# Run forward selection on the loaded data, then refit the chosen subset.
# The trailing bare tuple (score, selected labels) is the cell's value.
best_features_forward = ForwardSelection(DatasetFutbol,FeatureLabels,TargetLabel)
EvaluateModel(CreateModelInstance(best_features_forward)) , best_features_forward

(np.float64(2.8262759381660296), ['x8', 'x2', 'x7', 'x9'])

## **b**. Modelo por Selección hacia Atrás

In [52]:
from copy import deepcopy

def BackwardSelection(
        Dataset: pd.DataFrame,
        FeatureLabels: list[str],
        TargetLabel: str,
    ) -> list[str]:
    """
    Select features greedily, removing one at a time.

    Starting from the model with every candidate feature, each round fits
    one model per remaining feature (current selection minus that feature)
    and drops the feature whose removal gives the lowest ``EvaluateModel``
    score. The search stops when the best removal does not strictly lower
    the current score.

    Parameters
    ----------
    Dataset : pd.DataFrame
        Data used for every fit.
    FeatureLabels : list[str]
        Candidate predictor labels; the caller's list is not modified.
    TargetLabel : str
        Response label.

    Returns
    -------
    list[str]
        Labels that survived elimination, in their original order. May be
        empty if the intercept-only model scores best.
    """

    CreateModelInstance = GenerateModels(Dataset,TargetLabel)

    def FitSubset(Features):
        # Joining an empty list would produce the invalid formula "y ~ ",
        # so the empty subset is scored as the intercept-only model.
        if not Features:
            return smf.ols(
                f"{TargetLabel} ~ 1",
                data = Dataset
            ).fit()
        return CreateModelInstance(Features)

    # Work on a private copy so the caller's labels are left untouched.
    BestFeatures = list(FeatureLabels)
    BestScore = EvaluateModel(FitSubset(BestFeatures))

    while True:
        best_score_alt = np.inf
        worst_feature = None
        for feature in BestFeatures:
            reduced_features = [other for other in BestFeatures if other != feature]
            score = EvaluateModel(FitSubset(reduced_features))

            # Strict comparison: the first feature wins ties.
            if score < best_score_alt:
                worst_feature = feature
                best_score_alt = score

        if best_score_alt < BestScore:
            BestScore = best_score_alt
            BestFeatures.remove(worst_feature)
        else:
            # Nothing left to remove, or no removal strictly improves.
            return BestFeatures

In [53]:
# Run backward selection on the loaded data, then refit the chosen subset.
# The trailing bare tuple (score, selected labels) is the cell's value.
best_features_backward = BackwardSelection(DatasetFutbol,FeatureLabels,TargetLabel)
EvaluateModel(CreateModelInstance(best_features_backward)) , best_features_backward

(np.float64(2.826275938166031), ['x2', 'x7', 'x8', 'x9'])

## **c**. Modelo por Selección por Pasos

In [106]:
from copy import deepcopy

def StepwiseSelection(
        Dataset: pd.DataFrame,
        FeatureLabels: list[str],
        TargetLabel: str,
    ) -> list[str]:
    """
    Select features by alternating forward and backward steps.

    Starting from the intercept-only model, every round first tries to add
    the unused feature that lowers the ``EvaluateModel`` score the most,
    then tries to drop the feature whose removal lowers it further. The
    round is accepted only if the resulting score is strictly lower than
    the score before the round; otherwise the search stops.

    Parameters
    ----------
    Dataset : pd.DataFrame
        Data used for every fit.
    FeatureLabels : list[str]
        Candidate predictor labels.
    TargetLabel : str
        Response label.

    Returns
    -------
    list[str]
        Selected labels. Empty when no feature improves on the
        intercept-only model.
    """

    CreateModelInstance = GenerateModels(Dataset,TargetLabel)
    BestScore = EvaluateModel(
        smf.ols(
            f"{TargetLabel} ~ 1",
            data = Dataset
        ).fit()
    )
    BestFeatures = []

    while True:
        # trial_score always holds the score of the model described by
        # trial_features, so no refit is needed at the end of the round
        # (and the empty feature set is never sent to the model factory).
        trial_score = BestScore
        trial_features = list(BestFeatures)

        # Forward step: best single addition, if any strictly improves.
        best_add_feature = None
        for feature in FeatureLabels:
            if feature in trial_features:
                continue
            score = EvaluateModel(CreateModelInstance(trial_features+[feature]))

            if score < trial_score:
                best_add_feature = feature
                trial_score = score

        if best_add_feature is not None:
            trial_features.append(best_add_feature)

        # Backward step: best single removal, if any strictly improves.
        worst_remove_feature = None
        for feature in trial_features:
            subset_features = [other for other in trial_features if other != feature]
            if not subset_features:
                # Removing the only feature would leave an empty formula.
                continue
            score = EvaluateModel(CreateModelInstance(subset_features))

            if score < trial_score:
                worst_remove_feature = feature
                trial_score = score

        if worst_remove_feature is not None:
            trial_features.remove(worst_remove_feature)

        if trial_score < BestScore:
            BestFeatures = trial_features
            BestScore = trial_score
        else:
            break

    return BestFeatures

In [107]:
# Run stepwise selection on the loaded data, then refit the chosen subset.
# The trailing bare tuple (score, selected labels) is the cell's value.
best_features_stepwise = StepwiseSelection(DatasetFutbol,FeatureLabels,TargetLabel)
EvaluateModel(CreateModelInstance(best_features_stepwise)) , best_features_stepwise 

(np.float64(2.8262759381660296), ['x8', 'x2', 'x7', 'x9'])