# Trabajo IA

## Selección de características para mejorar modelos predictivos

Aqui añadimos una descripcion de lo que vamos a hacer y tal

### Algoritmo de búsqueda hacia atrás

In [1]:
import pandas as pd
import BusquedaSecuencialAtras as bsa
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()

titanic = pd.read_csv('../data/titanic.csv')
titanic.head()

atributos_discretos = ['Pclass', 'Sex', 'SibSp', 'Parch', 'Embarked', 'Initial', 'Age_band', 
'Family_Size', 'Alone', 'Fare_cat', 'Deck', 'Title', 'Is_Married','Survived']
atributos_continuos = ['Age', 'Fare']
atributos = titanic.loc[:, atributos_discretos + atributos_continuos]

objetivo = titanic['Survived']
objetivo.head()  # objetivo es una Series unidimensional

from sklearn.preprocessing import OrdinalEncoder, LabelEncoder

codificador_atributos_discretos = OrdinalEncoder()
codificador_atributos_discretos.fit(atributos[atributos_discretos])

print('Número de atributos detectados:',
      f'{codificador_atributos_discretos.n_features_in_}')
print()
print('Nombres de los atributos detectados:')
print(f'{codificador_atributos_discretos.feature_names_in_}')
print()
print('Categorías detectadas de cada atributo:')
for atributo, categorías in zip(
    codificador_atributos_discretos.feature_names_in_,
    codificador_atributos_discretos.categories_):
    print(f'{atributo}: {categorías}')


atributos[atributos_discretos] = codificador_atributos_discretos.transform(atributos[atributos_discretos])

atributos.head()


codificador_objetivo = LabelEncoder()
# El método fit_transform ajusta el codificador a los datos y, a continuación,
# codifica estos adecuadamente. En este caso no necesitamos mantener el
# atributo objetivo como una Series de Pandas.
objetivo = codificador_objetivo.fit_transform(objetivo)
print(f'Clases detectadas: {codificador_objetivo.classes_}')

from sklearn.preprocessing import MinMaxScaler

normalizador = MinMaxScaler(
    # Cada atributo se normaliza al intervalo [0, 1]
    feature_range=(0, 1)
)

# Como nos interesa conservar los atributos originales, realizamos la
# normalización sobre una copia del DataFrame de atributos
atributos_normalizados = atributos.copy()
atributos_normalizados[:] = normalizador.fit_transform(atributos_normalizados)
atributos_normalizados.head()

titanic = atributos_normalizados.copy()
titanic.head()



Número de atributos detectados: 14

Nombres de los atributos detectados:
['Pclass' 'Sex' 'SibSp' 'Parch' 'Embarked' 'Initial' 'Age_band'
 'Family_Size' 'Alone' 'Fare_cat' 'Deck' 'Title' 'Is_Married' 'Survived']

Categorías detectadas de cada atributo:
Pclass: [1 2 3]
Sex: ['female' 'male']
SibSp: [0 1 2 3 4 5 8]
Parch: [0 1 2 3 4 5 6]
Embarked: ['C' 'Q' 'S']
Initial: [0 1 2 3 4]
Age_band: [0 1 2 3 4]
Family_Size: [ 0  1  2  3  4  5  6  7 10]
Alone: ['No' 'Yes']
Fare_cat: [0 1 2 3]
Deck: ['A' 'B' 'C' 'D' 'E' 'F' 'G' 'T' 'U']
Title: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16]
Is_Married: [0 1]
Survived: [0 1]
Clases detectadas: [0 1]


Unnamed: 0,Pclass,Sex,SibSp,Parch,Embarked,Initial,Age_band,Family_Size,Alone,Fare_cat,Deck,Title,Is_Married,Survived,Age,Fare
0,1.0,1.0,0.166667,0.0,1.0,0.0,0.25,0.125,0.0,0.0,1.0,0.6875,0.0,0.0,0.271174,0.014151
1,0.0,0.0,0.166667,0.0,0.0,0.25,0.5,0.125,0.0,1.0,0.25,0.75,1.0,1.0,0.472229,0.139136
2,1.0,0.0,0.0,0.0,1.0,0.5,0.25,0.0,1.0,0.333333,1.0,0.5,0.0,1.0,0.321438,0.015469
3,0.0,0.0,0.166667,0.0,1.0,0.25,0.5,0.125,0.0,1.0,0.25,0.75,1.0,1.0,0.434531,0.103644
4,1.0,1.0,0.0,0.0,1.0,0.0,0.5,0.0,1.0,0.333333,1.0,0.6875,0.0,0.0,0.434531,0.015713


In [2]:
bsa.backward_sequential_search(titanic,'Survived', model, 1, 3)

Unnamed: 0,variables,size,score
0,[Initial],1,0.783554
1,"[Initial, Family_Size]",2,0.80081
2,"[Pclass, Initial, Family_Size]",3,0.788179
3,"[Pclass, Initial, Family_Size, Fare]",4,0.79628
4,"[Pclass, Sex, Initial, Family_Size, Fare]",5,0.797742
5,"[Pclass, Sex, Parch, Initial, Family_Size, Fare]",6,0.796832
6,"[Pclass, Sex, Parch, Initial, Family_Size, Is_...",7,0.796448
7,"[Pclass, Sex, Parch, Initial, Family_Size, Is_...",8,0.794099
8,"[Pclass, Sex, Parch, Initial, Family_Size, Alo...",9,0.795729
9,"[Pclass, Sex, Parch, Initial, Family_Size, Alo...",10,0.796089
