In [108]:
import numpy as np
from scipy.stats import kurtosis, mode, skew
from sklearn.cross_validation import train_test_split
from sklearn.feature_selection import SelectKBest, f_regression 
from sklearn.linear_model import LogisticRegression

In [109]:
def data_to_array(archivo):
    raw_data = []
    with open(archivo) as file:
        lines = file.readlines()
        for line in lines:
            raw_data.append(np.array(line.strip().split()).astype(float))
    return raw_data

In [110]:
def caracterizacion(raw_data):
    caracteristicas = []
    for data in raw_data:
        caracteristicas.append(
            [np.mean(data),
             np.median(data),
             kurtosis(data),
             np.std(data),
             np.var(data),
             skew(data),
             mode(data)[0][0]
            ])
    data = np.array(caracteristicas)
    return data

In [111]:
raw_data = data_to_array("synthetic_control.data")
X = caracterizacion(raw_data)

# Creación de la etiqueta

In [112]:
y = []
for i in range(6):
    y += [i+1] * 100

# Preprocesamiento

In [113]:
def remocion_train(X):
    X = X - X.mean(axis=0)
    X = X/X.std(axis=0)
    
    return X

def remocion_test(X_test):
    media = X_train.mean(axis=0)
    desviacion = X_train.std(axis=0)
    
    X_test = X_test - media
    X_test = X_test/desviacion
    
    return X_test

def preprocesamiento(X_train, X_test):
    x_train_norm = remocion_train(X_train)
    x_test_norm = remocion_test(X_test)
    
    return x_train_norm, x_test_norm

# Realizar las pruebas 10 veces

In [235]:
data = []
for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    
    x_train_norm, x_test_norm = preprocesamiento(X_train, X_test)
    
    selector = SelectKBest(f_regression, k=4)
    selector.fit(x_train_norm, y_train)

    x_predict_train = selector.transform(X_train)
    x_predict_test = selector.transform(X_test)

    clasificador = LogisticRegression(C=100.0)
    clasificador.fit(x_predict_train, y_train)

    y_pred = clasificador.predict(x_predict_test)

    data.append(100*(y_test == y_pred).sum()/len(y_test)) 
    
print('El resultado del acierto de clasificación %f +/- %f' %(np.mean(data),np.std(data)))

El resultado del acierto de clasificación 92.166667 +/- 3.663719
