In [1]:
import pickle
import csv
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [3]:
def classifier(features, labels):
    """Train a logistic-regression model and hand it back.

    Parameters
    ----------
    features : array-like
        Training inputs, passed unchanged to ``LogisticRegression.fit``.
    labels : array-like
        Target values, passed unchanged to ``LogisticRegression.fit``.

    Returns
    -------
    LogisticRegression
        The fitted estimator, built with scikit-learn's default settings.
    """
    # ``fit`` returns the estimator itself, so construction and training chain.
    return LogisticRegression().fit(features, labels)

In [4]:
# Load the dataset: every column except the last is a numeric feature and
# the last column is the class label.
file = "iris_data.csv"
# newline="" lets the csv module do its own line-ending handling, as the
# csv documentation requires for files passed to csv.reader.
with open(file, newline="") as f:
    reader = csv.reader(f)
    header = next(reader)  # first row holds the column names
    iris_data, iris_type = [], []
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing empty
            # line); indexing row[-1] on it would raise IndexError.
            continue
        iris_data.append(tuple(float(x) for x in row[:-1]))
        iris_type.append(row[-1])

In [5]:
# Sanity check: column names, then the first five feature rows and labels.
for preview in (header, iris_data[:5], iris_type[:5]):
    print(preview)

['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'type']
[(5.1, 3.5, 1.4, 0.2), (4.9, 3.0, 1.4, 0.2), (4.7, 3.2, 1.3, 0.2), (4.6, 3.1, 1.5, 0.2), (5.0, 3.6, 1.4, 0.2)]
['setosa', 'setosa', 'setosa', 'setosa', 'setosa']


In [7]:
# This conversion is not required, but it may be more efficient.
iris_array = np.array(iris_data)
# Show the first three rows (all columns) as the cell output.
iris_array[:3, :]

array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2]])

In [8]:
# Fit the model on the full dataset (no hold-out split is made here).
lr_model = classifier(features=iris_array, labels=iris_type)

In [10]:
# Accuracy on the training set: the model is scored on the same rows it was
# fitted on, so this is not an estimate of out-of-sample performance.
pred = lr_model.predict(iris_array)
accuracy = accuracy_score(iris_type, pred)
print(accuracy)

0.96


In [11]:
# test
def scorer(data, model=lr_model):
    """Score a single observation and report the predicted class.

    Parameters
    ----------
    data : mapping
        Must provide the keys 'sepal_length', 'sepal_width', 'petal_length'
        and 'petal_width'; each value is converted with ``float()``.
    model : object, optional
        Object exposing ``predict`` and ``predict_proba``. The default is
        the ``lr_model`` that existed when this function was defined.

    Returns
    -------
    dict
        On success: 'probability' (highest class probability, rounded to
        4 decimals), 'class' (the predicted label) and
        'message' == 'success'.
        On any failure: 'probability' and 'class' are None and
        'message' == 'there was an error'.
    """
    # Feature order must match the column order the model was trained on.
    feature_names = ('sepal_length', 'sepal_width', 'petal_length', 'petal_width')
    try:
        data_p = [[float(data[name]) for name in feature_names]]
        pred = model.predict(data_p)[0]
        # Convert to a plain Python float before rounding so the result does
        # not carry a numpy scalar type into the returned dict.
        ppred = round(float(max(model.predict_proba(data_p)[0])), 4)
        return {'probability': ppred, 'class': pred, 'message': 'success'}
    except Exception:
        # Best-effort scoring: bad or missing inputs and model errors are
        # reported in-band. Unlike a bare ``except:``, this does not swallow
        # KeyboardInterrupt or SystemExit.
        return {'probability': None, 'class': None, 'message': 'there was an error'}

In [12]:
# Smoke test: score one hand-written observation with the default model.
test_data = dict(sepal_length=4.9, sepal_width=3, petal_length=1.4, petal_width=0.2)
print(scorer(test_data))

{'message': 'success', 'probability': 0.79969999999999997, 'class': 'setosa'}


In [13]:
# Serialize the model and any other objects needed for prediction.
with open("scorer.pkl", "wb") as f:
    pickle.dump(obj=lr_model, file=f)