# Flower recognition
The goal of this exercise is to introduce the Logistic Regression classifier, using the Iris flower dataset as an example.

## Tasks:
1. Load data
2. Visualise data
3. Train a model
4. Evaluate the model

In [None]:
# Data processing tool
import pandas as pd

# Matrix operations
import numpy as np

# Visualisation libs
import seaborn as sns

# Dataset
from sklearn.datasets import load_iris

# Lets use magic to include figure in the notebooks
%matplotlib notebook 

## 1. Load data

In [None]:
# Load the iris dataset shipped with scikit-learn; the returned object exposes
# the fields used below (data, target, feature_names, target_names).
iris = load_iris()

In [None]:
# List the fields available on the loaded dataset object
iris.keys()

## 2. Visualise data

In [None]:
# Assemble one Pandas DataFrame: the feature columns followed by a 'class'
# column holding the target label of each sample.
df = pd.DataFrame(
    np.column_stack((iris.data, iris.target)),
    columns=[*iris.feature_names, 'class'],
)

In [None]:
# Display the DataFrame as a table (the last expression of a cell is rendered as its output)
df

In [None]:
# Plot every pair of features against each other, coloured by class, to see
# how well each pair separates the classes.
sns.pairplot(
    df,
    hue='class',
    vars=iris.feature_names,
)

## 3. Train a model

### Train and test split
We need to split the dataset into a training set and a testing set in order to evaluate the model. The training set will be used for training the model, and the testing set for evaluating it on data the model has not seen.

In [None]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` now lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

In [None]:
# X_train are the features for training set
# X_test are the features for testing set
# y_train are the targets for training set
# y_test are the targets for testing set
#
# 20% of the samples are held out for testing. A fixed random_state makes the
# shuffle deterministic, so the split (and every metric computed below) is the
# same on each "Restart & Run All".
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Sanity check: compare the sizes of the training and testing feature arrays
X_train.shape, X_test.shape

### Train a model

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Create the classifier; no arguments are passed, so the library defaults are used
clf = LogisticRegression()

In [None]:
# Train the classifier on the training features and their targets only;
# the testing set is kept aside for the evaluation below.
clf.fit(X_train, y_train)

## 4. Evaluate the model

In [None]:
from sklearn.metrics import accuracy_score, classification_report, precision_score

### Evaluate the model with testing data

In [None]:
# Predict a class for every sample in the held-out testing set
y_test_pred = clf.predict(X_test)

In [None]:
# Compare the true testing targets with the predictions using the accuracy metric
accuracy_score(y_test, y_test_pred)

In [None]:
# Print a per-class report for the testing set, labelling each class with its
# name from the dataset instead of its numeric target.
print(classification_report(y_test, y_test_pred, target_names=iris.target_names))