# Model use example
In this notebook, we use our best saved model to predict whether new banknotes are genuine or fake, based on their geometric measurements.

## Import packages and data

In [20]:
# Import packages
## Base
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## ML model
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler

## Model import
import joblib

## Other
from sklearn.metrics import confusion_matrix

In [39]:
## Artefact and data locations (relative to this notebook)
MODEL_PATH = 'Notes_detection_GMM.pkl'
SCALER_PATH = 'Notes_detection_scaler.pkl'
DATA_PATH = "../Data/billets_production.csv"

## Restore the saved scaler and model
### NOTE: joblib.load unpickles the file, so only load artefacts that come from a trusted source
scaler = joblib.load(SCALER_PATH)
model = joblib.load(MODEL_PATH)

## Read the new note measurements (comma-separated file)
new_data = pd.read_csv(DATA_PATH, sep=',')
### Alternative input kept from the original cell, left disabled:
### "../Data/billets.csv" read with sep = ';', then dropping every row that has a missing value

## Preview the first rows
new_data.head()

Unnamed: 0,diagonal,height_left,height_right,margin_low,margin_up,length,id
0,171.76,104.01,103.54,5.21,3.3,111.42,A_1
1,171.87,104.17,104.13,6.0,3.31,112.09,A_2
2,172.0,104.58,104.29,4.99,3.39,111.57,A_3
3,172.49,104.55,104.34,4.44,3.03,113.2,A_4
4,171.65,103.63,103.56,3.77,3.16,113.33,A_5


In [40]:
## Build the scaled feature matrix X (the 'is_genuine' target is not read in this notebook)
### Column order for X: the measurements are selected in exactly this order before scaling
FEATURE_COLUMNS = ['diagonal', 'height_left', 'height_right', 'margin_low', 'margin_up', 'length']
X = scaler.transform(new_data[FEATURE_COLUMNS])

## Keep the note identifiers so each prediction can be matched back to its note
note_id = new_data['id']

## Run model

In [41]:
## Get the cluster number the model assigns to each new note
cluster_labels = model.predict(X)

## Turn the cluster number into a boolean: cluster 0 -> True, any other cluster -> False
### NOTE(review): True is reported as 'is_genuine' further down, which presumes cluster 0
### is the genuine one for the saved model — confirm against the notebook that fitted it
y_pred = cluster_labels == 0

In [44]:
## Count how many notes were predicted True (genuine) and False (fake)
prediction_counts = pd.Series(y_pred).value_counts()
display(prediction_counts)

## One row per note: its id next to the predicted label
pd.DataFrame({'id': note_id}).assign(is_genuine=y_pred)

False    3
True     2
Name: count, dtype: int64

Unnamed: 0,id,is_genuine
0,A_1,False
1,A_2,False
2,A_3,False
3,A_4,True
4,A_5,True
