<div style = 'text-align: center;'>
<font size=6>
    Counterfeit Notes Detector
    </font>
    </div>

### 1. Load the environment

In [1]:
# Load useful libraries
import pandas as pd 
import statsmodels.formula.api as smf

### 2. Import the notes to train and the notes to test

In [2]:
# Labelled notes (they carry the is_genuine flag) used to fit the model
notesToTrain = pd.read_csv("notes.csv")
notesToTrain

Unnamed: 0,is_genuine,diagonal,height_left,height_right,margin_low,margin_up,length
0,True,171.81,104.86,104.95,4.52,2.89,112.83
1,True,171.67,103.74,103.70,4.01,2.87,113.29
2,True,171.83,103.76,103.76,4.40,2.88,113.84
3,True,171.80,103.78,103.65,3.73,3.12,113.63
4,True,172.05,103.70,103.75,5.04,2.27,113.55
...,...,...,...,...,...,...,...
165,False,172.11,104.23,104.45,5.24,3.58,111.78
166,False,173.01,104.59,104.31,5.04,3.05,110.91
167,False,172.47,104.27,104.10,4.88,3.33,110.68
168,False,171.82,103.97,103.88,4.73,3.55,111.87


In [3]:
# Unlabelled notes (identified by their id column) whose authenticity is to be predicted
notesToTest = pd.read_csv("example.csv")
notesToTest

Unnamed: 0,diagonal,height_left,height_right,margin_low,margin_up,length,id
0,171.76,104.01,103.54,5.21,3.3,111.42,A_1
1,171.87,104.17,104.13,6.0,3.31,112.09,A_2
2,172.0,104.58,104.29,4.99,3.39,111.57,A_3
3,172.49,104.55,104.34,4.44,3.03,113.2,A_4
4,171.65,103.63,103.56,3.77,3.16,113.33,A_5


### 3. Run the model

In [4]:
# Logistic regression on: is_genuine ~ diagonal + height_left + margin_up + margin_low
# After the re-encoding below, is_genuine is numeric with 0 = genuine and
# 1 = counterfeit, so the fitted model estimates the probability of a counterfeit.

# Prepare the dataset for the modeling
# Convert explicitly with astype(int) instead of replace([True, False], [0, 1]):
# replace() only produced integers through an implicit downcast that recent
# pandas versions deprecate. The dtype guard also makes this cell safe to
# re-run once the column has already been encoded.
if pd.api.types.is_bool_dtype(notesToTrain['is_genuine']):
    notesToTrain['is_genuine'] = (~notesToTrain['is_genuine']).astype(int)  # True -> 0, False -> 1

# Run the model
model = smf.logit('is_genuine ~ diagonal + height_left + margin_up + margin_low', data = notesToTrain)
results = model.fit()

print(results.summary())

Optimization terminated successfully.
         Current function value: 0.034686
         Iterations 14
                           Logit Regression Results                           
Dep. Variable:             is_genuine   No. Observations:                  170
Model:                          Logit   Df Residuals:                      165
Method:                           MLE   Df Model:                            4
Date:                Sat, 23 Sep 2023   Pseudo R-squ.:                  0.9488
Time:                        09:16:55   Log-Likelihood:                -5.8966
converged:                       True   LL-Null:                       -115.17
Covariance Type:            nonrobust   LLR p-value:                 3.836e-46
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept    -300.1937    384.070     -0.782      0.434   -1052.956     452.569
diagonal        2.5359   

### 4. Predict the authenticity of notes to test

In [5]:
# Estimated probability (between 0 and 1) that each note belongs to class 1, i.e. is counterfeit
results.predict(exog=notesToTest)

0    1.000000e+00
1    1.000000e+00
2    9.999998e-01
3    3.395007e-04
4    9.889968e-08
dtype: float64

In [6]:
# Add the predicted authenticity next to each note of the test set

# Counterfeit probability as a whole-number percentage
# (astype(int) truncates, e.g. 99.99 -> 99)
counterfeitPct = (results.predict(notesToTest) * 100).astype(int)

# Plain column assignment instead of insert(7, ...): insert raises ValueError
# when 'Prediction%' already exists, so the cell could not be re-run, and the
# hard-coded position assumed a frame with exactly 7 columns. New columns are
# appended at the end; on a re-run they are overwritten in place.
notesToTest['Prediction%'] = counterfeitPct

# 50% or more -> counterfeit, below 50% -> genuine
notesToTest['Authenticity'] = counterfeitPct.ge(50).map({True: '❌ Counterfeit', False: '✅ Genuine'})
notesToTest

Unnamed: 0,diagonal,height_left,height_right,margin_low,margin_up,length,id,Prediction%,Authenticity
0,171.76,104.01,103.54,5.21,3.3,111.42,A_1,99,❌ Counterfeit
1,171.87,104.17,104.13,6.0,3.31,112.09,A_2,99,❌ Counterfeit
2,172.0,104.58,104.29,4.99,3.39,111.57,A_3,99,❌ Counterfeit
3,172.49,104.55,104.34,4.44,3.03,113.2,A_4,0,✅ Genuine
4,171.65,103.63,103.56,3.77,3.16,113.33,A_5,0,✅ Genuine
