# Machine Learning - Evaluating Classifiers




## Evaluating classifiers



In [3]:
# importing the classification model
import pickle

# list of terms we used as features
terms = ["tannin", "cherry", "oak", "fresh", "vanilla", "rich", "blackberry", "very", "dry", "spice"]


# Loading the model with 10 features (terms)
with open('lr_model.pkl', 'rb') as file:
    lr_10 = pickle.load(file)
    
# Looking at the possible labels to predict (and in which order they are stored)
print(lr_10.classes_)

# Print each label and corresponding coefficients and intercept
for label, coefs, intercept in zip(lr_10.classes_, lr_10.coef_, lr_10.intercept_):
    print(label)
    for term, coef in zip(terms, coefs):
        print(term, coef)
    print(f"INTERCEPT {intercept}")
    print()


['Red' 'Rose' 'White' 'unk']
Red
tannin 2.383661939736888
cherry 1.9274105719068597
oak 1.3050397713743673
fresh -0.7187393751756458
vanilla -0.35856506673104627
rich -0.2262790743078582
blackberry 2.4503412762346746
very 0.1414245846276523
dry -0.24413649876702623
spice 0.6519265614229908
INTERCEPT 0.8543590705888237

Rose
tannin -0.6266939262327331
cherry 1.3921244363441443
oak -1.4622530150523196
fresh 0.49686452802491615
vanilla 0.305723523004968
rich -0.4962346320419678
blackberry 0.3396785232108212
very -0.41421327477594533
dry 0.14391027564668946
spice -0.3819085563059017
INTERCEPT -1.6570325008203965

White
tannin -1.8945664666243693
cherry -2.858257960755354
oak 1.0538210926332086
fresh 0.10189358829165852
vanilla 0.059132997176185774
rich 0.2344474242024624
blackberry -2.3499664049992304
very -0.007593425443304999
dry -0.13199375581460723
spice -0.13348973242972784
INTERCEPT 1.211873847292693

unk
tannin 0.13759845312021848
cherry -0.4612770474956476
oak -0.8966078489552564
f

Features extraction for the test set to get predictions.

In [4]:
import pandas
import numpy

# Reading test set and making the feature vectors
test_set = pandas.read_csv('./test.csv', sep='\t', encoding='utf-8')
test_set

# Extracting only the relevant columns, and puting them in lists
# These colums have the titles 'Review' and 'Color'
test_reviews = test_set['Review'].to_list()
test_colors = test_set['Color'].to_list()

# Printing the first five item in each list to make sure it looks ok
for rev, col in zip(test_reviews[:5], test_colors[:5]):
  print(rev)
  print(col)
  print()

# Creating zero vectors (of length 10) for each review (of len(test_reviews))
test_features = numpy.zeros((len(test_reviews), 10))
print(test_features.shape)

Leather, spice, tobacco and tea emerge from the nose of this Sicilian blend of Nero d’Avola, Syrah, Merlot, Cabernet and Petit Verdot. You’ll get aromas of clove, allspice and vanilla behind vibrant blueberry and raspberry.
Red

So pale that it’s almost colorless, the Blangé—made from Arneis grapes in Piedmont—has the oak-meets-citrus nose you’d expect of a Chardonnay. The lemon, grapefruit and pear flavors, coupled with a superspritzy, Asti-like mouthfeel, make this a good Sunday brunch eye-opener. Its slightly high acidity could cut through any French toast or waffle that you throw its way. Finishes medium-long, with herbal notes. 
White

Comes across on the earthy, herbal side, although there are some pretty raspberry notes on the palate. A little heavy, and lacks perhaps some delicacy. A sweet cola-like finish completes the picture.
Red

What a wonderful wine to pair with spaghetti alle vongole or calamari fritti. Made from Grechetto grapes and aged only in stainless steel, this is

In [5]:
# Generating features
import spacy

nlp = spacy.load('en_core_web_sm')

# Processing a text
test_doc_reviews = nlp.pipe(test_reviews)

# Updating the feature vectors by checkin if the terms exist per review
for review, f in zip(test_doc_reviews, test_features):
    tokens_list = [token.lemma_ for token in review]
    #print(tokens_list)
    for term in terms:
        if term in tokens_list:
            term_id = terms.index(term)
            f[term_id] = 1


Prediction

In [13]:
# Pretiction using the features as input to the model
test_predictions = lr_10.predict(test_features)

### Accuracy

Correct predictions divided by the total number of predictions (ratio of correct predictions to all predictions)

In [8]:
from sklearn.metrics import accuracy_score

# We can calculate the fraction of correctly predicted labels in the test set
# this is the accuracy score
acc = accuracy_score(test_colors, test_predictions)
corr_count = accuracy_score(test_colors, test_predictions, normalize = False)
total_count=len(test_reviews)

print(f"Sklearn accuracy: {acc}")
print(f"Total reviews: {total_count}")
print(f"Total correct predictions: {corr_count}")

corr_ratio = corr_count /total_count
print(f"Correct ratio (accuracy): {corr_ratio}")

Sklearn accuracy: 0.723
Total reviews: 1000
Total correct predictions: 723
Correct ratio (accuracy): 0.723


In [None]:
from sklearn.metrics import confusion_matrix
from collections import Counter
import pandas as pd

# Let's load the training data from a csv file
test_set = pandas.read_csv('./test.csv', sep='\t', encoding='utf-8')
test_colors = test_set['Color'].to_list()


# Get a dictionary of unique items with their counts
print(Counter(test_colors))


# Get the confusion matrix
labels=["Red", "White", "Rose", "unk"]
???
# The order of the labels is not important, but it is important to provide all the labels.


### F-score

#### Confusion matrix 

In [15]:
from sklearn.metrics import confusion_matrix
import pandas as pd

my_labels=["Red", "White", "Rose", "unk"]
cm1 = confusion_matrix(test_colors, test_predictions, labels = my_labels)
# The order of the labels is not important, but it is important to provide all the labels.
pd.DataFrame(cm1, index=my_labels, columns=my_labels)

Unnamed: 0,Red,White,Rose,unk
Red,432,168,0,0
White,19,291,0,0
Rose,4,15,0,0
unk,10,61,0,0


#### Classification report

In [16]:
from sklearn.metrics import classification_report

print(classification_report(test_colors, test_predictions))

              precision    recall  f1-score   support

         Red       0.93      0.72      0.81       600
        Rose       0.00      0.00      0.00        19
       White       0.54      0.94      0.69       310
         unk       0.00      0.00      0.00        71

    accuracy                           0.72      1000
   macro avg       0.37      0.41      0.38      1000
weighted avg       0.73      0.72      0.70      1000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
