In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import string
from nltk.corpus import stopwords
from collections import Counter
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 
from sklearn.metrics import f1_score

# Load Dataset

In [2]:
dataset_full = pd.read_csv('blogtext.csv')
dataset_full.head()

Unnamed: 0,id,gender,age,topic,sign,date,text
0,2059027,male,15,Student,Leo,"14,May,2004","Info has been found (+/- 100 pages,..."
1,2059027,male,15,Student,Leo,"13,May,2004",These are the team members: Drewe...
2,2059027,male,15,Student,Leo,"12,May,2004",In het kader van kernfusie op aarde...
3,2059027,male,15,Student,Leo,"12,May,2004",testing!!! testing!!!
4,3581210,male,33,InvestmentBanking,Aquarius,"11,June,2004",Thanks to Yahoo!'s Toolbar I can ...


In [3]:
dataset_full.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 681284 entries, 0 to 681283
Data columns (total 7 columns):
id        681284 non-null int64
gender    681284 non-null object
age       681284 non-null int64
topic     681284 non-null object
sign      681284 non-null object
date      681284 non-null object
text      681284 non-null object
dtypes: int64(2), object(5)
memory usage: 36.4+ MB


In [4]:
dataset = dataset_full.head(5000)

# Preprocess rows of the “text” column 

In [5]:
PUNCT_TO_REMOVE = string.punctuation
def remove_punctuation(text):
    """custom function to remove the punctuation"""
    return text.translate(str.maketrans('', '', PUNCT_TO_REMOVE))

dataset["text_wo_punct"] = dataset["text"].apply(lambda text: remove_punctuation(text))
dataset.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,id,gender,age,topic,sign,date,text,text_wo_punct
0,2059027,male,15,Student,Leo,"14,May,2004","Info has been found (+/- 100 pages,...",Info has been found 100 pages and ...
1,2059027,male,15,Student,Leo,"13,May,2004",These are the team members: Drewe...,These are the team members Drewes...
2,2059027,male,15,Student,Leo,"12,May,2004",In het kader van kernfusie op aarde...,In het kader van kernfusie op aarde...
3,2059027,male,15,Student,Leo,"12,May,2004",testing!!! testing!!!,testing testing
4,3581210,male,33,InvestmentBanking,Aquarius,"11,June,2004",Thanks to Yahoo!'s Toolbar I can ...,Thanks to Yahoos Toolbar I can no...


In [6]:
STOPWORDS = set(stopwords.words('english'))
def remove_stopwords(text):
    """custom function to remove the stopwords"""
    return " ".join([word for word in str(text).split() if word not in STOPWORDS])

dataset["text_wo_stop"] = dataset["text_wo_punct"].apply(lambda text: remove_stopwords(text))
dataset.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,id,gender,age,topic,sign,date,text,text_wo_punct,text_wo_stop
0,2059027,male,15,Student,Leo,"14,May,2004","Info has been found (+/- 100 pages,...",Info has been found 100 pages and ...,Info found 100 pages 45 MB pdf files Now wait ...
1,2059027,male,15,Student,Leo,"13,May,2004",These are the team members: Drewe...,These are the team members Drewes...,These team members Drewes van der Laag urlLink...
2,2059027,male,15,Student,Leo,"12,May,2004",In het kader van kernfusie op aarde...,In het kader van kernfusie op aarde...,In het kader van kernfusie op aarde MAAK JE EI...
3,2059027,male,15,Student,Leo,"12,May,2004",testing!!! testing!!!,testing testing,testing testing
4,3581210,male,33,InvestmentBanking,Aquarius,"11,June,2004",Thanks to Yahoo!'s Toolbar I can ...,Thanks to Yahoos Toolbar I can no...,Thanks Yahoos Toolbar I capture URLs popupswhi...


In [7]:
cnt = Counter()
FREQWORDS = set([w for (w, wc) in cnt.most_common(10)])
def remove_freqwords(text):
    """custom function to remove the frequent words"""
    return " ".join([word for word in str(text).split() if word not in FREQWORDS])

dataset["text_wo_stopfreq"] = dataset["text_wo_stop"].apply(lambda text: remove_freqwords(text))
dataset.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


Unnamed: 0,id,gender,age,topic,sign,date,text,text_wo_punct,text_wo_stop,text_wo_stopfreq
0,2059027,male,15,Student,Leo,"14,May,2004","Info has been found (+/- 100 pages,...",Info has been found 100 pages and ...,Info found 100 pages 45 MB pdf files Now wait ...,Info found 100 pages 45 MB pdf files Now wait ...
1,2059027,male,15,Student,Leo,"13,May,2004",These are the team members: Drewe...,These are the team members Drewes...,These team members Drewes van der Laag urlLink...,These team members Drewes van der Laag urlLink...
2,2059027,male,15,Student,Leo,"12,May,2004",In het kader van kernfusie op aarde...,In het kader van kernfusie op aarde...,In het kader van kernfusie op aarde MAAK JE EI...,In het kader van kernfusie op aarde MAAK JE EI...
3,2059027,male,15,Student,Leo,"12,May,2004",testing!!! testing!!!,testing testing,testing testing,testing testing
4,3581210,male,33,InvestmentBanking,Aquarius,"11,June,2004",Thanks to Yahoo!'s Toolbar I can ...,Thanks to Yahoos Toolbar I can no...,Thanks Yahoos Toolbar I capture URLs popupswhi...,Thanks Yahoos Toolbar I capture URLs popupswhi...


In [8]:
# Drop the two columns which are no more needed 
dataset.drop(["text_wo_punct", "text_wo_stop"], axis=1, inplace=True)

n_rare_words = 10
RAREWORDS = set([w for (w, wc) in cnt.most_common()[:-n_rare_words-1:-1]])
def remove_rarewords(text):
    """custom function to remove the rare words"""
    return " ".join([word for word in str(text).split() if word not in RAREWORDS])

dataset["text_wo_stopfreqrare"] = dataset["text_wo_stopfreq"].apply(lambda text: remove_rarewords(text))
dataset.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,id,gender,age,topic,sign,date,text,text_wo_stopfreq,text_wo_stopfreqrare
0,2059027,male,15,Student,Leo,"14,May,2004","Info has been found (+/- 100 pages,...",Info found 100 pages 45 MB pdf files Now wait ...,Info found 100 pages 45 MB pdf files Now wait ...
1,2059027,male,15,Student,Leo,"13,May,2004",These are the team members: Drewe...,These team members Drewes van der Laag urlLink...,These team members Drewes van der Laag urlLink...
2,2059027,male,15,Student,Leo,"12,May,2004",In het kader van kernfusie op aarde...,In het kader van kernfusie op aarde MAAK JE EI...,In het kader van kernfusie op aarde MAAK JE EI...
3,2059027,male,15,Student,Leo,"12,May,2004",testing!!! testing!!!,testing testing,testing testing
4,3581210,male,33,InvestmentBanking,Aquarius,"11,June,2004",Thanks to Yahoo!'s Toolbar I can ...,Thanks Yahoos Toolbar I capture URLs popupswhi...,Thanks Yahoos Toolbar I capture URLs popupswhi...


In [9]:
# Drop the two columns which are no more needed 
#dataset.drop(["text_wo_stop"], axis=1, inplace=True)

n_rare_words = 10
RAREWORDS = set([w for (w, wc) in cnt.most_common()[:-n_rare_words-1:-1]])
def remove_rarewords(text):
    """custom function to remove the rare words"""
    return " ".join([word for word in str(text).split() if word not in RAREWORDS])

dataset["text_wo_stopfreqrare"] = dataset["text_wo_stopfreq"].apply(lambda text: remove_rarewords(text))
dataset.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,id,gender,age,topic,sign,date,text,text_wo_stopfreq,text_wo_stopfreqrare
0,2059027,male,15,Student,Leo,"14,May,2004","Info has been found (+/- 100 pages,...",Info found 100 pages 45 MB pdf files Now wait ...,Info found 100 pages 45 MB pdf files Now wait ...
1,2059027,male,15,Student,Leo,"13,May,2004",These are the team members: Drewe...,These team members Drewes van der Laag urlLink...,These team members Drewes van der Laag urlLink...
2,2059027,male,15,Student,Leo,"12,May,2004",In het kader van kernfusie op aarde...,In het kader van kernfusie op aarde MAAK JE EI...,In het kader van kernfusie op aarde MAAK JE EI...
3,2059027,male,15,Student,Leo,"12,May,2004",testing!!! testing!!!,testing testing,testing testing
4,3581210,male,33,InvestmentBanking,Aquarius,"11,June,2004",Thanks to Yahoo!'s Toolbar I can ...,Thanks Yahoos Toolbar I capture URLs popupswhi...,Thanks Yahoos Toolbar I capture URLs popupswhi...


In [10]:
lemmatizer = WordNetLemmatizer()
def lemmatize_words(text):
    return " ".join([lemmatizer.lemmatize(word) for word in text.split()])

dataset["text_lemmatized"] = dataset["text_wo_stopfreqrare"].apply(lambda text: lemmatize_words(text))
dataset.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,id,gender,age,topic,sign,date,text,text_wo_stopfreq,text_wo_stopfreqrare,text_lemmatized
0,2059027,male,15,Student,Leo,"14,May,2004","Info has been found (+/- 100 pages,...",Info found 100 pages 45 MB pdf files Now wait ...,Info found 100 pages 45 MB pdf files Now wait ...,Info found 100 page 45 MB pdf file Now wait un...
1,2059027,male,15,Student,Leo,"13,May,2004",These are the team members: Drewe...,These team members Drewes van der Laag urlLink...,These team members Drewes van der Laag urlLink...,These team member Drewes van der Laag urlLink ...
2,2059027,male,15,Student,Leo,"12,May,2004",In het kader van kernfusie op aarde...,In het kader van kernfusie op aarde MAAK JE EI...,In het kader van kernfusie op aarde MAAK JE EI...,In het kader van kernfusie op aarde MAAK JE EI...
3,2059027,male,15,Student,Leo,"12,May,2004",testing!!! testing!!!,testing testing,testing testing,testing testing
4,3581210,male,33,InvestmentBanking,Aquarius,"11,June,2004",Thanks to Yahoo!'s Toolbar I can ...,Thanks Yahoos Toolbar I capture URLs popupswhi...,Thanks Yahoos Toolbar I capture URLs popupswhi...,Thanks Yahoos Toolbar I capture URLs popupswhi...


In [11]:
# Drop the two columns 
#dataset.drop(["text_wo_stopfreq", "text_wo_stopfreqrare"], axis=1, inplace=True) 

stemmer = PorterStemmer()
def stem_words(text):
    return " ".join([stemmer.stem(word) for word in text.split()])

dataset["text_preprocessed"] = dataset["text_lemmatized"].apply(lambda text: stem_words(text))
dataset.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,id,gender,age,topic,sign,date,text,text_wo_stopfreq,text_wo_stopfreqrare,text_lemmatized,text_preprocessed
0,2059027,male,15,Student,Leo,"14,May,2004","Info has been found (+/- 100 pages,...",Info found 100 pages 45 MB pdf files Now wait ...,Info found 100 pages 45 MB pdf files Now wait ...,Info found 100 page 45 MB pdf file Now wait un...,info found 100 page 45 MB pdf file now wait un...
1,2059027,male,15,Student,Leo,"13,May,2004",These are the team members: Drewe...,These team members Drewes van der Laag urlLink...,These team members Drewes van der Laag urlLink...,These team member Drewes van der Laag urlLink ...,these team member drew van der laag urllink ma...
2,2059027,male,15,Student,Leo,"12,May,2004",In het kader van kernfusie op aarde...,In het kader van kernfusie op aarde MAAK JE EI...,In het kader van kernfusie op aarde MAAK JE EI...,In het kader van kernfusie op aarde MAAK JE EI...,In het kader van kernfusi op aard maak JE eige...
3,2059027,male,15,Student,Leo,"12,May,2004",testing!!! testing!!!,testing testing,testing testing,testing testing,test test
4,3581210,male,33,InvestmentBanking,Aquarius,"11,June,2004",Thanks to Yahoo!'s Toolbar I can ...,Thanks Yahoos Toolbar I capture URLs popupswhi...,Thanks Yahoos Toolbar I capture URLs popupswhi...,Thanks Yahoos Toolbar I capture URLs popupswhi...,thank yahoo toolbar I captur url popupswhich m...


# As we want to make this into a multi-label classification problem, you are required to merge all the label columns together, so that we have all the labels together for a particular sentence 

In [12]:
label_y = []

for row in dataset.iterrows():
    row_labels = []
    row_labels.append(str(row[1][1]))
    row_labels.append(str(row[1][2]))
    row_labels.append(str(row[1][3]))
    row_labels.append(str(row[1][4]))
    label_y.append(row_labels)

dataset['Labels'] = label_y
dataset.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


Unnamed: 0,id,gender,age,topic,sign,date,text,text_wo_stopfreq,text_wo_stopfreqrare,text_lemmatized,text_preprocessed,Labels
0,2059027,male,15,Student,Leo,"14,May,2004","Info has been found (+/- 100 pages,...",Info found 100 pages 45 MB pdf files Now wait ...,Info found 100 pages 45 MB pdf files Now wait ...,Info found 100 page 45 MB pdf file Now wait un...,info found 100 page 45 MB pdf file now wait un...,"[male, 15, Student, Leo]"
1,2059027,male,15,Student,Leo,"13,May,2004",These are the team members: Drewe...,These team members Drewes van der Laag urlLink...,These team members Drewes van der Laag urlLink...,These team member Drewes van der Laag urlLink ...,these team member drew van der laag urllink ma...,"[male, 15, Student, Leo]"
2,2059027,male,15,Student,Leo,"12,May,2004",In het kader van kernfusie op aarde...,In het kader van kernfusie op aarde MAAK JE EI...,In het kader van kernfusie op aarde MAAK JE EI...,In het kader van kernfusie op aarde MAAK JE EI...,In het kader van kernfusi op aard maak JE eige...,"[male, 15, Student, Leo]"
3,2059027,male,15,Student,Leo,"12,May,2004",testing!!! testing!!!,testing testing,testing testing,testing testing,test test,"[male, 15, Student, Leo]"
4,3581210,male,33,InvestmentBanking,Aquarius,"11,June,2004",Thanks to Yahoo!'s Toolbar I can ...,Thanks Yahoos Toolbar I capture URLs popupswhi...,Thanks Yahoos Toolbar I capture URLs popupswhi...,Thanks Yahoos Toolbar I capture URLs popupswhi...,thank yahoo toolbar I captur url popupswhich m...,"[male, 33, InvestmentBanking, Aquarius]"


# Create a dictionary to get the count of every label i.e. the key will be label name and value will be the total count of the label. Check below image for reference

In [13]:
gender = dataset['gender'].value_counts().to_dict()
print(gender)

{'male': 3294, 'female': 1706}


In [14]:
age = dataset['age'].value_counts().to_dict()
print(age)

{35: 2307, 34: 540, 24: 353, 15: 339, 17: 331, 25: 268, 14: 170, 23: 137, 33: 101, 26: 96, 27: 86, 39: 79, 16: 67, 36: 60, 37: 19, 41: 14, 45: 14, 42: 9, 46: 7, 44: 3}


In [15]:
topics = dataset['topic'].value_counts().to_dict()
print(topics)

{'Technology': 2332, 'indUnk': 1381, 'Student': 569, 'Engineering': 119, 'Education': 118, 'BusinessServices': 87, 'Sports-Recreation': 75, 'InvestmentBanking': 70, 'Communications-Media': 61, 'Non-Profit': 47, 'Science': 33, 'Arts': 31, 'Internet': 20, 'Consulting': 16, 'Banking': 16, 'Automotive': 14, 'Religion': 4, 'Law': 3, 'Museums-Libraries': 2, 'Accounting': 2}


In [16]:
signs = dataset['sign'].value_counts().to_dict()
print(signs)

{'Aries': 2483, 'Sagittarius': 704, 'Libra': 414, 'Scorpio': 408, 'Aquarius': 329, 'Leo': 190, 'Taurus': 100, 'Cancer': 94, 'Gemini': 86, 'Capricorn': 84, 'Pisces': 67, 'Virgo': 41}


In [17]:
label_dictionary = {}
label_dictionary.update(gender)
label_dictionary.update(age)
label_dictionary.update(topics)
label_dictionary.update(signs)
print(label_dictionary)

{'male': 3294, 'female': 1706, 35: 2307, 34: 540, 24: 353, 15: 339, 17: 331, 25: 268, 14: 170, 23: 137, 33: 101, 26: 96, 27: 86, 39: 79, 16: 67, 36: 60, 37: 19, 41: 14, 45: 14, 42: 9, 46: 7, 44: 3, 'Technology': 2332, 'indUnk': 1381, 'Student': 569, 'Engineering': 119, 'Education': 118, 'BusinessServices': 87, 'Sports-Recreation': 75, 'InvestmentBanking': 70, 'Communications-Media': 61, 'Non-Profit': 47, 'Science': 33, 'Arts': 31, 'Internet': 20, 'Consulting': 16, 'Banking': 16, 'Automotive': 14, 'Religion': 4, 'Law': 3, 'Museums-Libraries': 2, 'Accounting': 2, 'Aries': 2483, 'Sagittarius': 704, 'Libra': 414, 'Scorpio': 408, 'Aquarius': 329, 'Leo': 190, 'Taurus': 100, 'Cancer': 94, 'Gemini': 86, 'Capricorn': 84, 'Pisces': 67, 'Virgo': 41}


In [18]:
dataset = dataset[["text_preprocessed", "Labels"]]
dataset.head()

Unnamed: 0,text_preprocessed,Labels
0,info found 100 page 45 MB pdf file now wait un...,"[male, 15, Student, Leo]"
1,these team member drew van der laag urllink ma...,"[male, 15, Student, Leo]"
2,In het kader van kernfusi op aard maak JE eige...,"[male, 15, Student, Leo]"
3,test test,"[male, 15, Student, Leo]"
4,thank yahoo toolbar I captur url popupswhich m...,"[male, 33, InvestmentBanking, Aquarius]"


# Separate features and labels, and split the data into training and testing 

In [19]:
X = dataset['text_preprocessed']
y = dataset['Labels']

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Vectorize the features 

In [21]:
cnt_vectorizer = CountVectorizer(ngram_range=(1,2))
X_train = cnt_vectorizer.fit_transform(X_train)
X_test = cnt_vectorizer.transform(X_test)

In [30]:
#10 Sample features
cnt_vectorizer.get_feature_names()[0:10]

['000',
 '000 peopl',
 '0000',
 '0000 blink',
 '001',
 '001 first',
 '002',
 '002 middl',
 '003',
 '003 last']

In [23]:
mlb = MultiLabelBinarizer()
y_train_multi = mlb.fit_transform(y_train)
y_test_multi = mlb.transform(y_test)

In [24]:
clf = LogisticRegression(random_state=0, solver='lbfgs',
                         multi_class='multinomial')
classif = OneVsRestClassifier(clf)
classif.fit(X_train, y_train_multi)


OneVsRestClassifier(estimator=LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='multinomial',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=0, solver='lbfgs',
                                                 tol=0.0001, verbose=0,
                                                 warm_start=False),
                    n_jobs=None)

In [25]:
pred = classif.predict(X_test)

In [31]:
# Python script for confusion matrix creation. 
results = multilabel_confusion_matrix(y_test_multi, pred) 
print('Confusion Matrix :')
print(results) 

Confusion Matrix :
[[[ 966    0]
  [  29    5]]

 [[ 926    9]
  [  45   20]]

 [[ 977    0]
  [  23    0]]

 [[ 930    5]
  [  43   22]]

 [[ 971    3]
  [  25    1]]

 [[ 928   10]
  [  38   24]]

 [[ 931    4]
  [  58    7]]

 [[ 980    0]
  [  20    0]]

 [[ 982    0]
  [  13    5]]

 [[ 978    1]
  [  13    8]]

 [[ 893    2]
  [  31   74]]

 [[ 453   86]
  [  46  415]]

 [[ 985    4]
  [   9    2]]

 [[ 996    0]
  [   4    0]]

 [[ 988    0]
  [  12    0]]

 [[ 997    1]
  [   2    0]]

 [[ 999    0]
  [   1    0]]

 [[1000    0]
  [   0    0]]

 [[ 996    0]
  [   4    0]]

 [[ 999    0]
  [   1    0]]

 [[ 999    0]
  [   1    0]]

 [[ 926    5]
  [  54   15]]

 [[ 394  102]
  [  51  453]]

 [[ 996    0]
  [   4    0]]

 [[ 997    1]
  [   2    0]]

 [[ 996    0]
  [   4    0]]

 [[ 974    1]
  [  21    4]]

 [[ 981    0]
  [  16    3]]

 [[ 984    0]
  [   9    7]]

 [[ 990    3]
  [   7    0]]

 [[ 998    0]
  [   2    0]]

 [[ 973    2]
  [  21    4]]

 [[ 976    2]
  [  10

In [32]:
print('Report : ')
print(classification_report(y_test_multi, pred))

Report : 
              precision    recall  f1-score   support

           0       1.00      0.15      0.26        34
           1       0.69      0.31      0.43        65
           2       0.00      0.00      0.00        23
           3       0.81      0.34      0.48        65
           4       0.25      0.04      0.07        26
           5       0.71      0.39      0.50        62
           6       0.64      0.11      0.18        65
           7       0.00      0.00      0.00        20
           8       1.00      0.28      0.43        18
           9       0.89      0.38      0.53        21
          10       0.97      0.70      0.82       105
          11       0.83      0.90      0.86       461
          12       0.33      0.18      0.24        11
          13       0.00      0.00      0.00         4
          14       0.00      0.00      0.00        12
          15       0.00      0.00      0.00         2
          16       0.00      0.00      0.00         1
          17     

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [29]:
print('Accuracy Score :',accuracy_score(y_test_multi, pred))

Accuracy Score : 0.524
