### Importing relevant packages

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.pipeline import make_pipeline
from nltk.corpus import stopwords
import eli5
import warnings
warnings.filterwarnings('ignore')
import joblib
import os

### Loading our data

In [2]:
# The dataset lives in a folder next to the notebook's working directory:
# step up one level from the CWD, then descend into that folder.
path = os.getcwd()
parent = os.path.split(path)[0]
data_path = os.path.join(parent, 'Data fetching from api', 'data.csv')

In [3]:
# Load only the two columns the rest of the notebook uses. The CSV also holds
# an unnamed leading column (it shows up as "Unnamed: 0" when the whole file is
# read, apparently a row index written out when the file was saved); it is
# skipped here so it cannot leak into later steps.
df = pd.read_csv(data_path, usecols=['sentences', 'dialect'])
df.head()

Unnamed: 0,sentences,dialect
0,ŸÑŸÉŸÜ ÿ®ÿßŸÑŸÜŸáÿßŸäÿ© .. ŸäŸÜÿ™ŸÅÿ∂ .. Ÿäÿ∫Ÿäÿ± .,IQ
1,ŸäÿπŸÜŸä Ÿáÿ∞ÿß ŸÖÿ≠ÿ≥Ÿàÿ® ÿπŸÑŸâ ÿßŸÑÿ®ÿ¥ÿ± .. ÿ≠ŸäŸàŸÜŸá ŸàŸàÿ≠ÿ¥ŸäŸá .. Ÿàÿ™...,IQ
2,ŸÖÿ®ŸäŸÜ ŸÖŸÜ ŸÉŸÑÿßŸÖŸá ÿÆŸÑŸäÿ¨Ÿä,IQ
3,Ÿäÿ≥ŸÑŸÖŸÑŸä ŸÖÿ±Ÿàÿ±ŸÉ Ÿàÿ±Ÿàÿ≠ŸÉ ÿßŸÑÿ≠ŸÑŸàŸáüíê,IQ
4,ŸàŸäŸÜ ŸáŸÑ ÿßŸÑÿ∫Ÿäÿ®Ÿá ÿßÿÆ ŸÖÿ≠ŸÖÿØ üå∏üå∫,IQ


In [25]:
# Number of sentences per dialect label, most frequent first.
df['dialect'].value_counts()

EG    57636
PL    43742
KW    42109
LY    36499
QA    31069
JO    27921
LB    27617
SA    26832
AE    26296
BH    26292
OM    19116
SY    16242
DZ    16183
IQ    15497
SD    14434
MA    11539
YE     9927
TN     9246
Name: dialect, dtype: int64

### Shuffling data

In [3]:
# Shuffle every row (frac=1.0 keeps the full dataset); the fixed seed makes the
# row order, and therefore the split taken from it, repeatable.
df_shuffled = df.sample(frac=1.0, random_state=42)

### Train/Test split

In [4]:
# Positional 80/20 split of the shuffled frame: the first 80% of the rows are
# used for training and the remaining rows for testing.
split_at = int(0.8 * len(df_shuffled))
df_train = df_shuffled.iloc[:split_at]
df_test = df_shuffled.iloc[split_at:]


### Defining report function

In [5]:
def print_report(pipe, x_test, y_test):
    """Print a classification report and the overall accuracy for a model.

    Parameters
    ----------
    pipe : object exposing ``predict``; called once with ``x_test``.
    x_test : inputs handed to ``pipe.predict``.
    y_test : true labels that the predictions are compared against.

    Returns
    -------
    None. Both results are written to standard output.
    """
    predictions = pipe.predict(x_test)
    print(metrics.classification_report(y_test, predictions))
    accuracy = metrics.accuracy_score(y_test, predictions)
    print("accuracy: {:0.3f}".format(accuracy))

### Creating stop words list

In [6]:
# Arabic stop-word list from NLTK's `stopwords` corpus; it is handed to the
# CountVectorizer of the random-forest pipeline below.
# NOTE(review): presumably the NLTK stopwords corpus must already be available
# locally for this call to succeed - confirm the setup instructions cover it.
stop_words_list= stopwords.words('arabic')

### Random forest

In [7]:
# Bag-of-words token counts (with the Arabic stop words removed) feeding a
# random forest.
vec = CountVectorizer(stop_words=stop_words_list)
# A random forest is a stochastic model: seed it so that re-running the notebook
# grows the same trees and reproduces the same report.
# NOTE(review): the recorded report for this model predicts a single class for
# every test sentence; the very shallow trees (max_depth=2) are a likely cause,
# so treat this as a weak baseline rather than a tuned random forest.
clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=42)
pipe = make_pipeline(vec, clf)


In [8]:
# Train the current pipeline on the training sentences and their dialect labels.
pipe.fit(df_train['sentences'], df_train['dialect'])

Pipeline(steps=[('countvectorizer',
                 CountVectorizer(stop_words=['ÿ•ÿ∞', 'ÿ•ÿ∞ÿß', 'ÿ•ÿ∞ŸÖÿß', 'ÿ•ÿ∞ŸÜ', 'ÿ£ŸÅ',
                                             'ÿ£ŸÇŸÑ', 'ÿ£ŸÉÿ´ÿ±', 'ÿ£ŸÑÿß', 'ÿ•ŸÑÿß',
                                             'ÿßŸÑÿ™Ÿä', 'ÿßŸÑÿ∞Ÿä', 'ÿßŸÑÿ∞ŸäŸÜ', 'ÿßŸÑŸÑÿßÿ™Ÿä',
                                             'ÿßŸÑŸÑÿßÿ¶Ÿä', 'ÿßŸÑŸÑÿ™ÿßŸÜ', 'ÿßŸÑŸÑÿ™Ÿäÿß',
                                             'ÿßŸÑŸÑÿ™ŸäŸÜ', 'ÿßŸÑŸÑÿ∞ÿßŸÜ', 'ÿßŸÑŸÑÿ∞ŸäŸÜ',
                                             'ÿßŸÑŸÑŸàÿßÿ™Ÿä', 'ÿ•ŸÑŸâ', 'ÿ•ŸÑŸäŸÉ', 'ÿ•ŸÑŸäŸÉŸÖ',
                                             'ÿ•ŸÑŸäŸÉŸÖÿß', 'ÿ•ŸÑŸäŸÉŸÜ', 'ÿ£ŸÖ', 'ÿ£ŸÖÿß',
                                             'ÿ£ŸÖÿß', 'ÿ•ŸÖÿß', 'ÿ£ŸÜ', ...])),
                ('randomforestclassifier',
                 RandomForestClassifier(max_depth=2))])

In [9]:
# Score the fitted pipeline on the held-out test rows.
print_report(pipe, x_test=df_test['sentences'], y_test=df_test['dialect'])

              precision    recall  f1-score   support

          AE       0.00      0.00      0.00      5264
          BH       0.00      0.00      0.00      5113
          DZ       0.00      0.00      0.00      3142
          EG       0.13      1.00      0.23     11621
          IQ       0.00      0.00      0.00      3183
          JO       0.00      0.00      0.00      5488
          KW       0.00      0.00      0.00      8423
          LB       0.00      0.00      0.00      5517
          LY       0.00      0.00      0.00      7325
          MA       0.00      0.00      0.00      2281
          OM       0.00      0.00      0.00      3809
          PL       0.00      0.00      0.00      8814
          QA       0.00      0.00      0.00      6216
          SA       0.00      0.00      0.00      5430
          SD       0.00      0.00      0.00      2910
          SY       0.00      0.00      0.00      3217
          TN       0.00      0.00      0.00      1936
          YE       0.00    

### Logistic regression

In [10]:
# Plain bag-of-words counts (no stop-word removal) feeding a logistic regression
# with default settings. `vec` and `clf` are kept as separate names because the
# model-inspection cell further down passes them to eli5 individually.
# NOTE(review): warnings are silenced globally in the import cell, so a solver
# convergence warning would not be visible here - confirm the fit converges.
vec, clf = CountVectorizer(), LogisticRegression()
pipe = make_pipeline(vec, clf)


In [11]:
# Train the current pipeline on the training sentences and their dialect labels.
pipe.fit(df_train['sentences'], df_train['dialect'])

Pipeline(steps=[('countvectorizer', CountVectorizer()),
                ('logisticregression', LogisticRegression())])

In [12]:
# Score the fitted pipeline on the held-out test rows.
print_report(pipe, x_test=df_test['sentences'], y_test=df_test['dialect'])

              precision    recall  f1-score   support

          AE       0.41      0.42      0.41      5264
          BH       0.40      0.31      0.35      5113
          DZ       0.59      0.53      0.56      3142
          EG       0.69      0.83      0.76     11621
          IQ       0.63      0.52      0.57      3183
          JO       0.42      0.36      0.39      5488
          KW       0.47      0.56      0.51      8423
          LB       0.60      0.67      0.63      5517
          LY       0.63      0.68      0.66      7325
          MA       0.74      0.56      0.64      2281
          OM       0.41      0.34      0.37      3809
          PL       0.50      0.52      0.51      8814
          QA       0.44      0.49      0.47      6216
          SA       0.40      0.43      0.42      5430
          SD       0.70      0.55      0.62      2910
          SY       0.45      0.34      0.39      3217
          TN       0.67      0.43      0.52      1936
          YE       0.41    

### Logistic regression performs better than random forest

### Removing stop words doesn't increase accuracy

### Let's better understand our model

In [13]:
# Display the classifier's feature weights with eli5. The vectorizer is passed
# along with the classifier (presumably so eli5 can label each weight with its
# token - confirm against the eli5 docs); `top=20` is forwarded to eli5 as the
# display limit.
eli5.show_weights(clf, vec=vec, top=20)

Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0,Unnamed: 17_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3
Weight?,Feature,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4
Weight?,Feature,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5,Unnamed: 16_level_5,Unnamed: 17_level_5
Weight?,Feature,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6,Unnamed: 16_level_6,Unnamed: 17_level_6
Weight?,Feature,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7,Unnamed: 12_level_7,Unnamed: 13_level_7,Unnamed: 14_level_7,Unnamed: 15_level_7,Unnamed: 16_level_7,Unnamed: 17_level_7
Weight?,Feature,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8,Unnamed: 12_level_8,Unnamed: 13_level_8,Unnamed: 14_level_8,Unnamed: 15_level_8,Unnamed: 16_level_8,Unnamed: 17_level_8
Weight?,Feature,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9,Unnamed: 6_level_9,Unnamed: 7_level_9,Unnamed: 8_level_9,Unnamed: 9_level_9,Unnamed: 10_level_9,Unnamed: 11_level_9,Unnamed: 12_level_9,Unnamed: 13_level_9,Unnamed: 14_level_9,Unnamed: 15_level_9,Unnamed: 16_level_9,Unnamed: 17_level_9
Weight?,Feature,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10,Unnamed: 6_level_10,Unnamed: 7_level_10,Unnamed: 8_level_10,Unnamed: 9_level_10,Unnamed: 10_level_10,Unnamed: 11_level_10,Unnamed: 12_level_10,Unnamed: 13_level_10,Unnamed: 14_level_10,Unnamed: 15_level_10,Unnamed: 16_level_10,Unnamed: 17_level_10
Weight?,Feature,Unnamed: 2_level_11,Unnamed: 3_level_11,Unnamed: 4_level_11,Unnamed: 5_level_11,Unnamed: 6_level_11,Unnamed: 7_level_11,Unnamed: 8_level_11,Unnamed: 9_level_11,Unnamed: 10_level_11,Unnamed: 11_level_11,Unnamed: 12_level_11,Unnamed: 13_level_11,Unnamed: 14_level_11,Unnamed: 15_level_11,Unnamed: 16_level_11,Unnamed: 17_level_11
Weight?,Feature,Unnamed: 2_level_12,Unnamed: 3_level_12,Unnamed: 4_level_12,Unnamed: 5_level_12,Unnamed: 6_level_12,Unnamed: 7_level_12,Unnamed: 8_level_12,Unnamed: 9_level_12,Unnamed: 10_level_12,Unnamed: 11_level_12,Unnamed: 12_level_12,Unnamed: 13_level_12,Unnamed: 14_level_12,Unnamed: 15_level_12,Unnamed: 16_level_12,Unnamed: 17_level_12
Weight?,Feature,Unnamed: 2_level_13,Unnamed: 3_level_13,Unnamed: 4_level_13,Unnamed: 5_level_13,Unnamed: 6_level_13,Unnamed: 7_level_13,Unnamed: 8_level_13,Unnamed: 9_level_13,Unnamed: 10_level_13,Unnamed: 11_level_13,Unnamed: 12_level_13,Unnamed: 13_level_13,Unnamed: 14_level_13,Unnamed: 15_level_13,Unnamed: 16_level_13,Unnamed: 17_level_13
Weight?,Feature,Unnamed: 2_level_14,Unnamed: 3_level_14,Unnamed: 4_level_14,Unnamed: 5_level_14,Unnamed: 6_level_14,Unnamed: 7_level_14,Unnamed: 8_level_14,Unnamed: 9_level_14,Unnamed: 10_level_14,Unnamed: 11_level_14,Unnamed: 12_level_14,Unnamed: 13_level_14,Unnamed: 14_level_14,Unnamed: 15_level_14,Unnamed: 16_level_14,Unnamed: 17_level_14
Weight?,Feature,Unnamed: 2_level_15,Unnamed: 3_level_15,Unnamed: 4_level_15,Unnamed: 5_level_15,Unnamed: 6_level_15,Unnamed: 7_level_15,Unnamed: 8_level_15,Unnamed: 9_level_15,Unnamed: 10_level_15,Unnamed: 11_level_15,Unnamed: 12_level_15,Unnamed: 13_level_15,Unnamed: 14_level_15,Unnamed: 15_level_15,Unnamed: 16_level_15,Unnamed: 17_level_15
Weight?,Feature,Unnamed: 2_level_16,Unnamed: 3_level_16,Unnamed: 4_level_16,Unnamed: 5_level_16,Unnamed: 6_level_16,Unnamed: 7_level_16,Unnamed: 8_level_16,Unnamed: 9_level_16,Unnamed: 10_level_16,Unnamed: 11_level_16,Unnamed: 12_level_16,Unnamed: 13_level_16,Unnamed: 14_level_16,Unnamed: 15_level_16,Unnamed: 16_level_16,Unnamed: 17_level_16
Weight?,Feature,Unnamed: 2_level_17,Unnamed: 3_level_17,Unnamed: 4_level_17,Unnamed: 5_level_17,Unnamed: 6_level_17,Unnamed: 7_level_17,Unnamed: 8_level_17,Unnamed: 9_level_17,Unnamed: 10_level_17,Unnamed: 11_level_17,Unnamed: 12_level_17,Unnamed: 13_level_17,Unnamed: 14_level_17,Unnamed: 15_level_17,Unnamed: 16_level_17,Unnamed: 17_level_17
+6.215,ÿßŸÑÿ≠ŸäŸÜŸá,,,,,,,,,,,,,,,,
+5.075,ŸÖÿ®,,,,,,,,,,,,,,,,
+5.067,ÿ¥ÿ±ÿßÿ™,,,,,,,,,,,,,,,,
+4.852,ŸäÿßŸÑÿ≥,,,,,,,,,,,,,,,,
+4.568,ŸÜÿßÿØŸäŸÜ_ŸÜÿ≥Ÿäÿ®_ŸÜÿ¨ŸäŸÖ,,,,,,,,,,,,,,,,
+4.355,ÿ®Ÿàÿ∏ÿ®Ÿä,,,,,,,,,,,,,,,,
+4.199,ÿπŸäŸÜÿßŸàŸä,,,,,,,,,,,,,,,,
+4.128,ŸÜÿ®ÿß,,,,,,,,,,,,,,,,
+4.017,ÿπÿ≥ÿ®,,,,,,,,,,,,,,,,
+3.924,ÿßŸÑÿ¥ÿßÿ±ÿ¨ÿ©,,,,,,,,,,,,,,,,

Weight?,Feature
+6.215,ÿßŸÑÿ≠ŸäŸÜŸá
+5.075,ŸÖÿ®
+5.067,ÿ¥ÿ±ÿßÿ™
+4.852,ŸäÿßŸÑÿ≥
+4.568,ŸÜÿßÿØŸäŸÜ_ŸÜÿ≥Ÿäÿ®_ŸÜÿ¨ŸäŸÖ
+4.355,ÿ®Ÿàÿ∏ÿ®Ÿä
+4.199,ÿπŸäŸÜÿßŸàŸä
+4.128,ŸÜÿ®ÿß
+4.017,ÿπÿ≥ÿ®
+3.924,ÿßŸÑÿ¥ÿßÿ±ÿ¨ÿ©

Weight?,Feature
+8.412,ÿπÿ®ÿØÿßŸÑÿπÿ≤Ÿäÿ≤_ÿßŸÑÿ±ŸàŸäÿ≠Ÿä
+5.878,ÿ≥ŸÖÿ±_ÿßŸÑÿ®ÿ≠ÿ±ŸäŸÜŸäÿ©
+5.383,ÿ≥ŸÖÿ±_ÿßŸÑÿ®ÿ≠ÿ±ŸäŸÜŸäŸá
+5.338,ÿ∫ÿ±ÿØ_ÿ®ŸÅÿ™ŸàŸâ
+5.148,ÿßŸÑŸÖÿ≠ÿ±ŸÇ
+4.537,ÿßÿ≠ŸäŸÜ
+4.482,ÿµÿ¨
+4.275,ŸÑŸäŸä
+3.957,ÿ™ÿ±ŸÉŸä_ÿßŸáÿ®ÿ∑_ÿßŸÑÿßÿ™ÿ≠ÿßÿØ
+3.952,ÿ®ÿÆŸäŸäÿ±

Weight?,Feature
+8.127,Ÿàÿßÿ¥
+6.591,ÿ®ÿ≤ÿßŸÅ
+6.209,ÿ≥ŸÜÿßÿ®ÿßÿ™_ÿ∫ÿßÿ≤Ÿä_ÿßŸÑŸÖÿ∑Ÿäÿ±Ÿä
+6.043,ÿ≥ŸáŸäŸÑÿ©
+5.987,ÿ±ÿßŸá
+5.785,ÿ¥ŸÉŸàŸÜ
+5.356,ÿ®ÿßÿ¥
+5.338,ÿ®ÿ±ŸÉ
+5.237,ÿ≤ÿØ_ÿ±ÿµŸäÿØŸÉ
+5.019,ÿπŸÑÿßÿ¥

Weight?,Feature
+4.924,ÿπŸÑŸäŸÉŸâ
+4.769,ÿßŸàŸâ
+4.476,ÿ•ŸÑŸÑŸâ
+4.060,ÿØŸÑŸàŸÇÿ™Ÿâ
+3.953,ÿßÿ≥ŸÉŸÜÿØÿ±ŸäŸá
+3.945,ÿßŸÑÿ∫ÿßŸÑŸâ
+3.919,ÿØŸá
+3.805,ŸàÿßŸÜÿ™Ÿâ
… 81085 more positive …,… 81085 more positive …
… 368198 more negative …,… 368198 more negative …

Weight?,Feature
+7.271,ŸáŸäÿ¨
+6.448,ŸÑÿπÿØ
+5.489,ŸáŸàÿßŸä
+4.863,ÿµÿØŸÉ
+4.794,ŸäŸÉŸàŸÑ
+4.781,ŸÖÿßŸÉŸà
+4.738,ÿßÿ≠ŸÜŸá
+4.723,Ÿáÿ∞ŸàŸÑŸá
+4.717,ÿ¥ŸÉÿØ
+4.640,ÿ®ÿßŸÑÿπÿ±ÿßŸÇ

Weight?,Feature
+8.241,ÿßÿ¥Ÿä
+7.926,ÿ™ÿ¥ŸàŸäÿ¥_Ÿàÿßÿ∂ÿ≠
+5.939,ŸÖÿ®ÿßÿØÿ±Ÿá_ÿ®ŸÑÿßŸÑ_ÿßŸÑŸÖÿßÿ∂Ÿä
+5.824,Ÿáÿ≥ÿß
+5.042,ÿßŸÑŸÅŸäÿµŸÑŸä
+4.919,Ÿäÿ≤ŸÖ
+4.629,ÿ®ŸÑÿßŸÑ_ÿßŸÑŸÖÿßÿ∂Ÿä
+4.594,ÿ®ÿßŸÑÿßÿ±ÿØŸÜ
+4.511,ÿßŸÜÿØÿßÿ±Ÿä
+4.392,ÿ≤ÿØ_ÿ±ÿµŸäÿØŸÉ

Weight?,Feature
+6.851,ÿπÿ®ÿØÿßŸÑŸÑŸá_ÿßŸÑÿπÿµŸäÿØÿßŸÜ
+5.165,ŸÉÿ±ÿ®ÿßÿ¨
+5.149,ÿßŸÑÿ®ÿØŸàŸÜ
+5.050,ÿßŸÑŸÇÿßÿØÿ≥ŸäŸá
+5.020,ŸÅŸÜÿ∫ÿ±
+4.863,ÿµÿ¨
+4.730,ÿ™ŸÉŸÅŸá
+4.586,ŸÜÿßÿ∑ÿ±_ÿ®Ÿäÿ™
+4.573,ÿ¨ÿ∞Ÿä
+4.360,ÿ£ŸÑŸÑŸá

Weight?,Feature
+5.800,ŸáŸÑŸÇ
+5.317,ŸáŸàÿß_ÿßŸÑÿ≠ÿ±Ÿäÿ©
+5.208,ŸáŸäÿØÿß
+5.131,ŸÑÿ®ŸÜÿßŸÜ_ŸäŸÜÿ™ŸÅÿ∂
+5.012,ŸáŸàŸÑ
+4.941,ÿßÿ¨ÿß_ŸàŸÇÿ™_ŸÜÿ≠ÿßÿ≥ÿ®
+4.815,ÿ£ŸÑŸÑŸá
+4.601,ŸÇÿØÿ≠_Ÿàÿ¨ŸÖ
+4.536,ŸäŸÑŸÑŸä
+4.272,ŸáŸàŸä

Weight?,Feature
+13.636,ŸÇÿµŸÇÿµ
+12.689,ŸáŸÉŸä
+11.461,ÿ¥ŸÜ
+9.431,ŸáŸÑÿ®ÿß
+9.330,ÿ®ŸÜÿ∫ÿßÿ≤Ÿä
+8.492,Ÿáÿ∂ÿß
+8.045,ÿ™Ÿàÿß
+6.662,ŸÑŸäÿ®Ÿä
+6.229,ÿßŸÑÿ≥ÿ±ÿßÿ¨
+5.837,ÿßŸÑŸÑŸäÿ®ŸäŸäŸÜ

Weight?,Feature
+10.032,ÿØŸäÿßŸÑ
+7.815,Ÿàÿßÿ¥
+7.401,ÿ±ÿßŸá
+6.892,ŸáÿßÿØÿ¥Ÿä
+6.778,ÿØÿßÿ®ÿß
+6.313,ÿ®ÿ≤ÿßŸÅ
+6.224,ÿ≠Ÿäÿ™
+6.139,ÿßŸáÿßÿ®
+5.917,ÿ®ÿßÿ¥
+5.752,ÿØŸäÿßŸÑŸä

Weight?,Feature
+5.738,ÿµÿ≠ÿßÿ±
+5.051,ŸÖÿ≥ŸÇÿ∑
+4.785,ŸÜÿ≤ŸäŸÜ
+4.275,ÿµŸÑÿßŸÑŸá
+4.099,ŸÖŸàŸá
+4.092,ÿπÿ±ÿßŸÇ
+3.934,ÿπŸÖÿßŸÜŸäŸàŸÜ_ÿ®ŸÑÿß_Ÿàÿ∏ÿßŸäŸÅ
+3.903,ŸÖÿ®
+3.898,ÿπŸÖÿßŸÜŸä
+3.786,ŸÉÿ∞ÿßŸÉ

Weight?,Feature
+8.034,ÿßÿ¥Ÿä
+6.489,ŸáŸÑŸÇŸäÿ™
+5.752,ÿ¥ŸäŸÉŸÑ
+5.731,ŸÖÿ≠ŸÖÿØ_ÿπÿ≥ÿßŸÅ
+5.147,ŸáÿßÿØÿß
+4.833,ÿ®ÿ∫ÿ≤ÿ©
+4.719,ŸÅÿ¥
+4.532,ŸäÿÆŸà
+4.409,ƒ±≈ü
+4.286,ÿ•ÿ¥Ÿä

Weight?,Feature
+6.520,ÿßŸÑÿ∫ÿ±ÿßŸÅŸá
+5.825,ÿ™ŸÖŸäŸÖ_ÿßŸÑŸÖÿ¨ÿØ
+5.377,ÿ±ŸäŸÉŸä
+4.724,ÿßŸÑŸÖŸáŸÑŸÉŸá
+4.537,ŸÖÿ®
+4.444,ÿØŸÑŸäŸÖ
+4.403,ÿµÿ¨
+4.130,ŸÖŸáÿ®
+3.961,ÿÆŸÜŸàÿ±
+3.824,ÿßŸÑÿ±ŸäÿßŸÜ

Weight?,Feature
+7.384,ŸáŸÖÿ´ŸàŸÜ
+7.079,ÿßŸÑŸÖŸáÿØŸä_ÿ™ÿ±ŸÉŸä
+4.992,ÿπŸàÿ∂_ÿßŸÑÿπŸÑŸäÿßŸÜŸä
+4.398,ÿ£ÿπÿ¨ÿ®ŸÜŸä
+3.623,ÿ®ÿØŸÜŸÉ
+3.520,ŸäÿßÿπŸÖÿØŸá
+3.466,ŸÑÿßŸáŸÜÿ™
+3.267,ŸÑÿßÿπÿØŸÖÿ™ŸÉ
+3.244,ÿ≥ÿßŸäÿ±
+3.210,ÿßŸÑÿ∑ŸàÿßŸÇŸä

Weight?,Feature
+7.388,ŸÑŸäŸáŸà
+7.381,ŸäÿßÿÆ
+6.681,ÿØŸäŸÑ
+6.164,ŸÅŸäŸáŸà
+6.048,ÿ≤ŸàŸÑ
+5.976,ÿ≥ÿßŸä
+5.442,ŸÖŸàŸÉÿ®
+5.328,ÿ≤ÿßÿ™Ÿà
+5.210,ÿßŸÑÿ≤ŸàŸÑ
+5.114,ÿ∫ÿßŸäÿ™Ÿà

Weight?,Feature
+5.224,ŸáŸÑŸÇ
+4.141,ÿßŸÑÿπŸÅÿßÿ±Ÿäÿ™
+4.133,ÿ£ŸÜŸà
+4.123,Ÿäÿßÿßÿßÿ≠ÿ±Ÿäÿ©
+4.085,ŸáÿßÿØ
+4.073,ŸáŸÜŸÜ
+4.067,Ÿ†Ÿ†
+3.967,ŸáÿØŸàŸÑ
+3.836,ŸÖÿ¥ÿßŸÜ
+3.688,ŸáŸäŸÉ

Weight?,Feature
+8.039,ŸÜÿ≥ŸäŸÖ
+7.291,ÿ¨ŸàŸÑŸäÿß
+6.099,ÿ®ÿßÿ¥
+5.901,ŸÖÿßÿ∫Ÿä
+5.860,ÿ™Ÿàÿß
+5.830,ŸäÿπŸäÿ¥ŸÉ
+5.597,ÿ¥ŸÉŸàŸÜ
+5.526,ÿ®ÿßŸáŸä
+5.238,ÿ™ŸàŸÜÿ≥Ÿäÿ©
+5.170,ÿ™Ÿàÿ©

Weight?,Feature
+6.715,ÿ™ÿπÿ≤
+5.113,ÿπŸÅÿßÿ¥
+4.826,ÿπÿØŸÜ
+4.703,ÿ®ÿßŸÑŸäŸÖŸÜ
+4.306,ÿßŸÇŸÑÿßŸÖ_ÿ™ŸÉ
+4.010,ÿßŸÑŸäŸÖŸÜŸä
+3.975,ÿµŸÜÿπÿßÿ°
+3.952,ÿßŸÑŸäŸÖŸÜ
+3.740,ŸäŸÖŸÜŸä
+3.159,ÿ¥Ÿâ


In [14]:
# Spot check: ask `pipe` for the predicted dialect of a single example sentence.
pipe.predict(['ŸÖÿ¥ÿßŸÜ ÿßŸÑŸÑŸá ŸÑÿß ÿ™ŸÇŸàÿµŸÜŸâ ŸÖÿπŸÑŸÖ'])

array(['SY'], dtype=object)

### Saving our model

In [15]:
# Persist the whole pipeline (vectorizer + classifier) to 'model.pkl' in the
# current working directory so it can be reloaded without retraining.
joblib.dump(value=pipe, filename='model.pkl')

['model.pkl']