In [None]:
import pandas as pd
# Toy corpus of animal "sounds": one (text, label) pair per row.
# Note that 'hong ow' appears under two different labels (dog and camel).
data = pd.DataFrame(
    [
        ('meow meow yawn', 'cat'),
        ('nyan yawn ow', 'cat'),
        ('yawn woof woof', 'dog'),
        ('hong ow', 'dog'),
        ('yawn ow', 'camel'),
        ('hong ow', 'camel'),
        ('woof hong', 'dog'),
        ('nyan hong', 'dog'),
    ],
    columns=['text', 'label'],
)
# Only eight rows, so show the whole frame.
data

Unnamed: 0,text,label
0,meow meow yawn,cat
1,nyan yawn ow,cat
2,yawn woof woof,dog
3,hong ow,dog
4,yawn ow,camel
5,hong ow,camel
6,woof hong,dog
7,nyan hong,dog


In [None]:
import pandas as pd
# CountVectorizer lives in the `text` submodule; it is not re-exported by
# `sklearn.feature_extraction` itself.
from sklearn.feature_extraction.text import CountVectorizer
from nltk import word_tokenize
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load data: the eight toy examples repeated 50 times (400 rows in total).
data = pd.DataFrame({
    'text': ['meow meow yawn', 'nyan yawn ow', 'yawn woof woof', 'hong ow', 'yawn ow', 'hong ow', 'woof hong', 'nyan hong'] * 50,
    'label': ['cat', 'cat', 'dog', 'dog', 'camel', 'camel', 'dog', 'dog'] * 50
})

# Prepare data: 80% train, then the remaining 20% is halved into dev and test.
# Because the rows are exact repeats, dev/test texts also occur in train.
train, dev_test = train_test_split(data, train_size=0.8, random_state=140)
dev, test = train_test_split(dev_test, test_size=0.5, random_state=140)

# Bag-of-words counts; the vocabulary is fitted on the training split only.
vectorizer = CountVectorizer()
train_feature_vectors = vectorizer.fit_transform(train['text'])

# Train model
lr_text_classifier = LogisticRegression()
lr_text_classifier.fit(train_feature_vectors, train['label'])

# Evaluation: reuse the fitted vocabulary (transform, not fit_transform).
dev_feature_vectors = vectorizer.transform(dev['text'])
dev_predictions = lr_text_classifier.predict(dev_feature_vectors)
# classification_report takes (y_true, y_pred). Passing them the other way
# round swaps precision with recall and reports support of the predictions
# instead of the gold labels.
print(classification_report(dev['label'], dev_predictions))

              precision    recall  f1-score   support

       camel       1.00      0.73      0.84        11
         cat       1.00      1.00      1.00         9
         dog       0.87      1.00      0.93        20

    accuracy                           0.93        40
   macro avg       0.96      0.91      0.92        40
weighted avg       0.93      0.93      0.92        40



In [None]:
# Coefficient table: one row per class, one column per vocabulary term.
# CountVectorizer has no `feature_names_` attribute (that is DictVectorizer's);
# get_feature_names_out() returns the terms in the column order of the
# count matrix, which is the order of the columns of `coef_`.
weights = pd.DataFrame(lr_text_classifier.coef_,
                       columns=vectorizer.get_feature_names_out(),
                       index=lr_text_classifier.classes_)

In [None]:
# Display the coefficient table (rows: classes, columns: vocabulary terms).
weights

Unnamed: 0,hong,meow,nyan,ow,woof,yawn
camel,-0.162228,-1.061361,-2.524037,2.133489,-1.018174,0.161942
cat,-1.348017,2.408819,2.541501,-0.360179,-1.091205,1.3477
dog,1.510244,-1.347458,-0.017464,-1.773311,2.109379,-1.509642


In [None]:
# Flip the table so each row is a term and each column is a class.
weights.T

Unnamed: 0,camel,cat,dog
hong,-0.162228,-1.348017,1.510244
meow,-1.061361,2.408819,-1.347458
nyan,-2.524037,2.541501,-0.017464
ow,2.133489,-0.360179,-1.773311
woof,-1.018174,-1.091205,2.109379
yawn,0.161942,1.3477,-1.509642


In [None]:
# Terms ranked by their coefficient for the 'camel' class, strongest first.
weights.T.sort_values(by='camel', ascending=False)

Unnamed: 0,camel,cat,dog
ow,2.133489,-0.360179,-1.773311
yawn,0.161942,1.3477,-1.509642
hong,-0.162228,-1.348017,1.510244
woof,-1.018174,-1.091205,2.109379
meow,-1.061361,2.408819,-1.347458
nyan,-2.524037,2.541501,-0.017464


In [None]:
# Per-class coefficients for the single term 'nyan'.
weights['nyan']

camel   -2.524037
cat      2.541501
dog     -0.017464
Name: nyan, dtype: float64

In [None]:
# Save the coefficient table to an Excel workbook (relative path, so it is
# written to the kernel's current working directory).
weights.to_excel('weight_animals.xlsx')

In [None]:
# Per-class intercept (bias) terms, indexed by class label.
pd.DataFrame(
    data=lr_text_classifier.intercept_,
    index=lr_text_classifier.classes_,
)

Unnamed: 0,0
camel,-0.395317
cat,-1.406633
dog,1.801951
