In [11]:
from fastai.text.all import *

In [6]:
# Load the expense records into a DataFrame and preview the first rows
# (the output below shows a `description` and a `category` column).
df = pd.read_csv('./expenses.csv')
df.head()

Unnamed: 0,description,category
0,TRADER JOE S #068,Groceries & Food
1,WHOLEFDS SVL#10707,Groceries & Food
2,Amazon.com,Amazon
3,OPENAI,Subscriptions & Memberships
4,Amazon.com*NH99Y5J73,Amazon


In [13]:
# Build the train/validation DataLoaders straight from the CSV:
# `description` is the raw transaction text, `category` is the label.
# `valid_pct` is spelled out (0.2 is the fastai default) and `seed` pins the
# random train/validation split, so Restart & Run All evaluates on the same
# held-out rows every time instead of reshuffling them between runs.
# NOTE(review): this only fixes the split; training itself is still
# stochastic unless the global RNGs are seeded as well.
dls = TextDataLoaders.from_csv(
    path='.',
    csv_fname='expenses.csv',
    text_col='description',
    label_col='category',
    valid_pct=0.2,
    seed=42)
dls.show_batch(max_n=3)

Unnamed: 0,text,category
0,xxbos xxup amzn xxmaj mktp xxup us * xxrep 3 xxunk xxup xxunk,Amazon
1,xxbos xxup tst * xxmaj xxunk xxmaj xxunk xxmaj xxunk - xxmaj los,Groceries & Food
2,xxbos xxup xxunk xxup xxunk xxup xxunk ( xxunk ) xxunk / xxunk,Clothing & Personal Care


In [34]:
# Report how many samples ended up in each split.
train_size, valid_size = len(dls.train_ds), len(dls.valid_ds)

print(
    f"Training size: {train_size}",
    f"Validation size: {valid_size}",
    sep="\n",
)

Training size: 134
Validation size: 33


# Fine tuning

In [52]:
# Learner for the fine-tuning experiment: an AWD_LSTM text classifier over
# `dls`, reporting accuracy on the validation set after each epoch.
learn_finetune = text_classifier_learner(
    dls,
    arch=AWD_LSTM,
    drop_mult=0.5,
    metrics=accuracy,
)

In [53]:
# Three epochs at a base learning rate of 1e-2. The recorded output shows an
# extra single-epoch table first: that is the frozen phase `fine_tune` runs
# before unfreezing (its `freeze_epochs` argument is left at the default).
learn_finetune.fine_tune(epochs=3, base_lr=1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,2.734694,2.557105,0.242424,00:00


epoch,train_loss,valid_loss,accuracy,time
0,2.199244,2.431459,0.454545,00:00
1,2.064721,2.16983,0.484848,00:00
2,1.902278,2.04946,0.545455,00:00


In [54]:
# Display sample rows with the true category next to the model's prediction
# (the `category_` column in the output below).
learn_finetune.show_results()

Unnamed: 0,text,category,category_
0,xxbos xxup ua.com * xxrep 3 8 -727 - 6687 xxup md,Clothing & Personal Care,Clothing & Personal Care
1,xxbos xxup sq * xxunk xxup xxunk xxup los xxup xxunk,Groceries & Food,Groceries & Food
2,xxbos xxup everlane * xxunk xxrep 3 xxunk xxunk,Clothing & Personal Care,Clothing & Personal Care
3,xxbos xxup adidas xxup us xxup online xxup store,Clothing & Personal Care,Clothing & Personal Care
4,xxbos xxup xxunk xxup xxunk xxunk xxup online,Groceries & Food,Clothing & Personal Care
5,xxbos xxup sp xxup xxunk - xxunk,Clothing & Personal Care,Clothing & Personal Care
6,xxbos xxup amzn xxmaj mktp xxup xxunk,Amazon,Amazon
7,xxbos xxup sp xxup xxunk xxup xxunk,Clothing & Personal Care,Clothing & Personal Care
8,xxbos xxup xxunk * xxunk xxmaj xxunk,Subscriptions & Memberships,Clothing & Personal Care


In [55]:
# Persist the fine-tuned model in both formats, mirroring the fit-one-cycle
# section. `save` writes the weights checkpoint; the recorded output below,
# models/120523-finetune.pth, is the path `save` returns, so this call was
# evidently run but missing from the cell. `export` then serialises the whole
# Learner to a .pkl file for inference.
learn_finetune.save('120523-finetune')
learn_finetune.export('120523-finetune.pkl')

Path('models/120523-finetune.pth')

In [56]:
# Spot-check a single raw transaction description. Per the output below the
# result is (predicted label, class index tensor, per-class probability tensor).
learn_finetune.predict('AMZN Mktp US*GW42262P3')

('Amazon',
 tensor(0),
 tensor([0.2734, 0.0430, 0.0468, 0.0565, 0.0150, 0.0587, 0.0633, 0.0321, 0.0772,
         0.0820, 0.0592, 0.0906, 0.0355, 0.0668]))

# Fit one cycle

In [57]:
# Fresh learner for the fit-one-cycle experiment, configured identically to
# the fine-tuning one so the two training schedules can be compared.
learn_fit = text_classifier_learner(
    dls,
    arch=AWD_LSTM,
    drop_mult=0.5,
    metrics=accuracy,
)

In [58]:
# Three epochs with the one-cycle schedule at the library's default learning
# rate (no `lr_max` is passed).
# NOTE(review): unlike `fine_tune`, nothing here unfreezes the model, so this
# presumably trains only the unfrozen head of the pretrained network - confirm.
learn_fit.fit_one_cycle(n_epoch=3)

epoch,train_loss,valid_loss,accuracy,time
0,2.822345,2.630266,0.090909,00:00
1,2.753336,2.568347,0.484848,00:00
2,2.647412,2.536277,0.515152,00:00


In [59]:
# Display sample rows with the true category next to the model's prediction
# (the `category_` column in the output below).
learn_fit.show_results()

Unnamed: 0,text,category,category_
0,xxbos xxup ua.com * xxrep 3 8 -727 - 6687 xxup md,Clothing & Personal Care,Clothing & Personal Care
1,xxbos xxup sq * xxunk xxup xxunk xxup los xxup xxunk,Groceries & Food,Clothing & Personal Care
2,xxbos xxup adidas xxup us xxup online xxup store,Clothing & Personal Care,Clothing & Personal Care
3,xxbos xxup everlane * xxunk xxrep 3 xxunk xxunk,Clothing & Personal Care,Clothing & Personal Care
4,xxbos xxup xxunk xxup xxunk xxunk xxup online,Groceries & Food,Clothing & Personal Care
5,xxbos xxup amzn xxmaj mktp xxup xxunk,Amazon,Amazon
6,xxbos xxup xxunk xxup xxunk xxup xxunk,Groceries & Food,Clothing & Personal Care
7,xxbos xxup amzn xxmaj mktp xxup xxunk,Amazon,Amazon
8,xxbos xxup sp xxup xxunk xxup xxunk,Clothing & Personal Care,Clothing & Personal Care


In [61]:
# Persist the model in two formats: `save` writes the weights checkpoint
# (models/120523-fit.pth per the recorded output below) and `export`
# serialises the whole Learner to a .pkl file.
learn_fit.save('120523-fit')
learn_fit.export('120523-fit.pkl')

Path('models/120523-fit.pth')

In [66]:
# Spot-check the same raw description used for the fine-tuned model. Per the
# output below the result is (predicted label, class index tensor, per-class
# probability tensor); the probabilities here are close to uniform.
learn_fit.predict('AMZN Mktp US*GW42262P3')

('Amazon',
 tensor(0),
 tensor([0.0898, 0.0732, 0.0699, 0.0705, 0.0592, 0.0726, 0.0814, 0.0690, 0.0764,
         0.0631, 0.0669, 0.0679, 0.0675, 0.0728]))

# Model Training Results

## 12-05-2023

### Fine tuning

- Training size: 134
- Validation size: 33

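The first `epoch 0` row is the frozen warm-up epoch that `fine_tune` runs before unfreezing the model; the remaining three rows are the unfrozen epochs.

| epoch | train_loss | valid_loss | accuracy | time  |
|-------|------------|------------|----------|-------|
| 0     | 2.881874   | 2.531036   | 0.333333 | 00:00 |
| 0     | 2.322587   | 2.417079   | 0.454545 | 00:00 |
| 1     | 2.200765   | 2.248553   | 0.484848 | 00:00 |
| 2     | 2.093007   | 2.179276   | 0.484848 | 00:00 |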

### Fit one cycle

- Training size: 134
- Validation size: 33

| epoch | train_loss | valid_loss | accuracy | time  |
|-------|------------|------------|----------|-------|
| 0     | 2.868019   | 2.629553   | 0.030303 | 00:00 |
| 1     | 2.762199   | 2.566640   | 0.363636 | 00:00 |
| 2     | 2.673450   | 2.541624   | 0.515152 | 00:00 |


# References
1. [Text transfer learning](https://docs.fast.ai/tutorial.text.html)
2. [TextDataLoaders.from_csv](https://docs.fast.ai/text.data.html#textdataloaders.from_csv)