In [1]:
from fastai.text.all import *

In [2]:
# Load the labelled expenses (one transaction description and its category per
# row) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='./expenses.csv')
df.head(n=5)

Unnamed: 0,description,category
0,TRADER JOE S #068 Merchandise,Groceries & Food
1,WHOLEFDS SVL#10707 Groceries,Groceries & Food
2,Amazon.com Shopping,Amazon
3,OPENAI Shopping,Subscriptions & Memberships
4,Amazon.com*NH99Y5J73 Shopping,Amazon


In [3]:
# Build the train/validation DataLoaders straight from the CSV: `description`
# is tokenized as the model input and `category` is the classification target.
# No `valid_pct` is passed, so the library's default hold-out fraction applies.
# `seed` pins the random train/validation split so that re-running the notebook
# evaluates on the same rows and the logged accuracy figures stay comparable
# from one run to the next.
dls = TextDataLoaders.from_csv(
    path='.',
    csv_fname='expenses.csv',
    text_col='description',
    label_col='category',
    seed=42)
# Preview a few tokenized samples together with their labels.
dls.show_batch(max_n=3)

Unnamed: 0,text,category
0,xxbos xxup xxunk xxup xxunk xxup xxunk ( n xxup xxunk xxmaj food & xxmaj drink,Groceries & Food
1,xxbos xxup xxunk - xxunk xxup xxunk - xxunk - xxunk xxmaj bills & xxmaj utilities,Insurance
2,xxbos xxup xxunk xxup xxunk xxup xxunk ( n xxup xxunk xxmaj food & xxmaj drink,Groceries & Food


In [4]:
# Report how many rows ended up in each split of `dls`.
train_size, valid_size = (len(split) for split in (dls.train_ds, dls.valid_ds))

print(f"Training size: {train_size}")
print(f"Validation size: {valid_size}")

Training size: 252
Validation size: 63


# Fine tuning

In [5]:
# Text classifier built on the AWD_LSTM architecture, reporting accuracy on the
# validation split. This learner is the one trained with `fine_tune`.
learn_finetune = text_classifier_learner(
    dls,
    arch=AWD_LSTM,
    drop_mult=0.5,
    metrics=accuracy,
)

In [6]:
# Train for 10 epochs at a base learning rate of 1e-2. The log printed by this
# cell has two tables: a single epoch first, then the 10 requested epochs.
learn_finetune.fine_tune(epochs=10, base_lr=1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,2.806714,2.574252,0.365079,00:02


epoch,train_loss,valid_loss,accuracy,time
0,2.160652,2.428968,0.650794,00:01
1,2.001271,2.175531,0.730159,00:00
2,1.840311,1.759516,0.714286,00:00
3,1.691128,1.465099,0.746032,00:00
4,1.563053,1.42748,0.68254,00:00
5,1.468787,1.289794,0.698413,00:00
6,1.366393,1.177989,0.730159,00:00
7,1.286151,1.128382,0.730159,00:00
8,1.227368,1.098116,0.730159,00:00
9,1.166456,1.082516,0.730159,00:00


In [7]:
# Show a sample of inputs with the true label (`category`) next to the label
# predicted by the fine-tuned model (`category_`).
learn_finetune.show_results()

Unnamed: 0,text,category,category_
0,xxbos xxup sq * xxunk xxup xxunk xxup xxunk xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
1,xxbos xxup ua.com * xxrep 3 8 -727 - 6687 xxup md xxmaj merchandise,Clothing & Personal Care,Clothing & Personal Care
2,xxbos xxup xxunk xxunk xxup xxunk xxup xxunk xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
3,xxbos xxup xxunk xxup xxunk xxup market * xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
4,xxbos xxup sq * sunnyvale xxup xxunk 's xxup xxunk xxmaj merchandise,Groceries & Food,Clothing & Personal Care
5,xxbos xxup tst * xxmaj xxunk xxmaj xxunk xxmaj xxunk xxmaj dining,Groceries & Food,Groceries & Food
6,xxbos xxup tea xxup xxunk xxup inc xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
7,xxbos xxup sq * xxunk - xxunk xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
8,xxbos xxup sq * xxunk xxup xxunk xxup xxunk xxmaj merchandise,Groceries & Food,Groceries & Food


In [8]:
# Serialize the fine-tuned learner for later inference. The target is resolved
# relative to `learn_finetune.path`, and `export` does not create missing
# parent directories, so make sure `models/` exists first (a fresh checkout
# would otherwise fail on this cell).
# NOTE(review): the file name encodes 12-05-2023, while the run recorded in
# this notebook is logged under 01-02-2024 — confirm the name is still intended.
(learn_finetune.path/'models').mkdir(parents=True, exist_ok=True)
learn_finetune.export('models/120523-finetune.pkl')

In [9]:
# Classify one raw transaction description with the fine-tuned model. The
# displayed result is a triple: the decoded category label, its class index as
# a tensor, and a tensor with one score per category.
learn_finetune.predict('AMZN Mktp US*GW42262P3')

('Amazon',
 tensor(0),
 tensor([0.8309, 0.0024, 0.0115, 0.0061, 0.0046, 0.0154, 0.0190, 0.0095, 0.0076,
         0.0116, 0.0100, 0.0104, 0.0089, 0.0208, 0.0312]))

# Fit one cycle

In [10]:
# A second, independent classifier with the same configuration as the
# fine-tuning learner. This one is trained with `fit_one_cycle` for comparison.
learn_fit = text_classifier_learner(
    dls,
    arch=AWD_LSTM,
    drop_mult=0.5,
    metrics=accuracy,
)

In [11]:
# Train for 10 epochs with the one-cycle schedule. No learning rate is passed
# here (unlike the `fine_tune` run, which uses 1e-2), so the library default
# applies.
learn_fit.fit_one_cycle(n_epoch=10)

epoch,train_loss,valid_loss,accuracy,time
0,2.85781,2.707823,0.031746,00:00
1,2.781974,2.673907,0.063492,00:00
2,2.7412,2.588284,0.365079,00:00
3,2.646519,2.481049,0.539683,00:00
4,2.53306,2.355062,0.571429,00:00
5,2.440076,2.235296,0.571429,00:00
6,2.369006,2.127997,0.571429,00:00
7,2.296825,2.038445,0.571429,00:00
8,2.241405,1.96674,0.571429,00:00
9,2.185588,1.889198,0.571429,00:00


In [12]:
# Show a sample of inputs with the true label (`category`) next to the label
# predicted by the fit-one-cycle model (`category_`).
learn_fit.show_results()

Unnamed: 0,text,category,category_
0,xxbos xxup sq * xxunk xxup xxunk xxup xxunk xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
1,xxbos xxup ua.com * xxrep 3 8 -727 - 6687 xxup md xxmaj merchandise,Clothing & Personal Care,Clothing & Personal Care
2,xxbos xxup xxunk xxunk xxup xxunk xxup xxunk xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
3,xxbos xxup xxunk xxup xxunk xxup market * xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
4,xxbos xxup sq * sunnyvale xxup xxunk 's xxup xxunk xxmaj merchandise,Groceries & Food,Entertainment
5,xxbos xxup tea xxup xxunk xxup inc xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
6,xxbos xxup tst * xxmaj xxunk xxmaj xxunk xxmaj xxunk xxmaj dining,Groceries & Food,Amazon
7,xxbos xxup sq * xxunk - xxunk xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
8,xxbos xxup sq * xxunk xxup xxunk xxup xxunk xxmaj merchandise,Groceries & Food,Groceries & Food


In [13]:
# Serialize the fit-one-cycle learner for later inference. The target is
# resolved relative to `learn_fit.path`, and `export` does not create missing
# parent directories, so make sure `models/` exists first (a fresh checkout
# would otherwise fail on this cell).
# NOTE(review): the file name encodes 12-05-2023, while the run recorded in
# this notebook is logged under 01-02-2024 — confirm the name is still intended.
(learn_fit.path/'models').mkdir(parents=True, exist_ok=True)
learn_fit.export('models/120523-fit.pkl')

In [14]:
# Classify the same raw transaction description with the fit-one-cycle model.
# The displayed result is a triple: the decoded category label, its class index
# as a tensor, and a tensor with one score per category.
learn_fit.predict('AMZN Mktp US*GW42262P3')

('Amazon',
 tensor(0),
 tensor([0.3002, 0.0287, 0.0375, 0.0578, 0.0339, 0.1109, 0.0711, 0.0243, 0.0399,
         0.0255, 0.0460, 0.0515, 0.0769, 0.0497, 0.0462]))

# Model Training Results

## 12-05-2023

### Fine tuning

Training size: 134
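Validation size: 33

The table has two `epoch 0` rows: the first is the single frozen epoch that `fine_tune` runs before unfreezing the model, and the second is the first of the ten unfrozen epochs.

| epoch | train_loss | valid_loss | accuracy | time  |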
|-------|------------|------------|----------|-------|
| 0     | 2.804887   | 2.517872   | 0.333333 | 00:00 |
| 0     | 2.155915   | 2.453891   | 0.424242 | 00:00 |
| 1     | 2.023252   | 2.341634   | 0.515152 | 00:00 |
| 2     | 1.836782   | 2.136570   | 0.636364 | 00:00 |
| 3     | 1.706438   | 1.865172   | 0.727273 | 00:00 |
| 4     | 1.591477   | 1.587878   | 0.757576 | 00:00 |
| 5     | 1.477993   | 1.415652   | 0.787879 | 00:00 |
| 6     | 1.391266   | 1.315022   | 0.787879 | 00:00 |
| 7     | 1.316200   | 1.265274   | 0.787879 | 00:00 |
| 8     | 1.248108   | 1.236477   | 0.787879 | 00:00 |
| 9     | 1.189136   | 1.216803   | 0.787879 | 00:00 |


### Fit one cycle

Training size: 134
Validation size: 33

| epoch | train_loss | valid_loss | accuracy | time  |
|-------|------------|------------|----------|-------|
| 0     | 2.399887   | 2.523246   | 0.242424 | 00:00 |
| 1     | 2.380246   | 2.472462   | 0.363636 | 00:00 |
| 2     | 2.319030   | 2.388698   | 0.454545 | 00:00 |
| 3     | 2.246946   | 2.302907   | 0.545455 | 00:00 |
| 4     | 2.165878   | 2.222806   | 0.575758 | 00:00 |
| 5     | 2.109444   | 2.143269   | 0.606061 | 00:00 |
| 6     | 2.055223   | 2.077895   | 0.606061 | 00:00 |
| 7     | 2.002399   | 2.026833   | 0.606061 | 00:00 |
| 8     | 1.979071   | 1.984128   | 0.636364 | 00:00 |
| 9     | 1.940246   | 1.944949   | 0.666667 | 00:00 |

## 01-02-2024

### Fine tuning

Training size: 252
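Validation size: 63

The table lists only the ten unfrozen epochs; the frozen epoch that `fine_tune` ran before them (train_loss 2.806714, valid_loss 2.574252, accuracy 0.365079) is not included.

| epoch | train_loss | valid_loss | accuracy | time   |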
|-------|------------|------------|----------|--------|
| 0     | 2.160652   | 2.428968   | 0.650794 | 00:01  |
| 1     | 2.001271   | 2.175531   | 0.730159 | 00:00  |
| 2     | 1.840311   | 1.759516   | 0.714286 | 00:00  |
| 3     | 1.691128   | 1.465099   | 0.746032 | 00:00  |
| 4     | 1.563053   | 1.427480   | 0.682540 | 00:00  |
| 5     | 1.468787   | 1.289794   | 0.698413 | 00:00  |
| 6     | 1.366393   | 1.177989   | 0.730159 | 00:00  |
| 7     | 1.286151   | 1.128382   | 0.730159 | 00:00  |
| 8     | 1.227368   | 1.098116   | 0.730159 | 00:00  |
| 9     | 1.166456   | 1.082516   | 0.730159 | 00:00  |


# References
1. [Text transfer learning](https://docs.fast.ai/tutorial.text.html)
2. [TextDataLoaders.from_csv](https://docs.fast.ai/text.data.html#textdataloaders.from_csv)