In [1]:
from fastai.text.all import *

In [2]:
# Load the raw expenses CSV into a DataFrame and preview the first rows.
# The columns used downstream are `description` (text) and `category` (label).
df = pd.read_csv('./expenses.csv')
df.head()

Unnamed: 0,description,category
0,TRADER JOE S #068 Merchandise,Groceries & Food
1,WHOLEFDS SVL#10707 Groceries,Groceries & Food
2,Amazon.com Shopping,Amazon
3,OPENAI Shopping,Subscriptions & Memberships
4,Amazon.com*NH99Y5J73 Shopping,Amazon


In [3]:
# Build the text DataLoaders straight from the CSV: `description` is the input
# text and `category` is the label to predict.
# `valid_pct=0.2` is fastai's default hold-out fraction, spelled out here so the
# split is visible to the reader. The fixed `seed` makes the random
# train/validation split repeatable, so accuracy numbers from separate runs on
# the same CSV can be compared against each other.
dls = TextDataLoaders.from_csv(
    path='.',
    csv_fname='expenses.csv',
    text_col='description',
    label_col='category',
    valid_pct=0.2,
    seed=42)
dls.show_batch(max_n=3)

Unnamed: 0,text,category
0,xxbos xxup target xxup debit xxup crd xxup ach xxup tran xxrep 3 0 716475562584 xxup pos xxup i d : 1410215170,Target
1,xxbos xxup target xxup debit xxup crd xxup ach xxup tran xxrep 3 0 xxunk xxup web xxup i d : 1410215170,Target
2,xxbos xxup target xxup debit xxup crd xxup ach xxup tran xxrep 3 0 716475562584 xxup pos xxup i d : 1410215170,Target


In [4]:
# Record how many items ended up in the training and validation datasets,
# then report both counts (one per line).
train_size, valid_size = len(dls.train_ds), len(dls.valid_ds)
print(f"Training size: {train_size}", f"Validation size: {valid_size}", sep="\n")

Training size: 358
Validation size: 89


# Fine tuning

In [5]:
# Classifier learner for the fine-tuning experiment: AWD_LSTM architecture on
# `dls`, dropout multiplier 0.5, reporting accuracy after each epoch.
learn_finetune = text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)

In [6]:
# Fine-tune for 10 epochs with a base learning rate of 1e-2.
# The output shows two tables: a single initial epoch, then the 10 requested epochs.
learn_finetune.fine_tune(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,2.679732,2.395353,0.426966,00:06


epoch,train_loss,valid_loss,accuracy,time
0,1.853211,2.191134,0.494382,00:01
1,1.718374,1.739009,0.617977,00:00
2,1.543114,1.338402,0.662921,00:00
3,1.39546,1.184066,0.707865,00:00
4,1.282518,1.130067,0.730337,00:00
5,1.168134,1.080931,0.730337,00:00
6,1.066791,1.072051,0.752809,00:00
7,0.972805,1.067995,0.764045,00:00
8,0.910769,1.05947,0.764045,00:00
9,0.851612,1.054028,0.764045,00:00


In [7]:
# Eyeball a few examples: `category` and `category_` are shown side by side
# (in fastai these are the true label and the model's prediction, respectively).
learn_finetune.show_results()

Unnamed: 0,text,category,category_
0,xxbos xxup target xxup debit xxup crd xxup ach xxup tran xxrep 3 0 xxunk xxup web xxup i d : 1410215170,Target,Target
1,xxbos xxup t - mobile xxup pcs xxup svc xxunk xxup web xxup i d : xxrep 4 0 450304,Utilities,Utilities
2,xxbos xxup xxunk - xxunk xxup xxunk - xxunk - xxunk xxmaj bills & xxmaj utilities,Insurance,Clothing & Personal Care
3,xxbos xxup cafe xxup xxunk xxup mountain xxup view : g xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
4,xxbos xxup xxunk xxup loan xxup payment xxup ppd xxup i d : xxunk,Transportation,Transportation
5,xxbos xxup ua.com * xxrep 3 8 -727 - 6687 xxup md xxmaj merchandise,Clothing & Personal Care,Clothing & Personal Care
6,xxbos xxup sq * xxunk xxup xxunk xxup xxunk xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
7,xxbos xxmaj xxunk xxmaj inc xxup xxunk xxup ppd xxup i d : xxunk,Miscellaneous,Housing
8,xxbos xxup sq * xxunk xxup xxunk xxup los xxup xxunk xxmaj dining,Groceries & Food,Groceries & Food


In [8]:
# `export` pickles the learner to `learn_finetune.path / fname` and does not
# create missing parent folders, so make sure `models/` exists first; otherwise
# a fresh checkout fails here with a missing-directory error.
(learn_finetune.path/'models').mkdir(parents=True, exist_ok=True)
learn_finetune.export('models/latest-finetune.pkl')

In [9]:
# Spot-check a single raw descriptor. The result is a tuple of the predicted
# label, its class index, and a tensor with one score per category.
learn_finetune.predict('AMZN Mktp US*GW42262P3')

('Amazon',
 tensor(0),
 tensor([9.9193e-01, 4.5418e-06, 4.6912e-04, 4.8144e-04, 3.2887e-06, 1.0540e-03,
         3.9040e-03, 5.9561e-04, 3.3161e-06, 9.0453e-05, 4.9208e-05, 1.7164e-04,
         5.8062e-04, 1.8801e-04, 4.7726e-04]))

# Fit one cycle

In [10]:
# Second learner with the same configuration as `learn_finetune` (same `dls`,
# AWD_LSTM, drop_mult=0.5, accuracy metric), so the two training schedules can
# be compared from the same starting setup.
learn_fit = text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)

In [11]:
# Train for 10 epochs with `fit_one_cycle`; no learning rate is passed, so the
# learner's default is used (unlike the fine-tuning run, which passes 1e-2).
learn_fit.fit_one_cycle(10)

epoch,train_loss,valid_loss,accuracy,time
0,2.855009,2.684448,0.05618,00:00
1,2.792808,2.604651,0.269663,00:00
2,2.64853,2.422087,0.505618,00:00
3,2.50275,2.171919,0.539326,00:00
4,2.35445,1.945733,0.58427,00:00
5,2.230627,1.764531,0.58427,00:00
6,2.120343,1.635545,0.629214,00:00
7,2.027695,1.568989,0.606742,00:00
8,1.969882,1.519375,0.629214,00:00
9,1.910625,1.509366,0.617977,00:00


In [12]:
# Eyeball a few examples: `category` and `category_` are shown side by side
# (in fastai these are the true label and the model's prediction, respectively).
learn_fit.show_results()

Unnamed: 0,text,category,category_
0,xxbos xxup target xxup debit xxup crd xxup ach xxup tran xxrep 3 0 xxunk xxup web xxup i d : 1410215170,Target,Target
1,xxbos xxup t - mobile xxup pcs xxup svc xxunk xxup web xxup i d : xxrep 4 0 450304,Utilities,Target
2,xxbos xxup xxunk - xxunk xxup xxunk - xxunk - xxunk xxmaj bills & xxmaj utilities,Insurance,Miscellaneous
3,xxbos xxup cafe xxup xxunk xxup mountain xxup view : g xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
4,xxbos xxup xxunk xxup loan xxup payment xxup ppd xxup i d : xxunk,Transportation,Utilities
5,xxbos xxmaj xxunk xxmaj inc xxup xxunk xxup ppd xxup i d : xxunk,Miscellaneous,Housing
6,xxbos xxup sq * xxunk xxup xxunk xxup xxunk xxmaj food & xxmaj drink,Groceries & Food,Groceries & Food
7,xxbos xxup ua.com * xxrep 3 8 -727 - 6687 xxup md xxmaj merchandise,Clothing & Personal Care,Clothing & Personal Care
8,xxbos xxup sq * xxunk xxup xxunk xxup los xxup xxunk xxmaj dining,Groceries & Food,Groceries & Food


In [13]:
# `export` pickles the learner to `learn_fit.path / fname` and does not create
# missing parent folders, so make sure `models/` exists first; otherwise a
# fresh checkout fails here with a missing-directory error.
(learn_fit.path/'models').mkdir(parents=True, exist_ok=True)
learn_fit.export('models/latest-fit.pkl')

In [14]:
# Spot-check the same raw descriptor used for the fine-tuned learner. The result
# is a tuple of the predicted label, its class index, and a tensor with one
# score per category.
learn_fit.predict('AMZN Mktp US*GW42262P3')

('Amazon',
 tensor(0),
 tensor([9.3262e-01, 3.0360e-04, 1.8889e-03, 5.8329e-03, 2.2046e-03, 5.1053e-03,
         1.3033e-02, 6.4312e-03, 9.5341e-04, 6.4787e-03, 2.7623e-03, 1.9446e-03,
         1.7854e-03, 7.3337e-03, 1.1322e-02]))

# Model Training Results

## 12-05-2023

### Fine tuning

Training size: 134
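Validation size: 33

Note: the table below has two `epoch 0` rows. The first is the single initial epoch that `fine_tune` reports in its own table before the main run; the rows after it are the 10 requested epochs.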

| epoch | train_loss | valid_loss | accuracy | time  |
|-------|------------|------------|----------|-------|
| 0     | 2.804887   | 2.517872   | 0.333333 | 00:00 |
| 0     | 2.155915   | 2.453891   | 0.424242 | 00:00 |
| 1     | 2.023252   | 2.341634   | 0.515152 | 00:00 |
| 2     | 1.836782   | 2.136570   | 0.636364 | 00:00 |
| 3     | 1.706438   | 1.865172   | 0.727273 | 00:00 |
| 4     | 1.591477   | 1.587878   | 0.757576 | 00:00 |
| 5     | 1.477993   | 1.415652   | 0.787879 | 00:00 |
| 6     | 1.391266   | 1.315022   | 0.787879 | 00:00 |
| 7     | 1.316200   | 1.265274   | 0.787879 | 00:00 |
| 8     | 1.248108   | 1.236477   | 0.787879 | 00:00 |
| 9     | 1.189136   | 1.216803   | 0.787879 | 00:00 |


### Fit one cycle

Training size: 134
Validation size: 33

| epoch | train_loss | valid_loss | accuracy | time  |
|-------|------------|------------|----------|-------|
| 0     | 2.399887   | 2.523246   | 0.242424 | 00:00 |
| 1     | 2.380246   | 2.472462   | 0.363636 | 00:00 |
| 2     | 2.319030   | 2.388698   | 0.454545 | 00:00 |
| 3     | 2.246946   | 2.302907   | 0.545455 | 00:00 |
| 4     | 2.165878   | 2.222806   | 0.575758 | 00:00 |
| 5     | 2.109444   | 2.143269   | 0.606061 | 00:00 |
| 6     | 2.055223   | 2.077895   | 0.606061 | 00:00 |
| 7     | 2.002399   | 2.026833   | 0.606061 | 00:00 |
| 8     | 1.979071   | 1.984128   | 0.636364 | 00:00 |
| 9     | 1.940246   | 1.944949   | 0.666667 | 00:00 |

## 01-02-2024

### Fine tuning

Training size: 252
Validation size: 63

| epoch | train_loss | valid_loss | accuracy | time   |
|-------|------------|------------|----------|--------|
| 0     | 2.160652   | 2.428968   | 0.650794 | 00:01  |
| 1     | 2.001271   | 2.175531   | 0.730159 | 00:00  |
| 2     | 1.840311   | 1.759516   | 0.714286 | 00:00  |
| 3     | 1.691128   | 1.465099   | 0.746032 | 00:00  |
| 4     | 1.563053   | 1.427480   | 0.682540 | 00:00  |
| 5     | 1.468787   | 1.289794   | 0.698413 | 00:00  |
| 6     | 1.366393   | 1.177989   | 0.730159 | 00:00  |
| 7     | 1.286151   | 1.128382   | 0.730159 | 00:00  |
| 8     | 1.227368   | 1.098116   | 0.730159 | 00:00  |
| 9     | 1.166456   | 1.082516   | 0.730159 | 00:00  |

## 02-02-2024

### Fine tuning

Training size: 298
Validation size: 74

| epoch | train_loss | valid_loss | accuracy | time  |
|-------|------------|------------|----------|-------|
| 0     | 1.777669   | 2.196177   | 0.689189 | 00:01 |
| 1     | 1.656596   | 1.718567   | 0.702703 | 00:00 |
| 2     | 1.501173   | 1.255754   | 0.716216 | 00:00 |
| 3     | 1.354056   | 1.084962   | 0.756757 | 00:00 |
| 4     | 1.225033   | 1.005001   | 0.743243 | 00:00 |
| 5     | 1.119112   | 0.882296   | 0.756757 | 00:00 |
| 6     | 1.042848   | 0.829607   | 0.770270 | 00:00 |
| 7     | 0.973451   | 0.828626   | 0.770270 | 00:00 |
| 8     | 0.907481   | 0.826008   | 0.770270 | 00:00 |
| 9     | 0.872404   | 0.831227   | 0.770270 | 00:00 |

## 03-01-2024

### Fine tuning

Training size: 358
Validation size: 89

| epoch | train_loss | valid_loss | accuracy | time  |
|-------|------------|------------|----------|-------|
|   0   |  1.853211  |  2.191134  |  0.494382| 00:01 |
|   1   |  1.718374  |  1.739009  |  0.617977| 00:00 |
|   2   |  1.543114  |  1.338402  |  0.662921| 00:00 |
|   3   |  1.395460  |  1.184066  |  0.707865| 00:00 |
|   4   |  1.282518  |  1.130067  |  0.730337| 00:00 |
|   5   |  1.168134  |  1.080931  |  0.730337| 00:00 |
|   6   |  1.066791  |  1.072051  |  0.752809| 00:00 |
|   7   |  0.972805  |  1.067995  |  0.764045| 00:00 |
|   8   |  0.910769  |  1.059470  |  0.764045| 00:00 |
|   9   |  0.851612  |  1.054028  |  0.764045| 00:00 |

# References
1. [Text transfer learning](https://docs.fast.ai/tutorial.text.html)
2. [TextDataLoaders.from_csv](https://docs.fast.ai/text.data.html#textdataloaders.from_csv)