In [1]:
import logging
import torch
import numpy as np
import pandas as pd

from loading import load_train
from preprocessing import remove_tags
from evaluation import evaluate, evaluate_model
from submission import prepare_submission, prepare_model_submission

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package stopwords to /Users/jonas/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/jonas/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
# Show INFO-level records: the metric reports printed further down arrive
# through the root logger at INFO level (they appear as `INFO:root:...`).
logging.basicConfig(level=logging.INFO)

## Components

### Loading training data

In [3]:
# Load the training data; the bare `df` on the last line renders the frame
# (columns: index, x, y) as the cell output.
# NOTE(review): full=False presumably selects the reduced training set -
# confirm against loading.load_train.
df = load_train(full=False)
df

Unnamed: 0,index,x,y
0,0,<user> i dunno justin read my mention or not ....,1
1,1,"because your logic is so dumb , i won't even c...",1
2,2,""" <user> just put casper in a box ! "" looved t...",1
3,3,<user> <user> thanks sir > > don't trip lil ma...,1
4,4,visiting my brother tmr is the bestest birthda...,1
...,...,...,...
199995,199995,can't wait to fake tan tonight ! hate being pa...,-1
199996,199996,<user> darling i lost my internet connection ....,-1
199997,199997,kanguru defender basic 4 gb usb 2.0 flash driv...,-1
199998,199998,rizan is sad now\n,-1


### Preprocessing

In [4]:
# remove_tags is called purely for its side effect: nothing is assigned, yet
# the `x` column displayed below no longer contains the `<user>` markers, so
# the helper edits `df` in place. Re-run the loading cell to get the raw text back.
remove_tags(df)
df

Unnamed: 0,index,x,y
0,0,i dunno justin read my mention or not . only j...,1
1,1,"because your logic is so dumb , i won't even c...",1
2,2,""" just put casper in a box ! "" looved the bat...",1
3,3,thanks sir > > don't trip lil mama ... just ke...,1
4,4,visiting my brother tmr is the bestest birthda...,1
...,...,...,...
199995,199995,can't wait to fake tan tonight ! hate being pale,-1
199996,199996,darling i lost my internet connection .. and i...,-1
199997,199997,kanguru defender basic 4 gb usb 2.0 flash driv...,-1
199998,199998,rizan is sad now,-1


### Testing model

In [5]:
# Ground-truth labels as a tensor. Convert through NumPy explicitly: passing
# the Series itself leaves it to torch to walk a pandas object (and its
# index), whereas .to_numpy() hands over just the underlying values.
y = torch.tensor(df['y'].to_numpy())
# Constant all-ones "prediction" with the same shape and dtype as the labels;
# used below as a trivial baseline.
y_ones = torch.ones_like(y)

In [6]:
# Score two constant baselines against the labels: all +1, then all -1.
# Each call logs its own metric report; only the return value of the last
# call is rendered as the cell result.
evaluate(y, y_ones)
evaluate(y, -y_ones)

INFO:root:---
* accuracy: 0.5
* precision: 0.5
* recall: 1.0
* f1: 0.6666666666666666
* bce: 17.269787996170436
* auc: 0.5
---
  _warn_prf(average, modifier, msg_start, len(result))
INFO:root:---
* accuracy: 0.5
* precision: 0.0
* recall: 0.0
* f1: 0.0
* bce: 17.269388197455328
* auc: 0.5
---


(0.5, 0.0, 0.0, 0.0, 17.269388197455328, 0.5)

### Submitting model

Generate a submission file from the constant all-ones baseline predictions (`y_ones`).

In [7]:
# Build a submission from the constant all-ones predictions.
# NOTE(review): the output presumably goes to 'test_submission.csv' -
# confirm against submission.prepare_submission.
prepare_submission(y_ones, file='test_submission.csv')

## Pulling it all together

In [8]:
def model(df: pd.DataFrame) -> np.ndarray:
  """Baseline model: predict the positive class (1) for every row.

  Args:
    df: Frame with a text column `x`. Training if df has column `y`;
      evaluating otherwise. This baseline ignores `y` in both cases.

  Returns:
    An int32 array of ones, one entry per row of `df['x']`.
  """
  # remove_tags edits the frame it receives, so preprocess a copy and leave
  # the caller's DataFrame exactly as it was passed in.
  df_clean = df.copy()
  remove_tags(df_clean)
  return np.ones(df_clean['x'].shape, dtype=np.int32)

In [9]:
# Load the training data afresh so `model` receives the raw text (the earlier
# `df` has already been passed through remove_tags).
df_train = load_train(full=False)
# Evaluate `model` on it; the metric report is logged and the returned tuple
# is rendered as the cell result.
evaluate_model(model, df_train)

INFO:root:---
* accuracy: 0.5
* precision: 0.5
* recall: 1.0
* f1: 0.6666666666666666
* bce: 17.269787996170436
* auc: 0.5
---


(0.5, 0.5, 1.0, 0.6666666666666666, 17.269787996170436, 0.5)

In [10]:
# Produce the submission for `model`.
# NOTE(review): no data or file name is passed here, so
# prepare_model_submission presumably loads the test set and chooses the
# output path itself - confirm against the submission module.
prepare_model_submission(model)