# Setup Notebook

## Install Dependencies

In [None]:
! pip install simpletransformers

## Load Imports

In [None]:
import numpy as np
import pandas as pd
import sklearn

from pathlib import Path
from simpletransformers.language_modeling import LanguageModelingModel, LanguageModelingArgs

# Configuration File

In [None]:
# Central settings for the notebook, grouped by stage.
config = {
    # Data-loading options. In the cells shown here only 'columns' is read
    # (by load_data); 'subset' and 'split' are not referenced.
    'data': {
        'subset': 1,
        'split': 1,
        'columns': ['Headline', 'articleBody', 'Stance'],
        # Alternative column set:
        # 'columns': ['Headline', 'articleBody', 'related'],
    },

    # Model type and pretrained model name handed to LanguageModelingModel.
    'model': {
        # RoBERTa alternative (roberta-base, or roberta-large):
        # 'model_type': 'roberta',
        # 'model_name': 'roberta-base',
        'model_type': 'bert',
        'model_name': 'bert-base-uncased',
    },

    # Passed unchanged as `args` when the model is constructed.
    'training': {
        'learning_rate': 1e-5,
        'num_train_epochs': 1,
        'train_batch_size': 4,
        'eval_batch_size': 4,
        'sliding_window': True,
    },
}

In [None]:
# Input directory that the data files used by this notebook are read from.
path = Path('../input/fnc-1/')

# List the directory contents via the shell; IPython substitutes {path}
# with the Python value before running the command.
! ls {path}

# Load Data

In [None]:
def read_data(path: str, name: str):
    '''Load ``<path>/<name>.csv`` as a DataFrame

    Args:
        path (str): parent directory to file (formatted into the file
            location, so a pathlib.Path works as well)
        name (str): csv file name without extension (train or test)

    Returns:
        pandas.core.frame.DataFrame with the parsed contents of the csv
    '''
    csv_location = f'{path}/{name}.csv'
    return pd.read_csv(csv_location, encoding='utf-8')

In [None]:
def load_data(path: str, name: str, config: dict):
    '''Build a flat text corpus from one of the csv files

    Args:
        path (str): parent directory to file
        name (str): type of csv to load (train or test)
        config (dict): data-loading parameters; ``config['columns']`` lists
            the columns to keep and has to contain 'Headline' and
            'articleBody'

    Returns:
        numpy.ndarray holding every 'Headline' value followed by every
        'articleBody' value
    '''
    # Restrict to the configured columns first; a column that is missing
    # from the csv raises KeyError at this point.
    selected = read_data(path, name)[config['columns']]

    headlines = selected['Headline'].values
    bodies = selected['articleBody'].values

    # Headlines come first, article bodies are appended after them.
    return np.concatenate((headlines, bodies))

In [None]:
# train = load_data(path, 'train', config['data'])
# test = load_data(path, 'test', config['data'])

# Save File (Run Once)

In [None]:
# with open('train.txt', 'w') as f:
#     for item in train:
#         f.write("%s\n" % item)

In [None]:
# with open('test.txt', 'w') as f:
#     for item in test:
#         f.write("%s\n" % item)

# Train Model

In [None]:
# Build the language model from the configured type and pretrained name;
# the 'training' section of the config is handed over as the model args.
model = LanguageModelingModel(
    config['model']['model_type'],
    config['model']['model_name'],
    args=config['training'],
)

In [None]:
# Text files passed to train_model / eval_model below, looked up inside `path`.
# NOTE(review): the commented-out "Save File" cells write train.txt/test.txt to
# the working directory, not to `path` — confirm the files exist at this location.
train_file = f'{path}/train.txt'
test_file = f'{path}/test.txt'

In [None]:
# Run training on the training text file.
model.train_model(train_file)

# Evaluate Model

In [None]:
# Evaluate on the test text file and show the returned result as the cell output.
result = model.eval_model(test_file)
result

In [None]:
# Save the model. No output directory is passed, so the library's default
# location applies — TODO confirm where that is for this configuration.
model.save_model()