# Setup Notebook

## Mount Google Drive

In [None]:
from google.colab import drive
# Mount the Colab `drive` helper at /content/gdrive so that files can be
# reached through that path from the rest of the notebook.
drive.mount('/content/gdrive')

## Install dependencies

## Load Imports

# Train Model

## Load Data

In [None]:
def read_data(path: str, name: str):
    '''Load the csv file ``<path>/<name>.csv`` into a DataFrame

    Args:
        path (str): directory that contains the csv file
        name (str): file name without the ``.csv`` extension
            (e.g. train or test)

    Returns:
        pandas.core.frame.DataFrame with the parsed contents of the file
    '''
    csv_file = f'{path}/{name}.csv'
    return pd.read_csv(csv_file)

In [None]:
def extract_columns(df: pd.core.frame.DataFrame, columns: list = None,
                    new_columns: list = None):
    '''Select certain columns of a dataframe and rename them

    Args:
        df (pd.core.frame.DataFrame): DataFrame to extract the columns from
        columns (list): Column names to extract, in output order.
            None selects ['Headline', 'articleBody', 'Stance']
        new_columns (list): New column names, positionally matched to
            ``columns``. None selects ['text_a', 'text_b', 'labels']

    Returns:
        New dataframe holding only the selected columns under their new
        names; ``df`` itself is left untouched

    Raises:
        KeyError: if one of ``columns`` is not present in ``df``
        ValueError: if ``columns`` and ``new_columns`` differ in length
    '''
    # None sentinels instead of list literals in the signature: default
    # lists are created once and shared between all calls.
    if columns is None:
        columns = ['Headline', 'articleBody', 'Stance']
    if new_columns is None:
        new_columns = ['text_a', 'text_b', 'labels']

    if len(columns) != len(new_columns):
        raise ValueError(
            f'columns and new_columns must have the same length, '
            f'got {len(columns)} and {len(new_columns)}')

    # Work on an explicit copy so that the rename (and any later write to
    # the result) can never affect the caller's dataframe.
    processed_df = df[columns].copy()
    processed_df.columns = new_columns
    return processed_df

In [None]:
def encod_labels(df, label_dict=None):
    '''Encode the label strings of a dataframe as ints

    Args:
        df (pd.core.frame.DataFrame): DataFrame with a 'labels' column
        label_dict (dict): Optional existing label -> int mapping to reuse,
            so that several dataframes (e.g. train and test) share one
            encoding. When None, a mapping is built from ``df`` by numbering
            the labels in order of first appearance

    Returns:
        Tuple ``(encoded_df, label_dict)``: a new dataframe whose 'labels'
        column holds the int codes, and the mapping that was applied

    Raises:
        ValueError: if ``label_dict`` is given and ``df`` contains a label
            that is missing from it
    '''
    observed = df['labels'].unique()

    if label_dict is None:
        label_dict = {label: code for code, label in enumerate(observed)}
    else:
        # Series.map turns unmapped values into NaN; fail loudly instead of
        # silently producing missing labels.
        unknown = [label for label in observed if label not in label_dict]
        if unknown:
            raise ValueError(f'labels missing from label_dict: {unknown}')

    # Map only the 'labels' column; assign returns a new dataframe, so the
    # input is not modified.
    encoded_df = df.assign(labels=df['labels'].map(label_dict))
    return encoded_df, label_dict

In [None]:
def load_data(path: str, name: str, label_dict: dict = None):
    '''Read a csv file and process it to the desired format

    Args:
        path (str): parent directory to file
        name (str): type of csv to load (train or test)
        label_dict (dict): Optional label -> int mapping to apply instead of
            deriving a new one from the loaded file. Pass the mapping
            returned for the training split when loading another split so
            that both use the same label ids

    Returns:
        Tuple ``(encoded_df, label_dict)``: the preprocessed dataframe with
        int labels, and the label -> int mapping that was applied

    Raises:
        ValueError: if ``label_dict`` is given and the file contains a label
            that is missing from it
    '''
    df = read_data(path, name)
    processed_df = extract_columns(df)

    if label_dict is None:
        return encod_labels(processed_df)

    # Reuse the caller's mapping. Unknown labels would become NaN under
    # Series.map, so reject them explicitly.
    unknown = [label for label in processed_df['labels'].unique()
               if label not in label_dict]
    if unknown:
        raise ValueError(f'labels missing from label_dict: {unknown}')
    encoded_df = processed_df.assign(
        labels=processed_df['labels'].map(label_dict))
    return encoded_df, label_dict

train, l2e = load_data('data', 'train')
# Encode test with the train mapping: building a mapping per file numbers the
# labels by order of first appearance, which can differ between the files.
test, _ = load_data('data', 'test', label_dict=l2e)