## **_Using News Data to Predict Movements in the Financial Markets_**

We'll be using two approaches here:

* Continuous Bag of Words Model
* RNN Models using Word Embeddings

In [27]:
import torch
import torch.utils.data as tud
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import Counter, defaultdict
import operator
import os, math
import numpy as np
import pandas as pd
import random
import copy
import string
import multiprocessing as mp

In [3]:
def word_tokenize(s):
    """Split ``s`` into a list of tokens on runs of whitespace.

    Leading and trailing whitespace is ignored, so no empty tokens are
    produced; an empty or all-whitespace string yields an empty list.
    """
    tokens = s.split()
    return tokens

# Seed every random number generator used in this notebook (Python, NumPy,
# PyTorch CPU and CUDA) so the experiments can be replicated exactly.
SEED = 72689
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    # Seed the generators of all visible GPUs, not only the current device.
    torch.cuda.manual_seed_all(SEED)

# Global class labels.
POS_LABEL = 'up'
NEG_LABEL = 'down'

**Reading in all the Data**

In [7]:
# Load the combined table and discard the 'Unnamed: 0' column (presumably an
# index written out when the CSV was saved) in a single expression.
data = pd.read_csv("ProcessedData/CombinedData.csv").drop(columns=['Unnamed: 0'])
data.head()

Unnamed: 0,Title,Date,Content,OpenMove,CloseMove
0,Top U.S. General Praises Iran-Backed Shiite Mi...,2017-01-04,The top commander of the U.S.-led coalition ag...,1.0,1.0
1,Extremists Turn to a Leader to Protect Western...,2017-01-04,As the founder of the Traditionalist Worker Pa...,1.0,1.0
2,How Julian Assange evolved from pariah to paragon,2017-01-04,President-elect Donald Trump tweeted some pra...,1.0,1.0
3,House panel recommends cutting funding for Pla...,2017-01-04,A House panel formed by Republicans to invest...,1.0,1.0
4,Missouri Bill: Gun-Banning Businesses Liable f...,2017-01-04,As Missouri lawmakers convene for the 2017 leg...,1.0,1.0


### Preprocessing the Data For Feeding Into The Model

Preprocessing involves (in our case):
* Converting all words to a single case (lowercase here)
* Removing punctuation, accent marks and other diacritics
* Removing stop words, sparse terms, and particular words

In [12]:
# Lowercase both free-text columns so that later token matching is
# case-insensitive.
for text_column in ('Title', 'Content'):
    data[text_column] = data[text_column].str.lower()
data.head()

Unnamed: 0,Title,Date,Content,OpenMove,CloseMove
0,top u.s. general praises iran-backed shiite mi...,2017-01-04,the top commander of the u.s.-led coalition ag...,1.0,1.0
1,extremists turn to a leader to protect western...,2017-01-04,as the founder of the traditionalist worker pa...,1.0,1.0
2,how julian assange evolved from pariah to paragon,2017-01-04,president-elect donald trump tweeted some pra...,1.0,1.0
3,house panel recommends cutting funding for pla...,2017-01-04,a house panel formed by republicans to invest...,1.0,1.0
4,missouri bill: gun-banning businesses liable f...,2017-01-04,as missouri lawmakers convene for the 2017 leg...,1.0,1.0


In [29]:
# Removing all Punctuation
# Translation table that deletes every character in string.punctuation.
# Built once here instead of on every rm_punc call, since rm_punc is applied
# to each row of two columns.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def rm_punc(text):
    """Return ``text`` with every ``string.punctuation`` character removed.

    Only the ASCII punctuation listed in ``string.punctuation`` is stripped;
    other characters (including non-ASCII quotes or dashes) are left as-is.

    Args:
        text: The string to clean.

    Returns:
        A new string without ASCII punctuation characters.
    """
    return text.translate(_PUNCTUATION_TABLE)

def remove_punctuation(df):
    """Strip punctuation from the 'Title' and 'Content' columns of ``df``.

    Both columns are overwritten on ``df`` itself by applying ``rm_punc`` to
    every value, and that same frame is returned so the function can be
    passed to ``map``.
    """
    for text_column in ('Title', 'Content'):
        df[text_column] = df[text_column].apply(rm_punc)
    return df
# Processing in Parallel
# Leave one core free for the notebook itself, but never go below one worker:
# cpu_count() - 1 is 0 on a single-core machine, which makes both the split
# and the pool constructor raise ValueError.
n_threads = max(1, mp.cpu_count() - 1)

# Split by row position rather than handing the DataFrame to np.array_split
# directly; the direct form relies on a DataFrame code path that recent
# pandas releases deprecate. The chunk sizes are the same as before.
data_pieces = [
    data.iloc[row_positions]
    for row_positions in np.array_split(np.arange(len(data)), n_threads)
]

# pool.map blocks until every chunk is processed; the context manager then
# shuts the workers down even if one of them raised.
with mp.Pool(n_threads) as pool:
    data = pd.concat(pool.map(remove_punctuation, data_pieces))

data.head()

Unnamed: 0,Title,Date,Content,OpenMove,CloseMove
0,top us general praises iranbacked shiite milit...,2017-01-04,the top commander of the usled coalition again...,1.0,1.0
1,extremists turn to a leader to protect western...,2017-01-04,as the founder of the traditionalist worker pa...,1.0,1.0
2,how julian assange evolved from pariah to paragon,2017-01-04,presidentelect donald trump tweeted some prai...,1.0,1.0
3,house panel recommends cutting funding for pla...,2017-01-04,a house panel formed by republicans to invest...,1.0,1.0
4,missouri bill gunbanning businesses liable for...,2017-01-04,as missouri lawmakers convene for the 2017 leg...,1.0,1.0


(56034, 5)