In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

import pandas as pd
import numpy as np
import datetime
import time 

In [2]:
options = Options() 
options.add_experimental_option("excludeSwitches", ['enable-automation']) # allows us to scrape bloomberg
#options.add_argument("--disable-notifications") #just added
options.add_argument("--start-maximized")

In [4]:
def fix_ticker_names(ticker):
    if "|" not in ticker:
        return ticker.upper()
    else:
        return ticker.split("|",1)[1].strip().upper()
    
def get_historical_data(ticker):
    ticker_data = "https://finance.yahoo.com/quote/%s/history?p=%s&.tsrc=fin-srch"%(ticker,ticker)
    
    browser = webdriver.Chrome(chrome_options=options, executable_path=r"/usr/bin/chromedriver")
    browser.get(ticker_data)
    time.sleep(3)
    browser.find_element_by_css_selector('a[class*= "Fl(end) Mt(3px) Cur(p)"]').click()
    time.sleep(5)
    browser.quit()
    return
    
def nearest(items, pivot):
    return min(items, key=lambda x: abs(x - pivot))

In [5]:
news_scraping_results = pd.read_csv('./news.csv')
ticker_names = news_scraping_results["SYMBOL"].apply(lambda x: fix_ticker_names(x))
unique_tickers = np.unique(news_scraping_results["SYMBOL"].apply(lambda x: fix_ticker_names(x)))

In [21]:
# grab the historical ticker data from yahoo finance 
for ticker in unique_tickers: 
    get_historical_data(ticker)

  # Remove the CWD from sys.path while we load stuff.


WebDriverException: Message: chrome not reachable
  (Session info: chrome=77.0.3865.90)
  (Driver info: chromedriver=2.41.578700 (2f1ed5f9343c13f73144538f15c00b370eda6706),platform=Linux 5.0.0-31-generic x86_64)


In [7]:
import glob
import os
import re 

path = r'./ticker_historical_data/'
all_files = glob.glob(path + "/*.csv")

li = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    ticker_name = re.search('./ticker_historical_data/(.*).csv', filename)
    df.insert(0, column='SYMBOL', value=ticker_name.group(1))
    li.append(df)

ticker_historical_data = pd.concat(li, axis=0, ignore_index=True)
ticker_historical_data['Date'] = pd.to_datetime(ticker_historical_data['Date'], errors='coerce')
ticker_historical_data['Date'] = ticker_historical_data['Date'].apply(lambda x: x.date())

In [8]:
news_scraping_results['SYMBOL'] = news_scraping_results.SYMBOL.apply(lambda x: fix_ticker_names(x))
news_scraping_results['PUBLISH_TIME'] = pd.to_datetime(news_scraping_results['PUBLISH_TIME'], errors='coerce')

In [9]:
#create 11 day forecast for each article
forecast_df = pd.DataFrame(columns=np.concatenate((['SYMBOL', 'URL'], np.arange(11))))

for index, row in news_scraping_results.iterrows():
    
    specific_ticker = ticker_historical_data[ticker_historical_data['SYMBOL'] == row['SYMBOL']]
    specific_ticker = specific_ticker.reset_index(drop=True)
    
    # check to see if publish date is present in stock dates
    if row['PUBLISH_TIME'].date() not in specific_ticker['Date']:
        # check to see if article publish date is within 3 days of an existing stock price 
        # or just grab the closest close stock price date for ticker 
        diff = (specific_ticker['Date'] - row['PUBLISH_TIME'].date())
        
        if diff[(diff < pd.to_timedelta(0))].empty: 
            pseudo_forecast = pd.DataFrame([[row['SYMBOL'],
                                         row['URL']]],
                                   columns=['SYMBOL', 'URL'])
            forecast_df = forecast_df.append(pseudo_forecast, ignore_index=True, sort=False)
        else: 
            indexmax = (diff[(diff < pd.to_timedelta(0))].idxmax())
            specific_ticker.iloc[[indexmax]]

            specific_range = specific_ticker[indexmax-10:indexmax+1]
            specific_range = specific_range.reset_index(drop=True)
            if len(specific_range) < 10: 
                pseudo_forecast = pd.DataFrame([[row['SYMBOL'],
                                             row['URL']]],
                                       columns=['SYMBOL', 'URL'])
                forecast_df = forecast_df.append(pseudo_forecast, ignore_index=True, sort=False)

            else: 
                pseudo_forecast = pd.DataFrame([np.concatenate(([row['SYMBOL'],row['URL']], specific_range['Close'][:11]))],
                                               columns=np.concatenate((['SYMBOL', 'URL'], np.arange(11))))

                forecast_df = forecast_df.append(pseudo_forecast, ignore_index=True, sort=False)

    else: 
        article_date_index = specific_ticker[specific_ticker['Date']==row['PUBLISH_TIME'].date()].index[0]
        specific_range = specific_ticker[article_date_index-10:article_date_index+1]
        specific_range = specific_range.reset_index(drop=True)
        
        if len(specific_range) < 11: 
            pseudo_forecast = pd.DataFrame([[row['SYMBOL'],
                                         row['URL']]],
                                   columns=['SYMBOL', 'URL'])
            forecast_df = forecast_df.append(pseudo_forecast, ignore_index=True, sort=False)
        
        else: 
            pseudo_forecast = pd.DataFrame([np.concatenate(([row['SYMBOL'],row['URL']], specific_range['Close'][:11]))],
                                           columns=np.concatenate((['SYMBOL', 'URL'], np.arange(11))))

            forecast_df = forecast_df.append(pseudo_forecast, ignore_index=True, sort=False)


In [10]:
forecast_df[['0','1','2','3','4','5','6','7','8','9','10']] = forecast_df[['0','1','2','3','4','5','6','7','8','9','10']].astype(np.float)

In [11]:
forecast_df['average'] = forecast_df[['0','1','2','3','4','5','6','7','8','9']].mean(axis=1)
forecast_df

Unnamed: 0,SYMBOL,URL,0,1,2,3,4,5,6,7,8,9,10,average
0,NKE,http://onlinenewsguru.com/2019/09/01/investors...,80.279999,81.129997,80.529999,82.739998,83.309998,80.440002,82.250000,82.029999,83.480003,85.379997,84.500000,82.156999
1,C,http://www.bnnbloomberg.ca/cp-rail-raises-quar...,69.669998,69.089996,69.199997,68.830002,68.730003,69.510002,71.029999,70.699997,69.959999,70.360001,70.669998,69.707999
2,UFPT,http://www.bnnbloomberg.ca/unilever-subsidiary...,35.270000,35.490002,35.740002,36.099998,37.400002,37.400002,36.150002,36.939999,36.810001,36.349998,36.240002,36.365001
3,TER,http://www.businesskorea.co.kr/news/articleVie...,30.780001,31.900000,32.169998,32.330002,31.490000,31.790001,31.709999,32.110001,32.900002,31.799999,31.930000,31.898000
4,EMMAW,http://www.globenewswire.com/news-release/2019...,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9939,VIA,https://www.straitstimes.com/tech/apple-update...,33.990002,34.200001,33.680000,34.139999,33.860001,33.330002,32.930000,31.480000,31.370001,30.850000,32.700001,32.983001
9940,WBA,https://www.straitstimes.com/world/united-stat...,52.970001,52.759998,52.090000,52.080002,52.930000,53.990002,53.669998,54.520000,54.660000,55.939999,55.299999,53.561000
9941,WBA,https://www.straitstimes.com/world/united-stat...,52.830002,52.599998,52.970001,52.759998,52.090000,52.080002,52.930000,53.990002,53.669998,54.520000,54.660000,53.044000
9942,WBA,https://www.straitstimes.com/world/europe/glax...,53.630001,54.360001,54.080002,54.410000,55.310001,54.700001,52.830002,52.599998,52.970001,52.759998,52.090000,53.765000


In [13]:
forecast_df['diff'] = forecast_df['10'] - forecast_df['average']
forecast_df

Unnamed: 0,SYMBOL,URL,0,1,2,3,4,5,6,7,8,9,10,average,diff
0,NKE,http://onlinenewsguru.com/2019/09/01/investors...,80.279999,81.129997,80.529999,82.739998,83.309998,80.440002,82.250000,82.029999,83.480003,85.379997,84.500000,82.156999,2.343001
1,C,http://www.bnnbloomberg.ca/cp-rail-raises-quar...,69.669998,69.089996,69.199997,68.830002,68.730003,69.510002,71.029999,70.699997,69.959999,70.360001,70.669998,69.707999,0.961999
2,UFPT,http://www.bnnbloomberg.ca/unilever-subsidiary...,35.270000,35.490002,35.740002,36.099998,37.400002,37.400002,36.150002,36.939999,36.810001,36.349998,36.240002,36.365001,-0.124999
3,TER,http://www.businesskorea.co.kr/news/articleVie...,30.780001,31.900000,32.169998,32.330002,31.490000,31.790001,31.709999,32.110001,32.900002,31.799999,31.930000,31.898000,0.032000
4,EMMAW,http://www.globenewswire.com/news-release/2019...,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9939,VIA,https://www.straitstimes.com/tech/apple-update...,33.990002,34.200001,33.680000,34.139999,33.860001,33.330002,32.930000,31.480000,31.370001,30.850000,32.700001,32.983001,-0.283000
9940,WBA,https://www.straitstimes.com/world/united-stat...,52.970001,52.759998,52.090000,52.080002,52.930000,53.990002,53.669998,54.520000,54.660000,55.939999,55.299999,53.561000,1.738999
9941,WBA,https://www.straitstimes.com/world/united-stat...,52.830002,52.599998,52.970001,52.759998,52.090000,52.080002,52.930000,53.990002,53.669998,54.520000,54.660000,53.044000,1.616000
9942,WBA,https://www.straitstimes.com/world/europe/glax...,53.630001,54.360001,54.080002,54.410000,55.310001,54.700001,52.830002,52.599998,52.970001,52.759998,52.090000,53.765000,-1.675000


In [14]:
forecast_df['percent_change'] = (forecast_df['diff']/forecast_df['average'])*100
forecast_df

Unnamed: 0,SYMBOL,URL,0,1,2,3,4,5,6,7,8,9,10,average,diff,percent_change
0,NKE,http://onlinenewsguru.com/2019/09/01/investors...,80.279999,81.129997,80.529999,82.739998,83.309998,80.440002,82.250000,82.029999,83.480003,85.379997,84.500000,82.156999,2.343001,2.851858
1,C,http://www.bnnbloomberg.ca/cp-rail-raises-quar...,69.669998,69.089996,69.199997,68.830002,68.730003,69.510002,71.029999,70.699997,69.959999,70.360001,70.669998,69.707999,0.961999,1.380040
2,UFPT,http://www.bnnbloomberg.ca/unilever-subsidiary...,35.270000,35.490002,35.740002,36.099998,37.400002,37.400002,36.150002,36.939999,36.810001,36.349998,36.240002,36.365001,-0.124999,-0.343733
3,TER,http://www.businesskorea.co.kr/news/articleVie...,30.780001,31.900000,32.169998,32.330002,31.490000,31.790001,31.709999,32.110001,32.900002,31.799999,31.930000,31.898000,0.032000,0.100319
4,EMMAW,http://www.globenewswire.com/news-release/2019...,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9939,VIA,https://www.straitstimes.com/tech/apple-update...,33.990002,34.200001,33.680000,34.139999,33.860001,33.330002,32.930000,31.480000,31.370001,30.850000,32.700001,32.983001,-0.283000,-0.858017
9940,WBA,https://www.straitstimes.com/world/united-stat...,52.970001,52.759998,52.090000,52.080002,52.930000,53.990002,53.669998,54.520000,54.660000,55.939999,55.299999,53.561000,1.738999,3.246764
9941,WBA,https://www.straitstimes.com/world/united-stat...,52.830002,52.599998,52.970001,52.759998,52.090000,52.080002,52.930000,53.990002,53.669998,54.520000,54.660000,53.044000,1.616000,3.046527
9942,WBA,https://www.straitstimes.com/world/europe/glax...,53.630001,54.360001,54.080002,54.410000,55.310001,54.700001,52.830002,52.599998,52.970001,52.759998,52.090000,53.765000,-1.675000,-3.115411


In [15]:
forecast_df.to_csv('news_forecast.csv')

In [32]:
def label_sentiment(val, negative=-2.50, positive=2.50):
    if pd.isna(val): 
        return np.nan
    else: 
        if val <= negative: 
            return 'negative'
        elif val >= positive:
            return 'positive'
        else:
            return 'neutral'


forecast_df['sentiment'] = forecast_df['percent_change'].apply(lambda x: label_sentiment(x))
forecast_df

Unnamed: 0,SYMBOL,URL,0,1,2,3,4,5,6,7,8,9,10,average,diff,percent_change,sentiment
0,NKE,http://onlinenewsguru.com/2019/09/01/investors...,80.279999,81.129997,80.529999,82.739998,83.309998,80.440002,82.250000,82.029999,83.480003,85.379997,84.500000,82.156999,2.343001,2.851858,positive
1,C,http://www.bnnbloomberg.ca/cp-rail-raises-quar...,69.669998,69.089996,69.199997,68.830002,68.730003,69.510002,71.029999,70.699997,69.959999,70.360001,70.669998,69.707999,0.961999,1.380040,neutral
2,UFPT,http://www.bnnbloomberg.ca/unilever-subsidiary...,35.270000,35.490002,35.740002,36.099998,37.400002,37.400002,36.150002,36.939999,36.810001,36.349998,36.240002,36.365001,-0.124999,-0.343733,neutral
3,TER,http://www.businesskorea.co.kr/news/articleVie...,30.780001,31.900000,32.169998,32.330002,31.490000,31.790001,31.709999,32.110001,32.900002,31.799999,31.930000,31.898000,0.032000,0.100319,neutral
4,EMMAW,http://www.globenewswire.com/news-release/2019...,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2569,BYND,https://www.usatoday.com/story/money/2019/10/0...,142.990005,138.320007,154.339996,151.660004,148.619995,146.419998,143.300003,145.440002,145.720001,145.059998,142.729996,146.187001,-3.457005,-2.364783,neutral
2570,MSFT,https://www.wsj.com/articles/bright-horizons-t...,120.220001,117.050003,117.660004,117.910004,116.769997,116.930000,117.940002,119.019997,119.190002,119.970001,119.360001,118.266001,1.094000,0.925033,neutral
2571,AHH,https://www.wtrf.com/local/main-street-bank-an...,16.750000,16.700001,16.799999,16.790001,16.840000,16.940001,17.059999,17.020000,17.139999,17.240000,17.150000,16.928000,0.222000,1.311437,neutral
2572,TER,https://www.yahoo.com/entertainment/hgtvs-wind...,46.230000,45.450001,45.939999,46.650002,47.910000,48.830002,47.549999,47.299999,46.910000,46.650002,46.349998,46.942000,-0.592002,-1.261136,neutral


In [33]:
forecast_df[forecast_df['sentiment']=='neutral']

Unnamed: 0,SYMBOL,URL,0,1,2,3,4,5,6,7,8,9,10,average,diff,percent_change,sentiment
1,C,http://www.bnnbloomberg.ca/cp-rail-raises-quar...,69.669998,69.089996,69.199997,68.830002,68.730003,69.510002,71.029999,70.699997,69.959999,70.360001,70.669998,69.707999,0.961999,1.380040,neutral
2,UFPT,http://www.bnnbloomberg.ca/unilever-subsidiary...,35.270000,35.490002,35.740002,36.099998,37.400002,37.400002,36.150002,36.939999,36.810001,36.349998,36.240002,36.365001,-0.124999,-0.343733,neutral
3,TER,http://www.businesskorea.co.kr/news/articleVie...,30.780001,31.900000,32.169998,32.330002,31.490000,31.790001,31.709999,32.110001,32.900002,31.799999,31.930000,31.898000,0.032000,0.100319,neutral
6,ARR,http://www.globenewswire.com/news-release/2019...,18.780001,18.889999,18.840000,18.440001,18.580000,18.639999,18.690001,18.600000,18.440001,18.580000,18.620001,18.648000,-0.027999,-0.150146,neutral
7,AHH,http://www.globenewswire.com/news-release/2019...,16.660000,16.660000,16.700001,16.850000,16.690001,16.610001,16.740000,16.799999,16.670000,16.780001,16.920000,16.716000,0.204000,1.220386,neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2568,TM,https://www.timescolonist.com/most-actively-tr...,136.660004,135.940002,137.820007,137.050003,136.550003,136.559998,136.369995,137.410004,136.619995,138.320007,136.279999,136.930002,-0.650003,-0.474697,neutral
2569,BYND,https://www.usatoday.com/story/money/2019/10/0...,142.990005,138.320007,154.339996,151.660004,148.619995,146.419998,143.300003,145.440002,145.720001,145.059998,142.729996,146.187001,-3.457005,-2.364783,neutral
2570,MSFT,https://www.wsj.com/articles/bright-horizons-t...,120.220001,117.050003,117.660004,117.910004,116.769997,116.930000,117.940002,119.019997,119.190002,119.970001,119.360001,118.266001,1.094000,0.925033,neutral
2571,AHH,https://www.wtrf.com/local/main-street-bank-an...,16.750000,16.700001,16.799999,16.790001,16.840000,16.940001,17.059999,17.020000,17.139999,17.240000,17.150000,16.928000,0.222000,1.311437,neutral


In [34]:
forecast_df[forecast_df['sentiment']=='positive']

Unnamed: 0,SYMBOL,URL,0,1,2,3,4,5,6,7,8,9,10,average,diff,percent_change,sentiment
0,NKE,http://onlinenewsguru.com/2019/09/01/investors...,80.279999,81.129997,80.529999,82.739998,83.309998,80.440002,82.250000,82.029999,83.480003,85.379997,84.500000,82.156999,2.343001,2.851858,positive
8,AAPL,http://www.globenewswire.com/news-release/2019...,206.490005,204.160004,205.529999,209.009995,208.740005,205.699997,209.190002,213.279999,213.259995,214.169998,216.699997,208.953000,7.746997,3.707531,positive
9,AHH,http://www.globenewswire.com/news-release/2019...,17.170000,17.280001,17.330000,17.340000,17.379999,17.680000,17.620001,17.670000,17.680000,17.680000,18.040001,17.483000,0.557001,3.185957,positive
20,SFST,http://www.mondaq.com/france/x/796724/Antitrus...,32.630001,33.750000,33.650002,33.869999,33.970001,34.480000,34.720001,35.610001,35.820000,36.320000,35.610001,34.482001,1.128000,3.271273,positive
21,VNTR,http://www.morningstar.co.uk/uk/news/181206/ho...,4.290000,4.500000,4.670000,4.300000,4.200000,4.250000,4.440000,4.340000,4.480000,4.790000,4.650000,4.426000,0.224000,5.061003,positive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2554,PS,https://www.rfdtv.com/story/41086845/american-...,15.900000,15.640000,16.049999,16.900000,17.260000,16.850000,17.520000,18.100000,18.410000,18.049999,18.230000,17.068000,1.162000,6.808063,positive
2556,UFPT,https://www.schaeffersresearch.com/content/ezi...,39.500000,39.119999,38.520000,39.200001,39.410000,39.070000,38.599998,38.759998,39.439999,39.439999,41.290001,39.105999,2.184002,5.584825,positive
2558,MLCO,https://www.scmp.com/business/article/3026562/...,19.670000,19.860001,20.309999,20.469999,20.809999,20.799999,20.059999,20.879999,20.959999,20.950001,21.500000,20.476999,1.023001,4.995852,positive
2559,AHH,https://www.scmp.com/news/china/diplomacy/arti...,17.170000,17.280001,17.330000,17.340000,17.379999,17.680000,17.620001,17.670000,17.680000,17.680000,18.040001,17.483000,0.557001,3.185957,positive


In [35]:
forecast_df[forecast_df['sentiment']=='negative']

Unnamed: 0,SYMBOL,URL,0,1,2,3,4,5,6,7,8,9,10,average,diff,percent_change,sentiment
5,CELG,http://www.globenewswire.com/news-release/2019...,93.279999,93.809998,93.589996,92.410004,91.930000,91.760002,91.529999,90.870003,90.910004,90.199997,89.470001,92.029000,-2.558999,-2.780644,negative
10,CAR,http://www.globenewswire.com/news-release/2019...,29.129999,29.240000,28.879999,29.500000,29.510000,29.500000,29.510000,29.340000,28.980000,27.299999,27.990000,29.089000,-1.099000,-3.778059,negative
14,BIO,http://www.globenewswire.com/news-release/2019...,341.619995,342.989990,340.209991,333.989990,332.739990,327.279999,327.160004,333.559998,339.369995,338.519989,319.730011,335.743994,-16.013983,-4.769701,negative
16,ABT,http://www.globenewswire.com/news-release/2019...,82.440002,82.860001,82.660004,81.839996,83.669998,81.839996,79.529999,81.070000,81.989998,81.040001,78.510002,81.894000,-3.383998,-4.132168,negative
17,ABT,http://www.globenewswire.com/news-release/2019...,82.440002,82.860001,82.660004,81.839996,83.669998,81.839996,79.529999,81.070000,81.989998,81.040001,78.510002,81.894000,-3.383998,-4.132168,negative
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2522,W,https://www.reuters.com/article/us-usa-immigra...,155.320007,157.300003,158.910004,161.710007,158.210007,158.479996,156.330002,162.470001,153.419998,153.770004,152.410004,157.592003,-5.181999,-3.288237,negative
2541,NRG,https://www.reuters.com/article/us-usa-utiliti...,42.410000,42.549999,42.610001,42.459999,42.450001,42.060001,42.459999,42.220001,41.840000,41.369999,41.020000,42.243000,-1.223000,-2.895154,negative
2542,FMAO,https://www.reuters.com/article/us-usa-water-f...,25.719999,25.459999,25.750000,27.010000,27.040001,26.799999,26.900000,27.129999,26.250000,25.700001,25.670000,26.376000,-0.706000,-2.676675,negative
2545,ORCL,https://www.reuters.com/article/us-volvo-cars-...,58.119999,57.540001,57.599998,57.450001,58.110001,58.290001,58.500000,58.060001,57.490002,56.299999,55.880001,57.746000,-1.865999,-3.231391,negative


In [30]:
forecast_df[forecast_df['average'].isnull()]

NameError: name 'forecast_df' is not defined

In [None]:
forecast_df.to_csv("labeled_news.csv",  encoding='utf-8')

In [23]:
news_scraping_results_v2 = news_scraping_results.join(forecast_df[['sentiment']])

In [27]:
news_scraping_results_v2.to_csv('news_scraping_results_and_label_p35_n-35.csv', index=False, encoding='utf-8')

In [28]:
pd.read_csv('news_scraping_results_and_label_p35_n-35.csv')

Unnamed: 0,SYMBOL,SITE_NAME,URL,PUBLISH_TIME,SCRAPED_TIME,TITLE,CONTENT,sentiment
0,NKE,,http://onlinenewsguru.com/2019/09/01/investors...,2019-09-01 07:00:00+00:00,2019-10-09-18-51,Investor's Roundup: Consolidated Water Co. Ltd...,george town cayman islands september 1 2019 sh...,neutral
1,C,,http://www.bnnbloomberg.ca/cp-rail-raises-quar...,2019-05-06 21:55:22+00:00,2019-10-09-18-51,CP Rail raises quarterly dividend 27.5% after ...,cp rail raise quarterly dividend 27 5 after st...,neutral
2,UFPT,,http://www.bnnbloomberg.ca/unilever-subsidiary...,2019-04-09 07:00:00+00:00,2019-10-09-18-52,Unilever subsidiary Schmidt's Natural to launc...,unilever plc un n be dip -pron- toe into the c...,neutral
3,TER,,http://www.businesskorea.co.kr/news/articleVie...,2019-01-24 08:00:00+00:00,2019-10-09-18-51,Multinational Pharma Companies Often Stop Drug...,the korean operation of japanese multinational...,neutral
4,EMMAW,GlobeNewswire,http://www.globenewswire.com/news-release/2019...,2019-07-18 07:00:00+00:00,2019-10-08-18-04,Telemynd Provides Update Following Spinoff Tra...,mission viejo calif and torrance calif july 18...,
...,...,...,...,...,...,...,...,...
2569,BYND,,https://www.usatoday.com/story/money/2019/10/0...,2019-10-09 13:33:00+00:00,2019-10-09-18-51,Johnson & Johnson ordered to pay $8 billion to...,close oklahoma lawyer argue johnson johnson ag...,neutral
2570,MSFT,,https://www.wsj.com/articles/bright-horizons-t...,2019-04-05 07:00:00+00:00,2019-10-09-18-51,"Bright Horizons Tests Parents’ Patience, Not N...",for many thing app be great for other -pron- m...,neutral
2571,AHH,,https://www.wtrf.com/local/main-street-bank-an...,2019-06-20 07:00:00+00:00,2019-10-09-18-52,Main Street Bank anniversary and new announcem...,a local business be celebrate -pron- 18th anni...,neutral
2572,TER,,https://www.yahoo.com/entertainment/hgtvs-wind...,2019-07-10 07:00:00+00:00,2019-10-09-18-50,HGTV's Windy City Rehab Stars Have Work Permit...,the future of hgtv show windy city rehab be un...,neutral


In [40]:
news_scraping_results = news_scraping_results.drop(axis=1, columns='Unnamed: 0' )

In [43]:
news_scraping_results = pd.read_csv('./news_scraping_results.csv')


#news_scraping_results.to_csv('news_scraping_results.csv', encoding='utf-8')

In [44]:
news_scraping_results

Unnamed: 0,SYMBOL,SITE_NAME,URL,PUBLISH_TIME,SCRAPED_TIME,TITLE,CONTENT,sentiment
0,NKE,,http://onlinenewsguru.com/2019/09/01/investors...,2019-09-01 07:00:00+00:00,2019-10-09-18-51,Investor's Roundup: Consolidated Water Co. Ltd...,george town cayman islands september 1 2019 sh...,neutral
1,C,,http://www.bnnbloomberg.ca/cp-rail-raises-quar...,2019-05-06 21:55:22+00:00,2019-10-09-18-51,CP Rail raises quarterly dividend 27.5% after ...,cp rail raise quarterly dividend 27 5 after st...,neutral
2,UFPT,,http://www.bnnbloomberg.ca/unilever-subsidiary...,2019-04-09 07:00:00+00:00,2019-10-09-18-52,Unilever subsidiary Schmidt's Natural to launc...,unilever plc un n be dip -pron- toe into the c...,neutral
3,TER,,http://www.businesskorea.co.kr/news/articleVie...,2019-01-24 08:00:00+00:00,2019-10-09-18-51,Multinational Pharma Companies Often Stop Drug...,the korean operation of japanese multinational...,neutral
4,EMMAW,GlobeNewswire,http://www.globenewswire.com/news-release/2019...,2019-07-18 07:00:00+00:00,2019-10-08-18-04,Telemynd Provides Update Following Spinoff Tra...,mission viejo calif and torrance calif july 18...,
...,...,...,...,...,...,...,...,...
2569,BYND,,https://www.usatoday.com/story/money/2019/10/0...,2019-10-09 13:33:00+00:00,2019-10-09-18-51,Johnson & Johnson ordered to pay $8 billion to...,close oklahoma lawyer argue johnson johnson ag...,neutral
2570,MSFT,,https://www.wsj.com/articles/bright-horizons-t...,2019-04-05 07:00:00+00:00,2019-10-09-18-51,"Bright Horizons Tests Parents’ Patience, Not N...",for many thing app be great for other -pron- m...,neutral
2571,AHH,,https://www.wtrf.com/local/main-street-bank-an...,2019-06-20 07:00:00+00:00,2019-10-09-18-52,Main Street Bank anniversary and new announcem...,a local business be celebrate -pron- 18th anni...,neutral
2572,TER,,https://www.yahoo.com/entertainment/hgtvs-wind...,2019-07-10 07:00:00+00:00,2019-10-09-18-50,HGTV's Windy City Rehab Stars Have Work Permit...,the future of hgtv show windy city rehab be un...,neutral


In [42]:
news_scraping_results.to_csv('news_scraping_results_labeled.csv', index=False, encoding='utf-8')