In [1]:
from bs4 import BeautifulSoup
import urllib2
import pandas as pd
import numpy as np

from pandas_datareader import data, wb

from datetime import date, timedelta, datetime
from time import sleep
import os.path
import re

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
if not os.path.exists('datasets/s_p_500.csv'):
    s_p_500 = pd.read_html('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies', attrs = {"class":'wikitable sortable'}, header = 0)[0]
    s_p_500.to_csv('datasets/s_p_500.csv', encoding = "utf-8")
else:
    s_p_500 = pd.read_csv('datasets/s_p_500.csv', index_col = 0)

s_p_500.columns = [x.strip().replace(' ', '_') for x in s_p_500.columns]

In [3]:
industries = s_p_500.GICS_Sector.unique()
industries

array(['Industrials', 'Health Care', 'Information Technology',
       'Consumer Discretionary', 'Utilities', 'Financials', 'Materials',
       'Consumer Staples', 'Real Estate', 'Energy',
       'Telecommunications Services'], dtype=object)

In [4]:
companies = [s_p_500[(s_p_500.GICS_Sector == industry)].head(3) for industry in industries]
companies = pd.concat(companies, axis = 0)
companies.head()

Unnamed: 0,Ticker_symbol,Security,SEC_filings,GICS_Sector,GICS_Sub_Industry,Address_of_Headquarters,Date_first_added,CIK
0,MMM,3M Company,reports,Industrials,Industrial Conglomerates,"St. Paul, Minnesota",,66740
5,AYI,Acuity Brands Inc,reports,Industrials,Electrical Components & Equipment,"Atlanta, Georgia",2016-05-03,1144215
15,ALK,Alaska Air Group Inc,reports,Industrials,Airlines,"Seattle, Washington",2016-05-13,766421
1,ABT,Abbott Laboratories,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800
2,ABBV,AbbVie,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152


In [5]:
companies.Ticker_symbol = companies.Ticker_symbol.apply(lambda x: x.replace('-', ''))

In [6]:
def get_news_for_symbol(symbol, start_date = date(2014, 1, 1), end_date = date(2014, 1, 15)):
    def get_article(href, text, date):
        link = {}
        link['Url'] = "http://www.reuters.com/"+href
        link['Title'] = text
        link['Symbol'] = symbol
        link['Date'] = date
        soup = BeautifulSoup(urllib2.urlopen(link['Url']), "lxml")
        link['Article'] = soup.find_all(
            'span', {"id":"article-text"})[0].text.replace('\n', ' ')
        link['Time'] = soup.find_all(
            'span', {"class":"timestamp"})[0].text
        sleep(0.1)
        return link

    links = []
    for day in [start_date + timedelta(n) for n in range((end_date - start_date).days)]:
#         print symbol,day.strftime('%m%d%Y')
        url = "http://www.reuters.com/finance/stocks/companyNews?symbol={}&date={}".format(symbol,day.strftime('%m%d%Y'))
        news = BeautifulSoup(urllib2.urlopen(url), "lxml").find_all('div', {"id":"companyNews"})
#         print url
        for i in xrange(2):
            for feature in news[i].find_all('h2'):
                a = feature.find('a')
                if a.has_attr('href'):
                    links.append(get_article(a['href'], a.text, day))
        sleep(0.1)
    return links

In [7]:
companies = companies.loc[companies.Ticker_symbol == 'MMM']

In [8]:
articles = []
if not os.path.exists('datasets/articles3.csv'):
    for symbol in companies.Ticker_symbol:
        articles +=  get_news_for_symbol(symbol, end_date = date.today())
    articles = pd.DataFrame(articles)
    articles.to_csv('datasets/articles3.csv', encoding = "utf8")
else:
    articles = pd.read_csv('datasets/articles3.csv', encoding = "utf8", index_col = 0)

articles['Date'] = pd.to_datetime(articles['Date'], format='%Y-%m-%d')

articles.head()

Unnamed: 0,Article,Date,Symbol,Time,Title,Url
0,MOSCOW Jan 17 Russia's biggest home electron...,2014-01-17,MMM,"Fri Jan 17, 2014 | 7:49am EST",Russia's M.video says consumer confidence weak...,http://www.reuters.com//article/russia-mvideo-...
1,By Lewis Krauskopf Diversified manufacture...,2014-01-30,MMM,"Thu Jan 30, 2014 | 9:44am EST","3M revenue growth slows, misses Wall Street ta...",http://www.reuters.com//article/us-3m-results-...
2,By Lewis KrauskopfJan 30 Diversified manufac...,2014-01-30,MMM,"Thu Jan 30, 2014 | 9:42am EST","UPDATE 2-3M revenue growth slows, misses Wall ...",http://www.reuters.com//article/3m-results-idU...
3,Jan 30 Diversified manufacturer 3M Co poste...,2014-01-30,MMM,"Thu Jan 30, 2014 | 7:39am EST",Manufacturer 3M posts 11 percent rise in net i...,http://www.reuters.com//article/3m-results-idU...
4,3M Co. has invested US$15 million in one of ...,2014-03-10,MMM,"Sun Mar 9, 2014 | 8:19pm EDT",SINGAPORE PRESS-3M to lift N95 mask output at ...,http://www.reuters.com//article/singapore-pres...


In [9]:
def get_day_by_day_results(url):
    soup = BeautifulSoup(urllib2.urlopen(url), 'lxml')
    soup.findAll
    tablerows = []
    row_list = []
    table_of_stuff = soup.findAll('h3', text=re.compile(r'AL  Overall'))[0].parent.find_next_sibling()
    for stat in row.findAll('td'):
        row_list.append(stat.get_text())
        tablerows.append(row_list)
    return tablerows

In [10]:
def get_quotes(symbol, start_date='1/1/2014', end_date=date.today().strftime('%m/%d/%Y')):
    ret = data.DataReader(symbol, data_source='yahoo', start=start_date, end=end_date)
    ret['Symbol'] = symbol
    return ret

In [11]:
quotes = ""
if not os.path.exists('datasets/daily_quotes2.csv'):
    quotes = [get_quotes(symbol) for symbol in companies.Ticker_symbol ]
    quotes = pd.concat(quotes, axis=0)
    quotes.to_csv('daily_quotes2.csv', encoding = "utf8")
else:
    quotes = pd.read_csv('datasets/daily_quotes2.csv', index_col=0)

quotes['Date'] = pd.to_datetime(quotes['Date'], format='%Y-%m-%d')
    
quotes.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close,Symbol
0,2014-01-02,138.050003,139.0,137.610001,138.130005,3052100,127.971294,MMM
1,2014-01-03,138.369995,138.880005,138.029999,138.449997,2133300,128.267753,MMM
2,2014-01-06,138.75,139.289993,137.479996,137.630005,2712100,127.508066,MMM
3,2014-01-07,137.449997,138.199997,137.130005,137.649994,2525500,127.526585,MMM
4,2014-01-08,137.479996,137.75,136.339996,136.630005,3122700,126.581611,MMM


In [12]:
articles_mmm = articles.loc[articles.Symbol=='MMM']
quotes_mmm = quotes.loc[quotes.Symbol=='MMM']

In [13]:
articles_mmm

Unnamed: 0,Article,Date,Symbol,Time,Title,Url
0,MOSCOW Jan 17 Russia's biggest home electron...,2014-01-17,MMM,"Fri Jan 17, 2014 | 7:49am EST",Russia's M.video says consumer confidence weak...,http://www.reuters.com//article/russia-mvideo-...
1,By Lewis Krauskopf Diversified manufacture...,2014-01-30,MMM,"Thu Jan 30, 2014 | 9:44am EST","3M revenue growth slows, misses Wall Street ta...",http://www.reuters.com//article/us-3m-results-...
2,By Lewis KrauskopfJan 30 Diversified manufac...,2014-01-30,MMM,"Thu Jan 30, 2014 | 9:42am EST","UPDATE 2-3M revenue growth slows, misses Wall ...",http://www.reuters.com//article/3m-results-idU...
3,Jan 30 Diversified manufacturer 3M Co poste...,2014-01-30,MMM,"Thu Jan 30, 2014 | 7:39am EST",Manufacturer 3M posts 11 percent rise in net i...,http://www.reuters.com//article/3m-results-idU...
4,3M Co. has invested US$15 million in one of ...,2014-03-10,MMM,"Sun Mar 9, 2014 | 8:19pm EDT",SINGAPORE PRESS-3M to lift N95 mask output at ...,http://www.reuters.com//article/singapore-pres...
5,March 18 Diversified manufacturer 3M Co exp...,2014-03-18,MMM,"Tue Mar 18, 2014 | 12:33am EDT",3M sees sales growth in China to triple global...,http://www.reuters.com//article/3m-china-sales...
6,"MOSCOW, March 27 Russian home electronics re...",2014-03-27,MMM,"Thu Mar 27, 2014 | 3:18am EDT",Russia's M.video 2013 net profit rises 38 pct,http://www.reuters.com//article/russia-mvideo-...
7,Post-it notes maker 3M Co (MMM.N) said its b...,2014-04-15,MMM,"Tue Apr 15, 2014 | 12:23pm EDT",3M board opposes proposal for shareholder acti...,http://www.reuters.com//article/us-3m-sharehol...
8,April 15 Post-it notes maker 3M Co said its ...,2014-04-15,MMM,"Tue Apr 15, 2014 | 12:20pm EDT",3M board opposes proposal for shareholder acti...,http://www.reuters.com//article/3m-shareholder...
9,* Apple rallies a day after announcing 7-for...,2014-04-24,MMM,"Thu Apr 24, 2014 | 4:40pm EDT",US STOCKS-Apple buoys Nasdaq; Ukraine weighs o...,http://www.reuters.com//article/markets-usa-st...


In [14]:
articles_mmm_1 = articles_mmm.loc[articles_mmm.Date=='2014-01-30'].copy()
articles_mmm_1

Unnamed: 0,Article,Date,Symbol,Time,Title,Url
1,By Lewis Krauskopf Diversified manufacture...,2014-01-30,MMM,"Thu Jan 30, 2014 | 9:44am EST","3M revenue growth slows, misses Wall Street ta...",http://www.reuters.com//article/us-3m-results-...
2,By Lewis KrauskopfJan 30 Diversified manufac...,2014-01-30,MMM,"Thu Jan 30, 2014 | 9:42am EST","UPDATE 2-3M revenue growth slows, misses Wall ...",http://www.reuters.com//article/3m-results-idU...
3,Jan 30 Diversified manufacturer 3M Co poste...,2014-01-30,MMM,"Thu Jan 30, 2014 | 7:39am EST",Manufacturer 3M posts 11 percent rise in net i...,http://www.reuters.com//article/3m-results-idU...


In [15]:
a = re.match(r'[0-9]?[0-9]:[0-9]{1,2}[a-zA-z]{2} [a-zA-Z]{3}', 'Thu Jan 30, 2014 | 9:44am EST')
a

In [16]:
sttime = 'Thu Jan 30, 2014 | 19:44am EST'

In [17]:
m = re.search('[0-9]?[0-9]:[0-9]{1,2}', sttime)
if m:
    print m.group(0)

19:44


In [18]:
# articles_mmm_1.Time = articles_mmm_1.Time.apply(lambda x: re.search('[0-9]?[0-9]:[0-9]{1,2}', x).group(0))
articles_mmm_1.Time = articles_mmm_1.Time.apply(lambda x: re.search('[0-9]?[0-9]:[0-9]{1,2}', x).group(0))
articles_mmm_1

Unnamed: 0,Article,Date,Symbol,Time,Title,Url
1,By Lewis Krauskopf Diversified manufacture...,2014-01-30,MMM,9:44,"3M revenue growth slows, misses Wall Street ta...",http://www.reuters.com//article/us-3m-results-...
2,By Lewis KrauskopfJan 30 Diversified manufac...,2014-01-30,MMM,9:42,"UPDATE 2-3M revenue growth slows, misses Wall ...",http://www.reuters.com//article/3m-results-idU...
3,Jan 30 Diversified manufacturer 3M Co poste...,2014-01-30,MMM,7:39,Manufacturer 3M posts 11 percent rise in net i...,http://www.reuters.com//article/3m-results-idU...


In [19]:
articles_mmm_1.Title = articles_mmm_1.Title.apply(lambda x: re.sub(r'.+?-', '', x))
articles_mmm_1

Unnamed: 0,Article,Date,Symbol,Time,Title,Url
1,By Lewis Krauskopf Diversified manufacture...,2014-01-30,MMM,9:44,"3M revenue growth slows, misses Wall Street ta...",http://www.reuters.com//article/us-3m-results-...
2,By Lewis KrauskopfJan 30 Diversified manufac...,2014-01-30,MMM,9:42,"3M revenue growth slows, misses Wall Street ta...",http://www.reuters.com//article/3m-results-idU...
3,Jan 30 Diversified manufacturer 3M Co poste...,2014-01-30,MMM,7:39,Manufacturer 3M posts 11 percent rise in net i...,http://www.reuters.com//article/3m-results-idU...


In [20]:
articles_mmm_1.Title.unique()

array([u'3M revenue growth slows, misses Wall Street target',
       u'Manufacturer 3M posts 11 percent rise in net income'], dtype=object)

In [21]:
articles

Unnamed: 0,Article,Date,Symbol,Time,Title,Url
0,MOSCOW Jan 17 Russia's biggest home electron...,2014-01-17,MMM,"Fri Jan 17, 2014 | 7:49am EST",Russia's M.video says consumer confidence weak...,http://www.reuters.com//article/russia-mvideo-...
1,By Lewis Krauskopf Diversified manufacture...,2014-01-30,MMM,"Thu Jan 30, 2014 | 9:44am EST","3M revenue growth slows, misses Wall Street ta...",http://www.reuters.com//article/us-3m-results-...
2,By Lewis KrauskopfJan 30 Diversified manufac...,2014-01-30,MMM,"Thu Jan 30, 2014 | 9:42am EST","UPDATE 2-3M revenue growth slows, misses Wall ...",http://www.reuters.com//article/3m-results-idU...
3,Jan 30 Diversified manufacturer 3M Co poste...,2014-01-30,MMM,"Thu Jan 30, 2014 | 7:39am EST",Manufacturer 3M posts 11 percent rise in net i...,http://www.reuters.com//article/3m-results-idU...
4,3M Co. has invested US$15 million in one of ...,2014-03-10,MMM,"Sun Mar 9, 2014 | 8:19pm EDT",SINGAPORE PRESS-3M to lift N95 mask output at ...,http://www.reuters.com//article/singapore-pres...
5,March 18 Diversified manufacturer 3M Co exp...,2014-03-18,MMM,"Tue Mar 18, 2014 | 12:33am EDT",3M sees sales growth in China to triple global...,http://www.reuters.com//article/3m-china-sales...
6,"MOSCOW, March 27 Russian home electronics re...",2014-03-27,MMM,"Thu Mar 27, 2014 | 3:18am EDT",Russia's M.video 2013 net profit rises 38 pct,http://www.reuters.com//article/russia-mvideo-...
7,Post-it notes maker 3M Co (MMM.N) said its b...,2014-04-15,MMM,"Tue Apr 15, 2014 | 12:23pm EDT",3M board opposes proposal for shareholder acti...,http://www.reuters.com//article/us-3m-sharehol...
8,April 15 Post-it notes maker 3M Co said its ...,2014-04-15,MMM,"Tue Apr 15, 2014 | 12:20pm EDT",3M board opposes proposal for shareholder acti...,http://www.reuters.com//article/3m-shareholder...
9,* Apple rallies a day after announcing 7-for...,2014-04-24,MMM,"Thu Apr 24, 2014 | 4:40pm EDT",US STOCKS-Apple buoys Nasdaq; Ukraine weighs o...,http://www.reuters.com//article/markets-usa-st...


In [22]:
articles.Time = articles.Time.apply(lambda x: re.search('[0-9]?[0-9]:[0-9]{1,2}', x).group(0))
articles.Title = articles.Title.apply(lambda x: x.replace(r'.+?-', ''))

In [23]:
daily_articles = []
for symbol in articles.Symbol.unique():
    symbol_articles = articles.loc[articles.Symbol == symbol]
    for date in symbol_articles.Date.unique():
        date_articles = symbol_articles.loc[symbol_articles.Date == date]
        daily_articles.append(date_articles.sort_values('Time', ascending=False).iloc[0:1,:])

In [24]:
daily_articles = pd.concat(daily_articles, axis=0).reset_index().drop('index', axis=1)
daily_articles

Unnamed: 0,Article,Date,Symbol,Time,Title,Url
0,MOSCOW Jan 17 Russia's biggest home electron...,2014-01-17,MMM,7:49,Russia's M.video says consumer confidence weak...,http://www.reuters.com//article/russia-mvideo-...
1,By Lewis Krauskopf Diversified manufacture...,2014-01-30,MMM,9:44,"3M revenue growth slows, misses Wall Street ta...",http://www.reuters.com//article/us-3m-results-...
2,3M Co. has invested US$15 million in one of ...,2014-03-10,MMM,8:19,SINGAPORE PRESS-3M to lift N95 mask output at ...,http://www.reuters.com//article/singapore-pres...
3,March 18 Diversified manufacturer 3M Co exp...,2014-03-18,MMM,12:33,3M sees sales growth in China to triple global...,http://www.reuters.com//article/3m-china-sales...
4,"MOSCOW, March 27 Russian home electronics re...",2014-03-27,MMM,3:18,Russia's M.video 2013 net profit rises 38 pct,http://www.reuters.com//article/russia-mvideo-...
5,Post-it notes maker 3M Co (MMM.N) said its b...,2014-04-15,MMM,12:23,3M board opposes proposal for shareholder acti...,http://www.reuters.com//article/us-3m-sharehol...
6,"* Tech results lift Nasdaq futures, Apple an...",2014-04-24,MMM,8:47,US STOCKS-Futures point to higher open on tech...,http://www.reuters.com//article/markets-usa-st...
7,MOSCOW May 12 Russia's biggest home electron...,2014-05-12,MMM,10:33,Russia's M.video recommends 45 pct hike in 201...,http://www.reuters.com//article/russia-mvideo-...
8,"June 9 Amgen Inc, the world's largest biotec...",2014-06-09,MMM,4:22,Amgen names former 3M CFO Meline as finance chief,http://www.reuters.com//article/amgen-cfo-idUS...
9,"(Adds Allergan, Blackstone, 3M, Elliott, Int...",2014-07-16,MMM,4:06,Deals of the day- Mergers and acquisitions,http://www.reuters.com//article/deals-day-idUS...


In [25]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [26]:
news='''
Nov 15 Tidewater Midstream and Infrastructure Ltd
* Tidewater Midstream and Infrastructure - Tidewater expects Q4 2016 adjusted EBITDA to be in line with previously announced guidance of about $11 - $12 million
* Q3 earnings per share c$0.01
* Qtrly total revenues c$ 27 million versus c$ 7.9 million
* Q3 earnings per share view c$0.02 -- Thomson Reuters I/B/E/S
* Tidewater Midstream and Infrastructure Ltd. announces third quarter 2016 results Source text for 
'''

In [27]:
sid = SentimentIntensityAnalyzer()

In [28]:
ss = sid.polarity_scores(news)
for k in sorted(ss):
        print('{0}: {1}, '.format(k, ss[k], end=''))

compound: 0.5267, 
neg: 0.0, 
neu: 0.937, 
pos: 0.063, 


In [29]:
sorted(ss)[0]

'compound'

In [30]:
ss['compound']

0.5267

In [31]:
daily_articles['has_news'] = 1

In [32]:
news_quotes = daily_articles.set_index('Date').join(
    quotes_mmm.set_index('Date'), rsuffix='r').drop('Symbolr', axis=1)
print news_quotes.shape
news_quotes

(48, 12)


Unnamed: 0_level_0,Article,Symbol,Time,Title,Url,has_news,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2014-01-17,MOSCOW Jan 17 Russia's biggest home electron...,MMM,7:49,Russia's M.video says consumer confidence weak...,http://www.reuters.com//article/russia-mvideo-...,1,137.979996,138.75,136.979996,137.309998,2327900,127.211594
2014-01-30,By Lewis Krauskopf Diversified manufacture...,MMM,9:44,"3M revenue growth slows, misses Wall Street ta...",http://www.reuters.com//article/us-3m-results-...,1,129.960007,129.960007,126.529999,128.050003,4102600,118.632622
2014-03-10,3M Co. has invested US$15 million in one of ...,MMM,8:19,SINGAPORE PRESS-3M to lift N95 mask output at ...,http://www.reuters.com//article/singapore-pres...,1,133.979996,134.080002,133.100006,133.559998,1966100,124.550414
2014-03-18,March 18 Diversified manufacturer 3M Co exp...,MMM,12:33,3M sees sales growth in China to triple global...,http://www.reuters.com//article/3m-china-sales...,1,132.490005,133.229996,132.25,132.740005,2032800,123.785737
2014-03-27,"MOSCOW, March 27 Russian home electronics re...",MMM,3:18,Russia's M.video 2013 net profit rises 38 pct,http://www.reuters.com//article/russia-mvideo-...,1,132.820007,133.490005,132.179993,132.839996,2402500,123.878982
2014-04-15,Post-it notes maker 3M Co (MMM.N) said its b...,MMM,12:23,3M board opposes proposal for shareholder acti...,http://www.reuters.com//article/us-3m-sharehol...,1,133.270004,134.350006,132.020004,134.089996,2119000,125.044661
2014-04-24,"* Tech results lift Nasdaq futures, Apple an...",MMM,8:47,US STOCKS-Futures point to higher open on tech...,http://www.reuters.com//article/markets-usa-st...,1,136.380005,136.990005,135.0,136.649994,2847800,127.431968
2014-05-12,MOSCOW May 12 Russia's biggest home electron...,MMM,10:33,Russia's M.video recommends 45 pct hike in 201...,http://www.reuters.com//article/russia-mvideo-...,1,141.779999,142.940002,141.710007,142.820007,2972100,133.18577
2014-06-09,"June 9 Amgen Inc, the world's largest biotec...",MMM,4:22,Amgen names former 3M CFO Meline as finance chief,http://www.reuters.com//article/amgen-cfo-idUS...,1,144.639999,145.529999,144.610001,145.320007,1800900,136.348464
2014-07-16,"(Adds Allergan, Blackstone, 3M, Elliott, Int...",MMM,4:06,Deals of the day- Mergers and acquisitions,http://www.reuters.com//article/deals-day-idUS...,1,145.279999,146.25,145.029999,146.169998,1969200,137.145979


In [33]:
lambdafunc = lambda x: pd.Series([sid.polarity_scores(x['Article'])['compound'],
                                  sid.polarity_scores(x['Article'])['neg'],
                                  sid.polarity_scores(x['Article'])['neu'],
                                  sid.polarity_scores(x['Article'])['pos']
                                 ])

In [34]:
sentiments = news_quotes.apply(lambdafunc, axis=1)
sentiments.columns = ['compound', 'neg', 'neu', 'pos']
news_quotes= news_quotes.join(sentiments)
news_quotes

Unnamed: 0_level_0,Article,Symbol,Time,Title,Url,has_news,Open,High,Low,Close,Volume,Adj Close,compound,neg,neu,pos
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2014-01-17,MOSCOW Jan 17 Russia's biggest home electron...,MMM,7:49,Russia's M.video says consumer confidence weak...,http://www.reuters.com//article/russia-mvideo-...,1,137.979996,138.75,136.979996,137.309998,2327900,127.211594,0.0516,0.049,0.901,0.051
2014-01-30,By Lewis Krauskopf Diversified manufacture...,MMM,9:44,"3M revenue growth slows, misses Wall Street ta...",http://www.reuters.com//article/us-3m-results-...,1,129.960007,129.960007,126.529999,128.050003,4102600,118.632622,0.7096,0.037,0.896,0.067
2014-03-10,3M Co. has invested US$15 million in one of ...,MMM,8:19,SINGAPORE PRESS-3M to lift N95 mask output at ...,http://www.reuters.com//article/singapore-pres...,1,133.979996,134.080002,133.100006,133.559998,1966100,124.550414,0.4019,0.0,0.958,0.042
2014-03-18,March 18 Diversified manufacturer 3M Co exp...,MMM,12:33,3M sees sales growth in China to triple global...,http://www.reuters.com//article/3m-china-sales...,1,132.490005,133.229996,132.25,132.740005,2032800,123.785737,0.7355,0.031,0.909,0.06
2014-03-27,"MOSCOW, March 27 Russian home electronics re...",MMM,3:18,Russia's M.video 2013 net profit rises 38 pct,http://www.reuters.com//article/russia-mvideo-...,1,132.820007,133.490005,132.179993,132.839996,2402500,123.878982,0.6124,0.0,0.926,0.074
2014-04-15,Post-it notes maker 3M Co (MMM.N) said its b...,MMM,12:23,3M board opposes proposal for shareholder acti...,http://www.reuters.com//article/us-3m-sharehol...,1,133.270004,134.350006,132.020004,134.089996,2119000,125.044661,0.8271,0.0,0.902,0.098
2014-04-24,"* Tech results lift Nasdaq futures, Apple an...",MMM,8:47,US STOCKS-Futures point to higher open on tech...,http://www.reuters.com//article/markets-usa-st...,1,136.380005,136.990005,135.0,136.649994,2847800,127.431968,0.9927,0.006,0.885,0.109
2014-05-12,MOSCOW May 12 Russia's biggest home electron...,MMM,10:33,Russia's M.video recommends 45 pct hike in 201...,http://www.reuters.com//article/russia-mvideo-...,1,141.779999,142.940002,141.710007,142.820007,2972100,133.18577,0.5719,0.019,0.91,0.07
2014-06-09,"June 9 Amgen Inc, the world's largest biotec...",MMM,4:22,Amgen names former 3M CFO Meline as finance chief,http://www.reuters.com//article/amgen-cfo-idUS...,1,144.639999,145.529999,144.610001,145.320007,1800900,136.348464,0.7964,0.0,0.949,0.051
2014-07-16,"(Adds Allergan, Blackstone, 3M, Elliott, Int...",MMM,4:06,Deals of the day- Mergers and acquisitions,http://www.reuters.com//article/deals-day-idUS...,1,145.279999,146.25,145.029999,146.169998,1969200,137.145979,0.9964,0.037,0.872,0.091


In [35]:
news_quotes.shape

(48, 16)

In [36]:
from bokeh.plotting import figure, show, show, output_notebook, ColumnDataSource
from bokeh.models import HoverTool
output_notebook()

In [37]:
quote_source = ColumnDataSource(
        data=dict(
            x=quotes_mmm.Date,
            y=quotes_mmm.Close,
        )
    )

articles_source = ColumnDataSource(
        data=dict(
            x = news_quotes.index,
            y = news_quotes.Close,
            compound=news_quotes['compound'],
            neg=news_quotes['neg'],
            neu=news_quotes['neu'],
            pos=news_quotes['pos'],
            
        
        )
    )
p = figure(width=8500, height=500, x_axis_type='datetime')
p.line('x', 'y', color="Red", alpha=0.8, line_width=2, source=quote_source)

r2 = p.circle('x', 'y', source=articles_source, size=5)
p.add_tools(HoverTool(renderers=[r2], tooltips={
            "compound": "@compound",
            "neg": "@neg",
            "neu": "@neu",
            "pos": "@pos",
        }))
show(p)

In [38]:
a = news_quotes
b = quotes_mmm.set_index('Date')

In [39]:
c = b.join(a, rsuffix='r')
print c.shape
c

(732, 23)


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close,Symbol,Article,Symbolr,Time,...,Openr,Highr,Lowr,Closer,Volumer,Adj Closer,compound,neg,neu,pos
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-02,138.050003,139.000000,137.610001,138.130005,3052100,127.971294,MMM,,,,...,,,,,,,,,,
2014-01-03,138.369995,138.880005,138.029999,138.449997,2133300,128.267753,MMM,,,,...,,,,,,,,,,
2014-01-06,138.750000,139.289993,137.479996,137.630005,2712100,127.508066,MMM,,,,...,,,,,,,,,,
2014-01-07,137.449997,138.199997,137.130005,137.649994,2525500,127.526585,MMM,,,,...,,,,,,,,,,
2014-01-08,137.479996,137.750000,136.339996,136.630005,3122700,126.581611,MMM,,,,...,,,,,,,,,,
2014-01-09,137.020004,137.429993,136.199997,136.449997,2539900,126.414842,MMM,,,,...,,,,,,,,,,
2014-01-10,136.949997,137.190002,135.679993,136.179993,3254300,126.164695,MMM,,,,...,,,,,,,,,,
2014-01-13,135.809998,137.059998,134.570007,134.679993,2633400,124.775012,MMM,,,,...,,,,,,,,,,
2014-01-14,135.110001,137.410004,134.740005,137.410004,2479700,127.304245,MMM,,,,...,,,,,,,,,,
2014-01-15,137.770004,138.820007,137.559998,138.440002,3025900,128.258493,MMM,,,,...,,,,,,,,,,


In [40]:
c[(c.index == '2014-01-17')]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close,Symbol,Article,Symbolr,Time,...,Openr,Highr,Lowr,Closer,Volumer,Adj Closer,compound,neg,neu,pos
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-17,137.979996,138.75,136.979996,137.309998,2327900,127.211594,MMM,MOSCOW Jan 17 Russia's biggest home electron...,MMM,7:49,...,137.979996,138.75,136.979996,137.309998,2327900.0,127.211594,0.0516,0.049,0.901,0.051


In [41]:
c['N_Close'] = c['Close'].shift(-1)
c['P_Close'] = c['Close'].shift(1)

In [42]:
c

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close,Symbol,Article,Symbolr,Time,...,Lowr,Closer,Volumer,Adj Closer,compound,neg,neu,pos,N_Close,P_Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-02,138.050003,139.000000,137.610001,138.130005,3052100,127.971294,MMM,,,,...,,,,,,,,,138.449997,
2014-01-03,138.369995,138.880005,138.029999,138.449997,2133300,128.267753,MMM,,,,...,,,,,,,,,137.630005,138.130005
2014-01-06,138.750000,139.289993,137.479996,137.630005,2712100,127.508066,MMM,,,,...,,,,,,,,,137.649994,138.449997
2014-01-07,137.449997,138.199997,137.130005,137.649994,2525500,127.526585,MMM,,,,...,,,,,,,,,136.630005,137.630005
2014-01-08,137.479996,137.750000,136.339996,136.630005,3122700,126.581611,MMM,,,,...,,,,,,,,,136.449997,137.649994
2014-01-09,137.020004,137.429993,136.199997,136.449997,2539900,126.414842,MMM,,,,...,,,,,,,,,136.179993,136.630005
2014-01-10,136.949997,137.190002,135.679993,136.179993,3254300,126.164695,MMM,,,,...,,,,,,,,,134.679993,136.449997
2014-01-13,135.809998,137.059998,134.570007,134.679993,2633400,124.775012,MMM,,,,...,,,,,,,,,137.410004,136.179993
2014-01-14,135.110001,137.410004,134.740005,137.410004,2479700,127.304245,MMM,,,,...,,,,,,,,,138.440002,134.679993
2014-01-15,137.770004,138.820007,137.559998,138.440002,3025900,128.258493,MMM,,,,...,,,,,,,,,138.160004,137.410004


In [43]:
slopesfunc = lambda x: pd.Series([
        x['Close'] - x['P_Close'],
        x['N_Close'] - x['Close'],
        (x['N_Close'] - x['Close']) - (x['Close'] - x['P_Close']),
    ])

In [44]:
d = c.apply(slopesfunc, axis=1)
d.columns = ['P_Slope', 'N_Slope', 'Diff_Slope']
d

Unnamed: 0_level_0,P_Slope,N_Slope,Diff_Slope
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-01-02,,0.319992,
2014-01-03,0.319992,-0.819992,-1.139984
2014-01-06,-0.819992,0.019989,0.839981
2014-01-07,0.019989,-1.019989,-1.039978
2014-01-08,-1.019989,-0.180008,0.839981
2014-01-09,-0.180008,-0.270004,-0.089996
2014-01-10,-0.270004,-1.500000,-1.229996
2014-01-13,-1.500000,2.730011,4.230011
2014-01-14,2.730011,1.029998,-1.700013
2014-01-15,1.029998,-0.279998,-1.309996


In [45]:
e = pd.concat([c, d], axis=1)

In [46]:
f = e.loc[e['Article'].notnull(), :]

In [47]:
quote_source = ColumnDataSource(
        data=dict(
            x=quotes_mmm.Date,
            y=quotes_mmm.Close,
        )
    )

articles_source = ColumnDataSource(
        data=dict(
            x = f.index,
            y = f.Close,
            compound=f['compound'],
            neg=f['neg'],
            neu=f['neu'],
            pos=f['pos'],
            p_slope=f['P_Slope'],
            n_slope=f['N_Slope'],
            diff_slope = f['Diff_Slope']
        )
    )
p = figure(width=8500, height=400, x_axis_type='datetime')
p.line('x', 'y', color="Red", alpha=0.8, line_width=3, source=quote_source)

r2 = p.circle('x', 'y', source=articles_source, size=8)
p.add_tools(HoverTool(renderers=[r2], tooltips={
            "Sentiment (compound)": "@compound",
            "Sentiment (negative)": "@neg",
            "Sentiment (Neutral)": "@neu",
            "Sentiment (Positive)": "@pos",
            "Next Slope": "@n_slope",
            "Previous Slope": "@p_slope",
            "Slope Diff": "@diff_slope"
        }))
show(p)

In [48]:
f

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close,Symbol,Article,Symbolr,Time,...,Adj Closer,compound,neg,neu,pos,N_Close,P_Close,P_Slope,N_Slope,Diff_Slope
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-17,137.979996,138.75,136.979996,137.309998,2327900,127.211594,MMM,MOSCOW Jan 17 Russia's biggest home electron...,MMM,7:49,...,127.211594,0.0516,0.049,0.901,0.051,136.990005,138.160004,-0.850006,-0.319993,0.530013
2014-01-30,129.960007,129.960007,126.529999,128.050003,4102600,118.632622,MMM,By Lewis Krauskopf Diversified manufacture...,MMM,9:44,...,118.632622,0.7096,0.037,0.896,0.067,128.190002,130.25,-2.199997,0.139999,2.339996
2014-03-10,133.979996,134.080002,133.100006,133.559998,1966100,124.550414,MMM,3M Co. has invested US$15 million in one of ...,MMM,8:19,...,124.550414,0.4019,0.0,0.958,0.042,132.509995,134.110001,-0.550003,-1.050003,-0.5
2014-03-18,132.490005,133.229996,132.25,132.740005,2032800,123.785737,MMM,March 18 Diversified manufacturer 3M Co exp...,MMM,12:33,...,123.785737,0.7355,0.031,0.909,0.06,131.229996,132.270004,0.470001,-1.510009,-1.98001
2014-03-27,132.820007,133.490005,132.179993,132.839996,2402500,123.878982,MMM,"MOSCOW, March 27 Russian home electronics re...",MMM,3:18,...,123.878982,0.6124,0.0,0.926,0.074,134.199997,133.089996,-0.25,1.360001,1.610001
2014-04-15,133.270004,134.350006,132.020004,134.089996,2119000,125.044661,MMM,Post-it notes maker 3M Co (MMM.N) said its b...,MMM,12:23,...,125.044661,0.8271,0.0,0.902,0.098,136.770004,133.149994,0.940002,2.680008,1.740006
2014-04-24,136.380005,136.990005,135.0,136.649994,2847800,127.431968,MMM,"* Tech results lift Nasdaq futures, Apple an...",MMM,8:47,...,127.431968,0.9927,0.006,0.885,0.109,136.559998,137.990005,-1.340011,-0.089996,1.250015
2014-05-12,141.779999,142.940002,141.710007,142.820007,2972100,133.18577,MMM,MOSCOW May 12 Russia's biggest home electron...,MMM,10:33,...,133.18577,0.5719,0.019,0.91,0.07,142.429993,141.339996,1.480011,-0.390014,-1.870025
2014-06-09,144.639999,145.529999,144.610001,145.320007,1800900,136.348464,MMM,"June 9 Amgen Inc, the world's largest biotec...",MMM,4:22,...,136.348464,0.7964,0.0,0.949,0.051,144.970001,144.639999,0.680008,-0.350006,-1.030014
2014-07-16,145.279999,146.25,145.029999,146.169998,1969200,137.145979,MMM,"(Adds Allergan, Blackstone, 3M, Elliott, Int...",MMM,4:06,...,137.145979,0.9964,0.037,0.872,0.091,143.539993,145.059998,1.11,-2.630005,-3.740005


In [50]:
if not os.path.exists('datasets/s_p_500.csv'):
    s_p_500 = pd.read_html('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies', attrs = {"class":'wikitable sortable'}, header = 0)[0]
    s_p_500.to_csv('datasets/s_p_500.csv', encoding = "utf-8")
else:
    s_p_500 = pd.read_csv('datasets/s_p_500.csv', index_col = 0)

s_p_500.columns = [x.strip().replace(' ', '_') for x in s_p_500.columns]
s_p_500.Ticker_symbol = s_p_500.Ticker_symbol.apply(lambda x: x.replace('-', ''))

In [52]:
articles = []
# if not os.path.exists('datasets/articles.csv'):
for symbol in s_p_500['Ticker symbol']:
    print symbol
    if not os.path.exists('datasets/companies/{}.csv'.format(symbol)):
        print "No data found for {}".format(symbol)
    else:
        article = pd.read_csv('datasets/companies/{}.csv'.format(symbol), encoding='utf8', index_col=0)
        articles.append(article)
articles = pd.concat(articles, axis=0)

articles['Date'] = pd.to_datetime(articles['Date'], format='%Y-%m-%d')

MMM
ABT
ABBV
ACN
ATVI
AYI
ADBE
AAP
AES
AET
AFL
AMG
A
APD
AKAM
ALK
ALB
AGN
LNT
ALXN
ALLE
ADS
ALL
GOOGL
GOOG
MO
AMZN
AEE
AAL
AEP
AXP
AIG
AMT
AWK
AMP
ABC
AME
AMGN
APH
APC
ADI
ANTM
AON
APA
AIV
AAPL
AMAT
ADM
ARNC
AJG
AIZ
T
ADSK
ADP
AN
AZO
AVB
AVY
BHI
BLL
BAC
BK
BCR
BAX
BBT
BDX
BBBY
BRK-B
No data found for BRK-B
BBY
BIIB
BLK
HRB
BA
BWA
BXP
BSX
BMY
AVGO
BF-B
No data found for BF-B
CHRW
CA
COG
CPB
COF
CAH
HSIC
KMX
CCL
CAT
CBG
CBS
CELG
CNC
CNP
CTL
CERN
CF
SCHW
CHTR
CHK
CVX
CMG
CB
CHD
CI
XEC
CINF
CTAS
CSCO
C
CFG
CTXS
CLX
CME
CMS
COH
KO
CTSH
CL
CMCSA
CMA
CAG
CXO
COP
ED
STZ
GLW
COST
COTY
CCI
CSRA
CSX
CMI
CVS
DHI
DHR
DRI
DVA
DE
DLPH
DAL
No data found for DAL
XRAY
DVN
DLR
DFS
DISCA
No data found for DISCA
DISCK
DG
DLTR
D
DOV
DOW
DPS
DTE
DD
DUK
DNB
ETFC
EMN
ETN
EBAY
ECL
EIX
EW
No data found for EW
EA
No data found for EA
EMR
No data found for EMR
ENDP
No data found for ENDP
ETR
No data found for ETR
EOG
No data found for EOG
EQT
No data found for EQT
EFX
No data found for EFX
EQIX
No data found for E

In [53]:
articles

Unnamed: 0,Article,Date,Symbol,Time,Title,Url
0,MOSCOW Jan 17 Russia's biggest home electron...,2014-01-17,MMM,"Fri Jan 17, 2014 | 7:49am EST",Russia's M.video says consumer confidence weak...,http://www.reuters.com//article/russia-mvideo-...
1,By Lewis Krauskopf Diversified manufacture...,2014-01-30,MMM,"Thu Jan 30, 2014 | 9:44am EST","3M revenue growth slows, misses Wall Street ta...",http://www.reuters.com//article/us-3m-results-...
2,By Lewis KrauskopfJan 30 Diversified manufac...,2014-01-30,MMM,"Thu Jan 30, 2014 | 9:42am EST","UPDATE 2-3M revenue growth slows, misses Wall ...",http://www.reuters.com//article/3m-results-idU...
3,Jan 30 Diversified manufacturer 3M Co poste...,2014-01-30,MMM,"Thu Jan 30, 2014 | 7:39am EST",Manufacturer 3M posts 11 percent rise in net i...,http://www.reuters.com//article/3m-results-idU...
4,3M Co. has invested US$15 million in one of ...,2014-03-10,MMM,"Sun Mar 9, 2014 | 8:19pm EDT",SINGAPORE PRESS-3M to lift N95 mask output at ...,http://www.reuters.com//article/singapore-pres...
5,March 18 Diversified manufacturer 3M Co exp...,2014-03-18,MMM,"Tue Mar 18, 2014 | 12:33am EDT",3M sees sales growth in China to triple global...,http://www.reuters.com//article/3m-china-sales...
6,"MOSCOW, March 27 Russian home electronics re...",2014-03-27,MMM,"Thu Mar 27, 2014 | 3:18am EDT",Russia's M.video 2013 net profit rises 38 pct,http://www.reuters.com//article/russia-mvideo-...
7,Post-it notes maker 3M Co (MMM.N) said its b...,2014-04-15,MMM,"Tue Apr 15, 2014 | 12:23pm EDT",3M board opposes proposal for shareholder acti...,http://www.reuters.com//article/us-3m-sharehol...
8,April 15 Post-it notes maker 3M Co said its ...,2014-04-15,MMM,"Tue Apr 15, 2014 | 12:20pm EDT",3M board opposes proposal for shareholder acti...,http://www.reuters.com//article/3m-shareholder...
9,* Apple rallies a day after announcing 7-for...,2014-04-24,MMM,"Thu Apr 24, 2014 | 4:40pm EDT",US STOCKS-Apple buoys Nasdaq; Ukraine weighs o...,http://www.reuters.com//article/markets-usa-st...


In [54]:
symbols = articles.Symbol.unique()
symbols

array([u'MMM', u'ABT', u'ABBV', u'ACN', u'ATVI', u'AYI', u'ADBE', u'AAP',
       u'AES', u'AET', u'AFL', u'AMG', u'A', u'APD', u'AKAM', u'ALK',
       u'ALB', u'AGN', u'LNT', u'ALXN', u'ADS', u'ALL', u'GOOGL', u'GOOG',
       u'MO', u'AMZN', u'AEE', u'AAL', u'AEP', u'AXP', u'AIG', u'AMT',
       u'AWK', u'AMP', u'ABC', u'AME', u'AMGN', u'APH', u'APC', u'ADI',
       u'ANTM', u'AON', u'APA', u'AIV', u'AAPL', u'AMAT', u'ADM', u'ARNC',
       u'AJG', u'AIZ', u'T', u'ADSK', u'ADP', u'AN', u'AZO', u'AVB',
       u'AVY', u'BHI', u'BLL', u'BAC', u'BK', u'BCR', u'BAX', u'BBT',
       u'BDX', u'BBBY', u'BBY', u'BIIB', u'BLK', u'HRB', u'BA', u'BWA',
       u'BXP', u'BSX', u'BMY', u'AVGO', u'CHRW', u'CA', u'COG', u'CPB',
       u'COF', u'CAH', u'HSIC', u'KMX', u'CCL', u'CAT', u'CBG', u'CBS',
       u'CELG', u'CNC', u'CNP', u'CTL', u'CERN', u'CF', u'SCHW', u'CHTR',
       u'CHK', u'CVX', u'CMG', u'CB', u'CHD', u'CI', u'XEC', u'CINF',
       u'CTAS', u'CSCO', u'C', u'CFG', u'CTXS', u'CLX', u'CME', 

### Important use these lines!!!!

In [71]:
symbols = pd.Series(symbols)
symbols.to_csv('datasets/sentimentalized_symbols.csv')

In [94]:
sentimentalized_symbols = list(pd.read_csv('datasets/sentimentalized_symbols.csv', header=None)[1])
# sentimentalized_symbols

In [91]:
a1 = ['a1', 'a2', 'a3', 'a4']
a2 = ['a1', 'a3']

In [92]:
[x for x in a1 if x not in a2]

['a2', 'a4']

In [None]:
quotes = [get_quotes(symbol) for symbol in symbols ]
quotes = pd.concat(quotes, axis=0)
quotes.to_csv('daily_quotes3.csv', encoding = "utf8")
# quotes['Date'] = pd.to_datetime(quotes['Date'], format='%Y-%m-%d')
    
quotes.head()

In [56]:
quotes.index = pd.to_datetime(quotes.index, format='%Y-%m-%d')

In [57]:
articles.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 28109 entries, 0 to 17
Data columns (total 6 columns):
Article    28109 non-null object
Date       28109 non-null datetime64[ns]
Symbol     28109 non-null object
Time       28109 non-null object
Title      28109 non-null object
Url        28109 non-null object
dtypes: datetime64[ns](1), object(5)
memory usage: 1.5+ MB


In [58]:
quotes.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 116423 entries, 2014-01-02 to 2016-12-08
Data columns (total 7 columns):
Open         116423 non-null float64
High         116423 non-null float64
Low          116423 non-null float64
Close        116423 non-null float64
Volume       116423 non-null int64
Adj Close    116423 non-null float64
Symbol       116423 non-null object
dtypes: float64(5), int64(1), object(1)
memory usage: 7.1+ MB


In [59]:
quotes.to_csv('daily_quotes3.csv', encoding = "utf8")
articles.to_csv('articles4.csv', encoding = "utf8")

In [60]:
sentiments = articles.apply(lambdafunc, axis=1)

In [61]:
sentiments

Unnamed: 0,0,1,2,3
0,0.0516,0.049,0.901,0.051
1,0.7096,0.037,0.896,0.067
2,0.7964,0.038,0.886,0.076
3,0.7430,0.000,0.885,0.115
4,0.4019,0.000,0.958,0.042
5,0.7355,0.031,0.909,0.060
6,0.6124,0.000,0.926,0.074
7,0.8271,0.000,0.902,0.098
8,0.8271,0.000,0.903,0.097
9,0.9617,0.055,0.851,0.094


In [95]:
sents = sentiments.reset_index().drop('index', axis=1)
sents

Unnamed: 0,compound,neg,neu,pos
0,0.0516,0.049,0.901,0.051
1,0.7096,0.037,0.896,0.067
2,0.7964,0.038,0.886,0.076
3,0.7430,0.000,0.885,0.115
4,0.4019,0.000,0.958,0.042
5,0.7355,0.031,0.909,0.060
6,0.6124,0.000,0.926,0.074
7,0.8271,0.000,0.902,0.098
8,0.8271,0.000,0.903,0.097
9,0.9617,0.055,0.851,0.094


In [98]:
articles = articles.reset_index().drop('index', axis=1)
articles

Unnamed: 0,Article,Date,Symbol,Time,Title,Url
0,MOSCOW Jan 17 Russia's biggest home electron...,2014-01-17,MMM,"Fri Jan 17, 2014 | 7:49am EST",Russia's M.video says consumer confidence weak...,http://www.reuters.com//article/russia-mvideo-...
1,By Lewis Krauskopf Diversified manufacture...,2014-01-30,MMM,"Thu Jan 30, 2014 | 9:44am EST","3M revenue growth slows, misses Wall Street ta...",http://www.reuters.com//article/us-3m-results-...
2,By Lewis KrauskopfJan 30 Diversified manufac...,2014-01-30,MMM,"Thu Jan 30, 2014 | 9:42am EST","UPDATE 2-3M revenue growth slows, misses Wall ...",http://www.reuters.com//article/3m-results-idU...
3,Jan 30 Diversified manufacturer 3M Co poste...,2014-01-30,MMM,"Thu Jan 30, 2014 | 7:39am EST",Manufacturer 3M posts 11 percent rise in net i...,http://www.reuters.com//article/3m-results-idU...
4,3M Co. has invested US$15 million in one of ...,2014-03-10,MMM,"Sun Mar 9, 2014 | 8:19pm EDT",SINGAPORE PRESS-3M to lift N95 mask output at ...,http://www.reuters.com//article/singapore-pres...
5,March 18 Diversified manufacturer 3M Co exp...,2014-03-18,MMM,"Tue Mar 18, 2014 | 12:33am EDT",3M sees sales growth in China to triple global...,http://www.reuters.com//article/3m-china-sales...
6,"MOSCOW, March 27 Russian home electronics re...",2014-03-27,MMM,"Thu Mar 27, 2014 | 3:18am EDT",Russia's M.video 2013 net profit rises 38 pct,http://www.reuters.com//article/russia-mvideo-...
7,Post-it notes maker 3M Co (MMM.N) said its b...,2014-04-15,MMM,"Tue Apr 15, 2014 | 12:23pm EDT",3M board opposes proposal for shareholder acti...,http://www.reuters.com//article/us-3m-sharehol...
8,April 15 Post-it notes maker 3M Co said its ...,2014-04-15,MMM,"Tue Apr 15, 2014 | 12:20pm EDT",3M board opposes proposal for shareholder acti...,http://www.reuters.com//article/3m-shareholder...
9,* Apple rallies a day after announcing 7-for...,2014-04-24,MMM,"Thu Apr 24, 2014 | 4:40pm EDT",US STOCKS-Apple buoys Nasdaq; Ukraine weighs o...,http://www.reuters.com//article/markets-usa-st...


In [100]:
articles = pd.concat([articles, sents], axis=1)

In [102]:
articles.to_csv('datasets/articles.csv', encoding='utf8')