In [None]:
# Execute the helper notebooks so their definitions land in this namespace.
# NOTE(review): WordProxy, processSequences and calculateNLP are used below but
# are not defined in this file — presumably they come from these notebooks;
# confirm which notebook provides which name.
%run SequenceDeltaAlgo.ipynb
%run SentimentAnalysis.ipynb

In [None]:
import requests
from bs4 import BeautifulSoup
import os
import string
import pandas as pd
import re
from datetime import datetime
from datetime import timedelta

import nltk
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import seaborn as sns
sns.set(style="darkgrid")
import matplotlib.pyplot as plt

# Constants ------------------------------------------------------------------------------
# Federal Reserve URLs
# Placeholder inside URL_FS that strLoadArticle replaces with a date string.
DAY_FS='[YYYYMMDD]'
# Scheme and host prepended to the relative hrefs collected by get_links.
FS_HOST= 'https://www.federalreserve.gov'
# Press-release URL template, used by strLoadArticle when no explicit URL is given.
URL_FS= 'https://www.federalreserve.gov/newsevents/pressreleases/monetary[YYYYMMDD]a.htm'
# Calendar page that get_links scrapes for statement links.
URL_CALENDAR = 'https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm'

# NOTE(review): a `global` statement at module level has no effect and does not
# create the name; lastFSD is first assigned inside startupArticles().
global lastFSD


# Methods scraping and feed ---------------------------------------------------------------

def get_links():
    """Collect the monetary-policy press-release links from the FOMC calendar.

    Downloads URL_CALENDAR, keeps every anchor whose href starts with the
    press-release path followed by an 8-digit date and an 'a' or 'x' suffix,
    and turns each relative href into an absolute URL with FS_HOST.

    Returns:
        dict: 8-digit date string -> absolute statement URL. When several
        links carry the same date, the last one found wins.
    """
    r = requests.get(URL_CALENDAR)
    soup = BeautifulSoup(r.text, 'html.parser')
    # Raw string: a plain '\d' is an invalid string escape that newer Python
    # versions warn about. The dot before 'htm' is escaped so it only matches
    # a literal '.', not any character.
    href_pattern = re.compile(r'^/newsevents/pressreleases/monetary\d{8}[ax]\.htm')
    fomc = {}
    for anchor in soup.find_all('a', href=href_pattern):
        link = FS_HOST + anchor.attrs['href']
        strDate = re.findall('[0-9]{8}', link)[0]
        fomc[strDate] = link
    return fomc
        



def strLoadArticle(strdate, url=None):
    """Return the path of the cached statement for ``strdate``, fetching it if absent.

    If 'fedStates/<strdate>' already exists its path is returned untouched.
    Otherwise the statement page is downloaded, the paragraphs of the main
    article div are extracted with all tag attributes removed, the block
    produced by createSimpleNLP is appended, and the result is written to
    the cache file.

    Args:
        strdate: date string used as the cache file name and, when ``url`` is
            None, substituted for DAY_FS in URL_FS.
        url: explicit page URL; overrides the URL_FS template when given.

    Returns:
        The cache file path, or None when the page could not be downloaded,
        parsed or stored (the traceback is printed to stderr).
    """
    # Function-scope import: the file's import block does not provide it.
    import traceback

    datedArticle = 'fedStates/' + strdate
    if os.path.exists(datedArticle):
        return datedArticle

    datedUrl = URL_FS.replace(DAY_FS, strdate) if url is None else url
    try:
        page = requests.get(datedUrl)
        # Do not cache an error page as if it were a statement.
        page.raise_for_status()
        soup = BeautifulSoup(page.content, 'html.parser')
        divs = soup.find(id="article").findAll('div')
        # The statement body is taken to be the div with the most direct <p> children.
        ldivs = [len(d.findChildren("p", recursive=False)) for d in divs]
        articleDiv = divs[ldivs.index(max(ldivs))]

        # Drop every attribute (classes, styles, hrefs) from the nested tags.
        for tag in articleDiv.findAll(True):
            if tag.attrs:
                tag.attrs = {}
        # Serialise after the clean-up, unconditionally, so the content is
        # defined even when no tag carried attributes.
        content = [str(p) for p in articleDiv.findAll('p')]

        sad = " <br> ".join(content)
        sad += createSimpleNLP(sad, datedUrl, strdate)

        # Open the cache file only once the full text exists: a failure above
        # must not leave an empty file that later counts as a cache hit.
        with open(datedArticle, "w") as f:
            f.write(sad)
        return datedArticle
    except Exception:
        traceback.print_exc()
        return None
        
  
    
def extractArticle(sdate):
    """Return the stored text of the statement dated ``sdate``.

    The file path comes from strLoadArticle; when that yields None an empty
    string is returned instead of the file content.
    """
    path = strLoadArticle(sdate)
    if path is None:
        return ""
    with open(path, 'r') as handle:
        return handle.read()
        
def splitArticle(a):
    """Split a stored article into word proxies and its trailing sentiment block.

    The '<p>' and '</p>' markers are removed, then the text is cut at the last
    occurrence of '<div id="snlp"'. Everything before the cut is split on
    single spaces, blank tokens are dropped, and each remaining token is
    wrapped in WordProxy.

    Args:
        a: article text as returned by extractArticle (may be empty).

    Returns:
        tuple: (list of WordProxy objects, sentiment block text). When the
        text has no sentiment block, the whole text is treated as article
        body and the second element is ''.
    """
    a = a.replace('<p>', '').replace('</p>', '')
    marker = a.rfind('<div id="snlp"')
    if marker == -1:
        # rfind signals "not found" with -1; slicing with it would cut the
        # last character off the body and return it as the score.
        body, score = a, ''
    else:
        body, score = a[:marker], a[marker:]
    wordProxies = [WordProxy(w) for w in body.split(' ') if w.strip() != '']
    return wordProxies, score
 
    
def startupArticles():
    """Synchronise the 'fedStates/' cache with the FOMC calendar.

    Resets the global lastFSD, reads the calendar links with get_links,
    removes the dates that already have a cached file, downloads the rest
    through strLoadArticle, and leaves lastFSD at the greatest date string
    seen among cached and newly stored statements (None when there is none).
    """
    global lastFSD
    lastFSD = None
    fomc = get_links()

    # A fresh checkout has no cache directory yet; listdir would fail on it.
    os.makedirs('fedStates/', exist_ok=True)

    dateRegex = re.compile(r'(\d{8})')
    for datedFile in os.listdir('fedStates/'):
        if dateRegex.search(datedFile):
            # Cached files need not appear on the calendar page, so a missing
            # key is not an error.
            fomc.pop(datedFile, None)
            if lastFSD is None or lastFSD < datedFile:
                lastFSD = datedFile

    for d, url in sorted(fomc.items()):
        if strLoadArticle(d, url) is not None:
            # Only move forward: back-filling an older statement must not
            # lower the latest known date.
            if lastFSD is None or lastFSD < d:
                lastFSD = d


        
def dailyCheckArticle():
    """Try to fetch a statement dated today and refresh the chart if one is stored.

    Nothing is attempted when the global lastFSD is already today's date or
    later. Otherwise strLoadArticle is called with today's date; on success
    lastFSD is advanced and generateNLPGraph is run.

    Returns:
        Today's date string ('%Y%m%d') when a statement was stored,
        otherwise None.
    """
    global lastFSD
    try:
        last_seen = lastFSD
    except NameError:
        # lastFSD is only created by startupArticles; treat "never set" the
        # same as "no statement known yet".
        last_seen = None

    sdnow = datetime.now().strftime("%Y%m%d")
    # None cannot be compared with a string, so check for it first.
    if last_seen is not None and last_seen >= sdnow:
        return None
    if strLoadArticle(sdnow) is None:
        return None

    lastFSD = sdnow
    generateNLPGraph()
    return lastFSD


def createSimpleNLP(sad, url, d): 
    """Build the HTML sentiment block appended to a stored statement.

    Args:
        sad: statement text passed to calculateNLP.
        url: statement URL, used as the heading link target.
        d: statement date string, used as the heading link text.

    Returns:
        str: a '<div id="snlp">' block with the 'net', 'pos' and 'neg'
        entries of calculateNLP's result, each rounded to 6 decimals.
    """
    # The heading link is closed with </a>; it used to be left open, which
    # made the rest of the block part of the anchor.
    nlpBloc = """
                <br>
                <div id="snlp" style="background-color:#FCF3CF;"> 
                    <h3>Sentiment Analysis for Fed Statment date: <a href='[url]'>[date]</a></h3>
                    <h4><span style="color:blue">Net       : [=]</span></h4>
                    <h4><span style="color:green">Positive  : [+]</span></h4>
                    <h4><span style="color:red">Negative  : [-]</span></h4>
                  
                 </div>
              """
    nlpBloc = nlpBloc.replace('[url]', url)
    nlpBloc = nlpBloc.replace('[date]', d)

    pscore = calculateNLP(sad)
    for placeholder, key in (('[=]', 'net'), ('[+]', 'pos'), ('[-]', 'neg')):
        nlpBloc = nlpBloc.replace(placeholder, str(round(pscore[key], 6)))

    return nlpBloc



def parseSimpleNLP(nlpBloc):
    """Extract the score strings from a sentiment block.

    The block is cut at every '</h4>'; for each chunk before the last cut the
    tags are removed and the text after the last ':' is kept, stripped of
    surrounding whitespace.

    Taking the text after '</span>' does not work for blocks built by
    createSimpleNLP, because the value sits inside the span and the result is
    always empty. Reading the text after the label's colon handles that
    layout as well as one where the value follows the span.

    Args:
        nlpBloc: sentiment block text (may be empty).

    Returns:
        list of str: one entry per '<h4>' line, in document order; empty
        when the block has no '</h4>'.
    """
    strScore = []
    for h4Bloc in nlpBloc.split('</h4>')[:-1]:
        # Remove the tags first so colons inside style/href attributes
        # cannot be mistaken for the label separator.
        text = re.sub(r'<[^>]*>', '', h4Bloc)
        strScore.append(text.rpartition(':')[2].strip())
    return strScore


def createDeltaNLP(fix, new, old, sda0, sda1, nlpScore0, nlpScore1): 
    """Render the HTML table comparing the sentiment of two statements.

    Args:
        fix, new, old: inputs handed to calculateNLP for the "Common items",
            "New items" and "Deleted items" columns respectively.
        sda0, sda1: text for the two statement column headers.
        nlpScore0, nlpScore1: indexable sequences of three strings (net,
            positive, negative) shown under the two statement headers.

    Returns:
        str: the '<div id="deltaNlp">' block with every placeholder filled;
        computed scores are rounded to 6 decimals.
    """
    html = """
                <br>
                <div id="deltaNlp" style="background-color:#FCF3CF;"> 
                    <h3>Sentiment Analysis for Fed Statment Delta:</h3>
                    <table style="font-size:20px;width: 70%;">
                        <tr style="background-color:#DCDCDC;">
                          <th style="text-align: left;width: 19%;border-bottom: thin solid;"></th>
                          <th style="text-align: left;width: 18%;border-bottom: thin solid;"><span style="background-color:#ff6424;">[d0]</span></th>
                          <th style="text-align: left;width: 18%;border-bottom: thin solid;"><span style="background-color:lightgreen">[d1]</span></th>
                          <th style="text-align: left;width: 15%;border-bottom: thin solid;">Common items</th>
                          <th style="text-align: left;width: 15%;border-bottom: thin solid;color:red">Deleted items</th>
                          <th style="text-align: left;width: 15%;border-bottom: thin solid;color:green;">New items</th>
                        </tr>
                        <tr style="color:blue">
                          <td style="border-bottom: thin solid;">Net</td>
                          <td style="border-bottom: thin solid;">[so=]</td>
                          <td style="border-bottom: thin solid;">[sn=]</td>
                          <td style="border-bottom: thin solid;">[f=]</td>
                          <td style="border-bottom: thin solid;">[o=]</td>
                          <td style="border-bottom: thin solid;">[n=]</td>
                        </tr>
                        <tr style="color:green">
                          <td style="border-bottom: thin solid;">Positive</td>
                          <td style="border-bottom: thin solid;">[so+]</td>
                          <td style="border-bottom: thin solid;">[sn+]</td>
                          <td style="border-bottom: thin solid;">[f+]</td>
                          <td style="border-bottom: thin solid;">[o+]</td>
                          <td style="border-bottom: thin solid;">[n+]</td>
                        </tr>
                        <tr style="color:red">
                          <td style="border-bottom: thin solid;">Negative</td>
                          <td style="border-bottom: thin solid;">[so-]</td>
                          <td style="border-bottom: thin solid;">[sn-]</td>
                          <td style="border-bottom: thin solid;">[f-]</td>
                          <td style="border-bottom: thin solid;">[o-]</td>
                          <td style="border-bottom: thin solid;">[n-]</td>
                        </tr>
                      </table>
                 </div>
              """
    # Column headers.
    html = html.replace('[d0]', sda0).replace('[d1]', sda1)

    # Scores already stored with each statement: index 0/1/2 -> '=', '+', '-'.
    signs = ('=', '+', '-')
    for prefix, stored in (('so', nlpScore0), ('sn', nlpScore1)):
        for position, sign in enumerate(signs):
            html = html.replace('[' + prefix + sign + ']', stored[position])

    # Scores computed here, in the order common, new, deleted.
    score_keys = (('=', 'net'), ('+', 'pos'), ('-', 'neg'))
    for prefix, items in (('f', fix), ('n', new), ('o', old)):
        computed = calculateNLP(items)
        for sign, key in score_keys:
            html = html.replace('[' + prefix + sign + ']', str(round(computed[key], 6)))

    return html


def calculateDeltaArticles(sdate0, sdate1):
    """Return the HTML comparison of the statements dated ``sdate0`` and ``sdate1``.

    Both articles are loaded and split into word proxies plus stored
    sentiment block, the word sequences are handed to processSequences (the
    ``sdate1`` sequence first), and the delta sentiment table from
    createDeltaNLP is appended to its first result after a '<br>'.
    """
    first_text = extractArticle(sdate0)
    second_text = extractArticle(sdate1)

    first_words, first_block = splitArticle(first_text)
    second_words, second_block = splitArticle(second_text)

    first_scores = parseSimpleNLP(first_block)
    second_scores = parseSimpleNLP(second_block)

    res, fix, new, old = processSequences(second_words, first_words)
    delta_table = createDeltaNLP(
        fix, new, old, sdate0, sdate1, first_scores, second_scores
    )
    return res + '<br>' + delta_table


def generateNLPGraph():
    """Plot the stored net sentiment of every cached statement and save it as a PNG.

    Every file in 'fedStates/' whose name contains 8 consecutive digits is
    read; its net score (first entry of parseSimpleNLP) becomes one point of
    the 'Net' series, and a 'Change' series is derived from it. The chart is
    written to './static/historicNlp.png'.

    Files whose net score cannot be read are reported and skipped, and
    nothing is drawn when no score is available at all.
    """
    tsScore = []
    dateRegex = re.compile(r'(\d{8})')
    for datedFile in os.listdir('fedStates/'):
        if not dateRegex.search(datedFile):
            continue
        _, nlp = splitArticle(extractArticle(datedFile))
        try:
            net = float(parseSimpleNLP(nlp)[0])
        except (IndexError, ValueError):
            # One unreadable cache file should not prevent the whole chart.
            print('generateNLPGraph: no readable net score in fedStates/'
                  + datedFile + ', skipped')
            continue
        isoDate = datedFile[0:4] + '-' + datedFile[4:6] + '-' + datedFile[6:8]
        tsScore.append([isoDate, net])

    if not tsScore:
        return

    colors = ['green']
    df = pd.DataFrame(tsScore, columns=['Date', 'Net'])
    df = df.sort_values(by=['Date'])

    # NOTE(review): this is the ratio previous / current, while the chart
    # title calls it a first derivative — confirm which one is intended.
    df['Change'] = (df['Net'].shift(1) / df['Net'])

    #TODO smoothing curve

    df = df.melt('Date', var_name='NLP', value_name='score')

    sns.set_palette(sns.color_palette(colors))
    # factorplot was renamed to catplot and later removed from seaborn;
    # kind='point' reproduces factorplot's default plot type. Fall back to
    # factorplot on installations that predate catplot.
    catplot = getattr(sns, 'catplot', None)
    if catplot is not None:
        sns_plot = catplot(x="Date", y="score", hue='NLP', data=df, kind='point')
    else:
        sns_plot = sns.factorplot(x="Date", y="score", hue='NLP', data=df)
    sns_plot = sns_plot.set(title='Change in sentiment over time (first derivative)')
    sns_plot.set_xticklabels(rotation=45)
    sns_plot.fig.set_figwidth(24)
    sns_plot.fig.set_figheight(8)
    sns_plot.savefig("./static/historicNlp.png")
    # Release the figure so repeated calls do not pile up open figures.
    plt.close(sns_plot.fig)
    

In [None]:
# Rebuild ./static/historicNlp.png from the files under fedStates/ when this cell runs.
generateNLPGraph()