# Correlating Returns

In [34]:
import alpaca_trade_api as tradeapi
import pandas as pd
from newsapi.newsapi_client import NewsApiClient
from datetime import date, datetime, timedelta
import os
from dotenv import load_dotenv
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load API Keys from Environment Variables

In [36]:
# Read the variables from the .env file into the process environment
load_dotenv()

# News API client; a missing NEWS_API_KEY raises KeyError right here
newsapi = NewsApiClient(api_key=os.environ["NEWS_API_KEY"])

# Alpaca credentials; each one is None when its variable is not set
alpaca_api_key = os.environ.get("ALPACA_API_KEY")
alpaca_secret_key = os.environ.get("ALPACA_SECRET_KEY")

# Alpaca REST client talking to the v2 API
api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version="v2")

## Get AAPL Returns for Past Month

In [37]:
# Set the ticker
ticker = "AAPL"

# Set timeframe to '1D' (one bar per trading day)
timeframe = '1D'

# Get current date and the date from four weeks ago.
# `date.today()` is read once so both ends of the window derive from the
# same day even if the cell runs across midnight.
current_date = date.today()
past_date = current_date - timedelta(weeks=4)

# Get 4 weeks worth of historical data for AAPL.
# The window must run from the older date (`start`) to the newer date (`end`),
# and the bounds are sent as ISO-format timestamps in the exchange's timezone.
df = api.get_barset(
    ticker,
    timeframe,
    start=pd.Timestamp(past_date, tz="America/New_York").isoformat(),
    end=pd.Timestamp(current_date, tz="America/New_York").isoformat(),
).df

df.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,open,high,low,close,volume
2019-11-05 00:00:00-05:00,257.05,258.19,256.32,257.15,17738372
2019-11-06 00:00:00-05:00,256.84,257.49,255.365,257.24,15328064
2019-11-07 00:00:00-05:00,258.74,260.35,258.11,259.5,22267200
2019-11-08 00:00:00-05:00,258.69,260.44,256.85,260.14,15896858
2019-11-11 00:00:00-05:00,258.3,262.47,258.28,262.2,19401290


In [38]:
# Flatten the two-level column header by removing the outer (ticker) level,
# then discard every price field except `close`
df = df.droplevel(level=0, axis=1).drop(columns=["open", "high", "low", "volume"])

# The bars are daily, so index rows by calendar date only (no time component)
df.index = df.index.date

df.head()

Unnamed: 0,close
2019-11-05,257.15
2019-11-06,257.24
2019-11-07,259.5
2019-11-08,260.14
2019-11-11,262.2


In [39]:
# Use the `pct_change` function to calculate daily returns of AAPL:
# each row is the fractional change in `close` relative to the previous row,
# so the first row has no prior value and comes out as NaN.
aapl_returns = df.pct_change()
aapl_returns.head()

Unnamed: 0,close
2019-11-05,
2019-11-06,0.00035
2019-11-07,0.008786
2019-11-08,0.002466
2019-11-11,0.007919


In [40]:
# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    """Collect article titles about ``keyword``, one request per day.

    Starts at the module-level ``current_date`` and steps back one day at a
    time while the day is still later than the module-level ``past_date``
    (so ``past_date`` itself is not queried). Each day issues a single
    ``newsapi.get_everything`` request for page 1 of English articles sorted
    by relevancy, and progress is printed as it goes.

    Returns:
        A tuple ``(all_headlines, all_dates)``: a list holding one list of
        titles per queried day, and the matching list of dates in the same
        (newest-first) order.
    """
    all_headlines, all_dates = [], []
    day = current_date
    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while day > past_date:
        print(f"retrieving news from: {day}")
        # Restrict the query to a single calendar day: from == to
        response = newsapi.get_everything(
            q=keyword,
            from_param=str(day),
            to=str(day),
            language="en",
            sort_by="relevancy",
            page=1,
        )
        all_headlines.append([article["title"] for article in response["articles"]])
        all_dates.append(day)
        day -= timedelta(days=1)
    return all_headlines, all_dates



In [1]:
# Get first topic


In [2]:
# Get second topic


In [3]:
# Get third topic


In [4]:
# Get fourth topic


In [5]:
# Get fifth topic


In [45]:
# Instantiate SentimentIntensityAnalyzer


In [46]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    """Return the average compound sentiment score of the headlines for each day.

    Args:
        headlines: An iterable with one entry per day, each entry being an
            iterable of headline strings. ``None`` headlines are skipped.

    Returns:
        A list with one float per day, in the same order as ``headlines``.
        A day with no usable headlines (empty, or only ``None`` entries)
        yields ``nan`` instead of raising ``ZeroDivisionError``, so the
        result stays aligned one-to-one with the input days.

    Scoring uses the module-level ``sid`` analyzer via
    ``sid.polarity_scores(text)["compound"]``; ``sid`` must be defined
    before this function is called with any non-``None`` headline.
    """
    sentiment = []
    for day in headlines:
        day_scores = [
            sid.polarity_scores(h)["compound"] for h in day if h is not None
        ]
        if day_scores:
            sentiment.append(sum(day_scores) / len(day_scores))
        else:
            # No headline to score: the average is undefined, so record NaN
            # rather than dividing by zero or inventing a neutral 0.0.
            sentiment.append(float("nan"))
    return sentiment


In [67]:
# Get averages of each topic's sentiment


In [70]:
# Combine Sentiment Averages into DataFrame


In [71]:
# Set the index value of the sentiment average DataFrame to be the series of dates.


In [6]:
# Merge with AAPL returns


In [7]:
# Correlate the headlines' sentiment to returns
