# Correlating Returns

In [1]:
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer

## Load API Keys from Environment Variables

In [13]:
# Pull the variables defined in the local .env file into the process environment
load_dotenv()

# News API client; indexing os.environ raises KeyError right here if the key is missing
news_api_key = os.environ["NEWS_API_KEY"]
newsapi = NewsApiClient(api_key=news_api_key)

# Alpaca credentials; os.getenv yields None (no error) when a variable is absent
alpaca_api_key, alpaca_secret_key = (
    os.getenv("ALPACA_API_KEY"),
    os.getenv("ALPACA_API_SECRET_KEY"),
)

# REST client for the v2 Alpaca API
api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version="v2")

# Report types only, never the values, so no secret ends up in the saved output.
# Note: the first line shows the type of the News API client object, not of the key.
print(f"News API Key type: {type(newsapi)}")
print(f"News ALPACA Key type: {type(alpaca_api_key)}")
print(f"News ALPACA SECRET Key type: {type(alpaca_secret_key)}")

News API Key type: <class 'newsapi.newsapi_client.NewsApiClient'>
News ALPACA Key type: <class 'str'>
News ALPACA SECRET Key type: <class 'str'>


## Get AAPL Returns for Past Month

In [6]:
# Set the ticker
ticker = "AAPL"

# Set timeframe to '1D' (daily bars)
timeframe = "1D"

# Take "now" directly in New York time. Building the timestamp from a naive
# datetime.now() and then attaching tz="America/New_York" would label the
# machine's local wall-clock as New York time, which is wrong on any machine
# running in another timezone. Sampling the clock once also keeps both ends
# of the window derived from the same instant.
now_new_york = pd.Timestamp.now(tz="America/New_York")

# Set current date and the date from 30 days ago using the ISO format.
# DateOffset(days=...) steps whole calendar days and keeps the wall-clock time
# even across a daylight-saving change.
current_date = now_new_york.isoformat()
past_date = (now_new_york - pd.DateOffset(days=30)).isoformat()

# Get the last 30 days of historical data for AAPL
df = api.get_barset(ticker,
                    timeframe,
                    limit=None,
                    start=past_date,
                    end=current_date,
                    after=None,
                    until=None).df

# Display data
df.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2021-06-30 00:00:00-04:00,136.17,137.41,135.87,136.94,51014239
2021-07-01 00:00:00-04:00,136.6,137.33,135.76,137.27,46267213
2021-07-02 00:00:00-04:00,137.92,140.0,137.745,139.96,69160944
2021-07-06 00:00:00-04:00,140.1,143.15,140.07,142.02,97272256
2021-07-07 00:00:00-04:00,143.54,144.89,142.66,144.59,93223232


In [7]:
# Flatten the two-level column header by removing its outer level, then
# discard every price/volume column except the closing price
df = df.droplevel(level=0, axis=1).drop(columns=["open", "high", "low", "volume"])

# The bars are daily, so index by plain calendar date (the time-of-day part is dropped)
df.index = df.index.date

# Preview the result
df.head()

Unnamed: 0,close
2021-06-30,136.94
2021-07-01,137.27
2021-07-02,139.96
2021-07-06,142.02
2021-07-07,144.59


In [8]:
# Day-over-day percentage change of the closing price
daily_change = df.pct_change()

# Rows with missing values are removed; the first row is one of them because it
# has no previous close to compare against
aapl_returns = daily_change.dropna()

# Preview the result
aapl_returns.head()

Unnamed: 0,close
2021-07-01,0.00241
2021-07-02,0.019596
2021-07-06,0.014718
2021-07-07,0.018096
2021-07-08,-0.008783


In [9]:
# Use the newsapi client to collect the most relevant headlines for each day in the past month
def get_headlines(keyword):
    """Collect article titles about ``keyword``, one request per calendar day.

    Starts at the date part of the notebook-level ``current_date`` string and
    walks backwards one day at a time, stopping before the date part of
    ``past_date`` (that day itself is not requested). Each day is queried
    through the notebook-level ``newsapi`` client for English articles sorted
    by relevancy, first page only. No page size is passed, so the number of
    titles per day is whatever the client/provider returns by default.

    Parameters
    ----------
    keyword : str
        Search term sent as the query.

    Returns
    -------
    tuple
        ``(headlines_by_day, days)``: a list holding one list of titles per
        day, and the matching list of ``datetime`` objects, newest first.
        Titles are passed through untouched, so an entry may be ``None`` if
        the response contains one.
    """
    headlines_by_day = []
    days = []

    # Only the leading YYYY-MM-DD portion of the ISO strings is used
    date = datetime.strptime(current_date[:10], "%Y-%m-%d")
    stop_date = datetime.strptime(past_date[:10], "%Y-%m-%d")

    print(f"Fetching news about '{keyword}'")
    print("*" * 30)

    while date > stop_date:
        print(f"retrieving news from: {date}")

        # Using the same value for both bounds restricts the query to a single day
        day_string = date.date().isoformat()
        response = newsapi.get_everything(
            q=keyword,
            from_param=day_string,
            to=day_string,
            language="en",
            sort_by="relevancy",
            page=1,
        )

        headlines_by_day.append([article["title"] for article in response["articles"]])
        days.append(date)

        # Step back to the previous calendar day
        date -= timedelta(days=1)

    return headlines_by_day, days

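Note: Each call below issues one request per day in the date window (roughly 30 requests per topic). The News API developer plan allows 100 requests per 24-hour period (50 available every 12 hours), so running the three calls back-to-back can exhaust the quota partway through, as happened with the third topic below. Plan on running these cells at most once per 24 hours.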

In [14]:
# First topic: per-day headlines (and their dates) mentioning "apple"
apple_news = get_headlines(keyword="apple")

Fetching news about 'apple'
******************************
retrieving news from: 2021-07-30 00:00:00
retrieving news from: 2021-07-29 00:00:00
retrieving news from: 2021-07-28 00:00:00
retrieving news from: 2021-07-27 00:00:00
retrieving news from: 2021-07-26 00:00:00
retrieving news from: 2021-07-25 00:00:00
retrieving news from: 2021-07-24 00:00:00
retrieving news from: 2021-07-23 00:00:00
retrieving news from: 2021-07-22 00:00:00
retrieving news from: 2021-07-21 00:00:00
retrieving news from: 2021-07-20 00:00:00
retrieving news from: 2021-07-19 00:00:00
retrieving news from: 2021-07-18 00:00:00
retrieving news from: 2021-07-17 00:00:00
retrieving news from: 2021-07-16 00:00:00
retrieving news from: 2021-07-15 00:00:00
retrieving news from: 2021-07-14 00:00:00
retrieving news from: 2021-07-13 00:00:00
retrieving news from: 2021-07-12 00:00:00
retrieving news from: 2021-07-11 00:00:00
retrieving news from: 2021-07-10 00:00:00
retrieving news from: 2021-07-09 00:00:00
retrieving news f

In [15]:
# Second topic: per-day headlines (and their dates) mentioning "trade"
trade_news = get_headlines(keyword="trade")

Fetching news about 'trade'
******************************
retrieving news from: 2021-07-30 00:00:00
retrieving news from: 2021-07-29 00:00:00
retrieving news from: 2021-07-28 00:00:00
retrieving news from: 2021-07-27 00:00:00
retrieving news from: 2021-07-26 00:00:00
retrieving news from: 2021-07-25 00:00:00
retrieving news from: 2021-07-24 00:00:00
retrieving news from: 2021-07-23 00:00:00
retrieving news from: 2021-07-22 00:00:00
retrieving news from: 2021-07-21 00:00:00
retrieving news from: 2021-07-20 00:00:00
retrieving news from: 2021-07-19 00:00:00
retrieving news from: 2021-07-18 00:00:00
retrieving news from: 2021-07-17 00:00:00
retrieving news from: 2021-07-16 00:00:00
retrieving news from: 2021-07-15 00:00:00
retrieving news from: 2021-07-14 00:00:00
retrieving news from: 2021-07-13 00:00:00
retrieving news from: 2021-07-12 00:00:00
retrieving news from: 2021-07-11 00:00:00
retrieving news from: 2021-07-10 00:00:00
retrieving news from: 2021-07-09 00:00:00
retrieving news f

In [16]:
# Third topic: per-day headlines (and their dates) mentioning "economy"
economy_news = get_headlines(keyword="economy")

Fetching news about 'economy'
******************************
retrieving news from: 2021-07-30 00:00:00
retrieving news from: 2021-07-29 00:00:00
retrieving news from: 2021-07-28 00:00:00
retrieving news from: 2021-07-27 00:00:00
retrieving news from: 2021-07-26 00:00:00
retrieving news from: 2021-07-25 00:00:00
retrieving news from: 2021-07-24 00:00:00
retrieving news from: 2021-07-23 00:00:00
retrieving news from: 2021-07-22 00:00:00
retrieving news from: 2021-07-21 00:00:00
retrieving news from: 2021-07-20 00:00:00
retrieving news from: 2021-07-19 00:00:00
retrieving news from: 2021-07-18 00:00:00
retrieving news from: 2021-07-17 00:00:00
retrieving news from: 2021-07-16 00:00:00
retrieving news from: 2021-07-15 00:00:00
retrieving news from: 2021-07-14 00:00:00
retrieving news from: 2021-07-13 00:00:00


NewsAPIException: {'status': 'error', 'code': 'rateLimited', 'message': 'You have made too many requests recently. Developer accounts are limited to 100 requests over a 24 hour period (50 requests available every 12 hours). Please upgrade to a paid plan if you need more requests.'}

In [17]:
# Instantiate SentimentIntensityAnalyzer (imported from nltk.sentiment.vader).
# The sentiment helper defined further down looks this object up through the
# notebook-level name `analyzer`, so this cell must run before that helper is called.
analyzer = SentimentIntensityAnalyzer()

In [43]:
# Create function that computes average compound sentiment of headlines for each day
def headline_sentiment_summarizer_avg(headlines):
    """Average the compound sentiment score of each day's headlines.

    Every non-``None`` headline is scored with the notebook-level ``analyzer``
    (``polarity_scores(...)["compound"]``) and the scores of a day are averaged.

    Parameters
    ----------
    headlines : iterable of iterables
        One inner collection of headline strings per day. ``None`` entries
        are skipped.

    Returns
    -------
    list of float
        One average per day, in input order. A day with no usable headline
        (empty, or only ``None`` entries) yields ``nan`` instead of raising
        ``ZeroDivisionError``, so the output stays aligned with the input days.
    """
    sentiment = []
    for day in headlines:
        day_scores = [
            analyzer.polarity_scores(headline)["compound"]
            for headline in day
            if headline is not None
        ]
        if day_scores:
            sentiment.append(sum(day_scores) / len(day_scores))
        else:
            # No headline to score: the mean is undefined, so record a missing value
            sentiment.append(float("nan"))
    return sentiment

In [60]:
# Apple sentiment analysis - Compound score
# apple_news is the (headlines, dates) pair returned by get_headlines. The [1:]
# slice skips its first entry, which is the most recent day fetched.
apple_headlines = apple_news[0][1:]

# One average compound score per remaining day
apple_sentiment = headline_sentiment_summarizer_avg(apple_headlines)

# Spot-check the first day's average
apple_sentiment[0]

0.10927499999999998

In [61]:
# Trade sentiment analysis - Compound score
# trade_news is the (headlines, dates) pair returned by get_headlines. The [1:]
# slice skips its first entry, which is the most recent day fetched.
trade_headlines = trade_news[0][1:]

# One average compound score per remaining day
trade_sentiment = headline_sentiment_summarizer_avg(trade_headlines)

# Spot-check the first day's average
trade_sentiment[0]

-0.04529

In [62]:
# Economy sentiment analysis - Compound score
# economy_news is the (headlines, dates) pair returned by get_headlines. The [1:]
# slice skips its first entry, which is the most recent day fetched.
# This cell needs the economy fetch cell to have completed successfully first.
economy_headlines = economy_news[0][1:]

# One average compound score per remaining day
economy_sentiment = headline_sentiment_summarizer_avg(economy_headlines)

# Spot-check the first day's average
economy_sentiment[0]

NameError: name 'economy_news' is not defined

In [79]:
# Combine Sentiment Averages into DataFrame
# zip pairs the two per-day lists position by position (stopping at the shorter one)
comb_sents = [pair for pair in zip(apple_sentiment, trade_sentiment)]

# One column per topic, one row per day
topic_columns = ["APPLE", "TRADE"]
sentiment_df = pd.DataFrame(comb_sents, columns=topic_columns)

sentiment_df.head()

Unnamed: 0,APPLE,TRADE
0,0.109275,-0.04529
1,0.04671,-0.016005
2,0.10208,0.04145
3,0.104655,0.056595
4,0.08784,0.073835


In [80]:
# Set the index value of the sentiment averages DataFrame to be the series of dates.
# get_headlines returns datetime objects, while the AAPL returns frame is indexed
# by plain datetime.date values. Converting here gives both frames the same key
# type, so aligning them on the index does not depend on pandas implicitly
# treating a datetime and a date as equal.
# The [1:] slice skips the first (most recent) day, matching the headline slices
# used to compute the sentiment averages.
dates = [fetched_on.date() for fetched_on in apple_news[1][1:]]

# Build a new date-indexed frame instead of mutating the existing one in place
sentiment_df = sentiment_df.assign(DATES=dates).set_index("DATES")

sentiment_df.head()

Unnamed: 0_level_0,APPLE,TRADE
DATES,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-07-29,0.109275,-0.04529
2021-07-28,0.04671,-0.016005
2021-07-27,0.10208,0.04145
2021-07-26,0.104655,0.056595
2021-07-25,0.08784,0.073835


In [83]:
# Sanity check: (rows, columns) = (days with sentiment averages, topic columns)
sentiment_df.shape

(29, 2)

In [91]:
# Merge with AAPL returns: place the daily returns next to the sentiment
# averages, aligned on the index of the two objects
close_returns = aapl_returns["close"]
merged_df = pd.concat([close_returns, sentiment_df], axis="columns")

# Keep only the rows in which every column has a value
merged_final_df = merged_df.dropna()

merged_final_df.head()

Unnamed: 0,close,APPLE,TRADE
2021-07-01,0.00241,0.05852,-0.006305
2021-07-02,0.019596,0.08305,-0.01371
2021-07-06,0.014718,0.06753,-0.122905
2021-07-07,0.018096,-0.093715,0.008625
2021-07-08,-0.008783,0.04035,0.030665


In [93]:
# Sanity check: (rows, columns) left after dropping rows with missing values
merged_final_df.shape

(20, 3)

In [96]:
# Correlate the headlines' sentiment to returns:
# pairwise correlation between the AAPL daily return ("close") and each topic's
# average sentiment, computed by DataFrame.corr() with its default settings
correlation = merged_final_df.corr()

# Display the correlation matrix
correlation

Unnamed: 0,close,APPLE,TRADE
close,1.0,0.06379,-0.142225
APPLE,0.06379,1.0,0.160219
TRADE,-0.142225,0.160219,1.0


In [16]:
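# Correlate the headlines' sentiment to returns
# YOUR CODE HERE
# NOTE(review): this looks like a leftover template cell. The correlation is already
# computed in the preceding cell, and the table shown below (with aapl_avg / trade_avg /
# economy_avg columns) does not match the frames built in this notebook. TODO: confirm and remove.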

Unnamed: 0,close,aapl_avg,trade_avg,economy_avg
close,1.0,0.547862,0.455142,-0.0524333
aapl_avg,0.547862,1.0,-0.0109107,-0.425666
trade_avg,0.455142,-0.0109107,1.0,0.170973
economy_avg,-0.0524333,-0.425666,0.170973,1.0
