# Code to collect Daily Prices and Sentiment data

## 1. Loading Libraries

In [3]:
import ast
import csv
import glob
import json
import os

import pandas as pd
import requests
from pandas import json_normalize

pd.set_option('display.max_columns', None)

## 2. Defining API Key and Companies to review

In [4]:
# Alpha Vantage API key. Prefer supplying it through the ALPHAVANTAGE_API_KEY
# environment variable so the credential does not have to live in the notebook.
# The literal below is kept only as a backward-compatible fallback.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY', '6CDMRKZS34KKOJV8')
# Tickers to download. The list should be updated between runs (5 companies per minute).
companies = ['AMZN', 'GOOGL', 'AAPL', 'MSFT', 'NVDA']

## 3. Getting Daily Prices data

In [6]:
# Download the full daily price history of every ticker in `companies` and
# stack the per-ticker tables into one data frame (`dailyPricesData`).
duration = 'DAILY'
# Key of the response payload that holds the price rows
timeSeriesKey = 'Time Series (Daily)'
# Per-ticker frames are collected here and concatenated once after the loop,
# instead of re-copying the accumulated frame on every iteration.
dailyFrames = []
for ticker in companies:  # Only 5 companies can be retrieved at a time with the free API key
    # Getting data for each ticker
    response = requests.get(
        'https://www.alphavantage.co/query',
        params={
            'function': f'TIME_SERIES_{duration}',
            'symbol': ticker,
            'apikey': api_key,
            'outputsize': 'full',
        },
        timeout=30,  # do not hang forever on a stalled connection
    )
    response.raise_for_status()
    payload = response.json()
    # A payload without the time-series key (presumably a rate-limit or error
    # message) is reported and skipped so the tickers already fetched are kept.
    if timeSeriesKey not in payload:
        print(f'Skipping {ticker}: no daily prices in the response: {payload}')
        continue
    # Converting data into a data frame: one row per date
    dailyDataTicker = (
        pd.DataFrame(payload[timeSeriesKey])
        .transpose()
        .reset_index()
        .rename(columns={'index': 'date'})
    )
    # Adding column with Ticker name
    dailyDataTicker['TickerName'] = ticker
    dailyFrames.append(dailyDataTicker)
# Combining the daily data of all tickers (empty frame when nothing was retrieved)
dailyPricesData = pd.concat(dailyFrames, ignore_index=True) if dailyFrames else pd.DataFrame()

In [None]:
# Visualizing data frame: as the last expression of the cell, the notebook
# renders the combined daily prices table built in the previous cell
dailyPricesData

In [None]:
# Persist the combined daily prices as a CSV file, leaving out the row index
dailyPricesData.to_csv(path_or_buf='DailyPricesTechCompanies.csv', index=False)

## 4. Getting News & Sentiment data

Topics covered in this script: ipo, technology, and earnings. We need to run the same code for each topic. More topics could be added if required. See: https://www.alphavantage.co/documentation/#news-sentiment

Sentiment Score definition:

x <= -0.35          : Bearish

-0.35 < x <= -0.15  : Somewhat-Bearish

-0.15 < x < 0.15    : Neutral

0.15 <= x < 0.35    : Somewhat-Bullish

x >= 0.35           : Bullish

In [78]:
# Collect News & Sentiment articles for every ticker in `companies` and flatten
# them into `sentimentNews`: one row per (article, company, topic) combination.

# Topic used to filter the articles. Re-run this cell with 'ipo' or
# 'technology' to collect the other topics.
topic = 'earnings'
# Raw API responses, one dictionary per requested ticker
sentimentData = []
# Getting News & Sentiment for companies
for ticker in companies:
    response = requests.get(
        'https://www.alphavantage.co/query',
        params={
            'function': 'NEWS_SENTIMENT',
            'tickers': ticker,
            'apikey': api_key,
            'limit': 1000,
            'sort': 'LATEST',
            'topics': topic,
        },
        timeout=30,  # do not hang forever on a stalled connection
    )
    response.raise_for_status()
    # Adding dictionary to list
    sentimentData.append(response.json())

# Removing companies without articles. A response with a missing or empty
# 'feed' (no articles, or presumably a rate-limit/error message) cannot be
# expanded below, so it is reported and left out.
validResponses = []
for ticker, urlContent in zip(companies, sentimentData):
    if urlContent.get('feed'):
        validResponses.append(urlContent)
    else:
        print(f'Skipping {ticker}: no articles in the response: {urlContent}')
if not validResponses:
    raise RuntimeError(
        f"No '{topic}' articles were returned for any of the tickers {companies}; "
        "check the API key and the request limits."
    )

# Creating data frame with list
sentimentNews = pd.DataFrame(validResponses)
# For each line, feed has a list of dictionaries.
# Each dictionary represents an article. We will create a row per article
sentimentNews = sentimentNews.explode('feed').reset_index(drop=True)
# The key of each dictionary describes a feature. We will create a new column for each dictionary key
feedData = pd.json_normalize(sentimentNews['feed'].tolist())
sentimentNews = pd.concat([sentimentNews, feedData], axis=1).drop(
    columns=['feed', 'items', 'sentiment_score_definition', 'relevance_score_definition']
)

# Even though the API provides us with an overall sentiment score and label columns,
# column ticker_sentiment contains a list of dictionaries with sentiment scores and labels per company.
# First, we will create a column to record the number of companies related to the article
sentimentNews['numberOfCompanies'] = sentimentNews['ticker_sentiment'].map(len)
# Creating a row per company (Dictionary)
sentimentNews = sentimentNews.explode('ticker_sentiment').reset_index(drop=True)
# Expanding keys of dictionaries
tickerSentimentData = pd.json_normalize(sentimentNews['ticker_sentiment'].tolist())
sentimentNews = pd.concat([sentimentNews, tickerSentimentData], axis=1).drop(
    columns=['ticker_sentiment']
)
# Renaming now so the company relevance score does not clash with the
# 'relevance_score' column that the topic expansion adds further down
sentimentNews = sentimentNews.rename(columns={'ticker': 'companyName',
                                              'relevance_score': 'companyRelevanceScore',
                                              'ticker_sentiment_score': 'companySentimentScore',
                                              'ticker_sentiment_label': 'companySentimentLabel'})

# Each article is related to different topics
# Let's create a row per topic
sentimentNews = sentimentNews.explode('topics').reset_index(drop=True)
# Let's create a column for the topic and topic relevance score
topicData = pd.json_normalize(sentimentNews['topics'].tolist())
# NOTE(review): 'authors' is presumably a list per article, which
# drop_duplicates below could not hash - confirm before keeping this column
sentimentNews = pd.concat([sentimentNews, topicData], axis=1).drop(
    columns=['topics', 'authors']
)
sentimentNews = sentimentNews.rename(columns={'relevance_score': 'topicRelevanceScore'})

# Since more than one company can be related to the article, the same article
# can come back for several requested tickers; let's remove duplicate rows
sentimentNews = sentimentNews.drop_duplicates().reset_index(drop=True)

In [None]:
# Visualizing data frame: as the last expression of the cell, the notebook
# renders the flattened news & sentiment table built in the previous cell
sentimentNews

In [None]:
# Persist the flattened news & sentiment table as a CSV file, leaving out the row index
sentimentNews.to_csv(path_or_buf='SentimentNewsTechCompanies.csv', index=False)