In [33]:
from alpaca.common.rest import RESTClient
from alpaca.data import StockHistoricalDataClient
from alpaca.trading import TradingClient
import pandas as pd
import datetime

import pprint

import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
class DataManager(object):
    """Bundle of the Alpaca clients used by this notebook.

    Credentials are read from the ``ALPACA_API_KEY`` and ``ALPACA_SECRET_KEY``
    environment variables when an instance is created.
    """
    # All three attributes are assigned in __init__.
    historicaldata_client: None
    trading_client: None
    rest_client: None

    def __init__(self):
        # Read the credentials once and reuse them for every client.
        api_key = os.getenv("ALPACA_API_KEY")
        secret_key = os.getenv("ALPACA_SECRET_KEY")

        self.historicaldata_client = StockHistoricalDataClient(api_key, secret_key)
        self.trading_client = TradingClient(api_key, secret_key)
        self.rest_client = RESTClient(
            base_url='https://data.alpaca.markets',
            api_version='v1beta1',
            api_key=api_key,
            secret_key=secret_key,
        )

    # Fetch historical news
    def get_news(self, symbols: pd.Series, start_datetime: datetime.datetime, end_datetime: datetime.datetime):
        """Return every news item for ``symbols`` between the two datetimes.

        Follows ``next_page_token`` until the service stops returning one and
        concatenates the ``news`` entries of all pages.

        Args:
            symbols: Ticker symbols, as a pandas Series, any iterable of
                strings, or an already comma-separated string.
            start_datetime: Start of the window; converted with
                ``pd.to_datetime(..., utc=True)``.
            end_datetime: End of the window; converted the same way.

        Returns:
            list: The items found under the ``news`` key of each response
            page, in the order the pages were received.
        """
        utc_start_datetime = pd.to_datetime(start_datetime, utc=True)
        utc_end_datetime = pd.to_datetime(end_datetime, utc=True)

        # The news endpoint documents `symbols` as a single comma-separated
        # string, so build that instead of handing over a Python list.
        if isinstance(symbols, str):
            symbols_param = symbols
        else:
            symbols_param = ",".join(str(symbol) for symbol in symbols)

        news_endpoint = '/news'
        base_parameters = {
            'start': utc_start_datetime.isoformat(),
            'end': utc_end_datetime.isoformat(),
            'symbols': symbols_param,
        }

        news_list = []
        page_token = None
        while True:
            parameters = dict(base_parameters)
            # Only send a page token once the service has handed one out.
            if page_token:
                parameters['page_token'] = page_token

            resp = self.rest_client.get(news_endpoint, parameters)

            # A page without a 'news' entry (missing or null) contributes nothing.
            news_list.extend(resp.get('news') or [])

            page_token = resp.get('next_page_token')
            if not page_token:
                break

        return news_list

In [183]:
# Wrapper object that owns the Alpaca clients
data_manager = DataManager()

# Ticker(s) whose news is requested. Only able to retrieve 1 stock at a time.
symbols = pd.Series(data=['AAPL'])

# Time window (start and end datetime) for the news retrieval
start_datetime = datetime.datetime(year=2023, month=1, day=13)
end_datetime = datetime.datetime(year=2023, month=1, day=14)

In [184]:
# Download every news item in the configured window, then pretty-print the raw records
news_list = data_manager.get_news(
    symbols=symbols,
    start_datetime=start_datetime,
    end_datetime=end_datetime,
)
pprint.pprint(news_list)

[{'author': 'Michael Horton',
  'content': '',
  'created_at': '2023-01-13T21:27:51Z',
  'headline': 'Top 15 Trending Stocks On WallStreetBets As Of Friday, Jan. 13, '
              '2023 (Via Swaggy Stocks)',
  'id': 30418206,
  'images': [],
  'source': 'benzinga',
  'summary': '',
  'symbols': ['AAPL',
              'AI',
              'AMZN',
              'BBBY',
              'COIN',
              'CVNA',
              'GLD',
              'JPM',
              'NEGG',
              'NFLX',
              'NVDA',
              'QQQ',
              'SLV',
              'SPCE',
              'TSLA'],
  'updated_at': '2023-01-13T21:27:51Z',
  'url': 'https://www.benzinga.com/trading-ideas/23/01/30418206/top-15-trending-stocks-on-wallstreetbets-as-of-friday-jan-13-2023-via-swaggy-stocks'},
 {'author': 'Benzinga Insights',
  'content': '',
  'created_at': '2023-01-13T17:41:11Z',
  'headline': "10 Information Technology Stocks Whale Activity In Today's "
              'Session',
  'id': 

In [185]:
# One row per news record returned by get_news.
df = pd.DataFrame(news_list)

# The former bare `df.head()` call here was dropped: its result was discarded
# because it was not the last expression of the cell.
print(df)

              author content            created_at  \
0     Michael Horton          2023-01-13T21:27:51Z   
1  Benzinga Insights          2023-01-13T17:41:11Z   
2          AJ Fabino          2023-01-13T15:46:54Z   
3  Benzinga Newsdesk          2023-01-13T14:19:06Z   
4  Benzinga Newsdesk          2023-01-13T14:01:54Z   
5   Shanthi Rexaline          2023-01-13T06:00:13Z   
6  Shivdeep Dhaliwal          2023-01-13T02:19:21Z   
7        Bhavik Nair          2023-01-13T01:07:44Z   

                                            headline        id  \
0  Top 15 Trending Stocks On WallStreetBets As Of...  30418206   
1  10 Information Technology Stocks Whale Activit...  30414128   
2  Top-Performing Hedge Fund In 2022 Has This Str...  30396479   
3  Rosenblatt Maintains Buy on Apple, Lowers Pric...  30409696   
4  Benzinga Pro's Top 5 Stocks To Watch For Frida...  30409286   
5  iPhone's Long-Term Prospects Intact But Here's...  30403694   
6  Why Apple CEO Tim Cook Is Staring At A Big Pay..

In [186]:
# Save raw CSV directly from Benzinga
# (the unmodified news DataFrame, written to raw.csv in the working directory)
df.to_csv("raw.csv")

In [194]:
# Remove unnecessary columns
df_cleaned = df.drop(
    columns=['content', 'author', 'id', 'source', 'summary', 'updated_at', 'url', 'images']
)

# The former bare `df_cleaned.head()` call here was dropped: its result was
# discarded because it was not the last expression of the cell.
print(df_cleaned)

             created_at                                           headline  \
0  2023-01-13T21:27:51Z  Top 15 Trending Stocks On WallStreetBets As Of...   
1  2023-01-13T17:41:11Z  10 Information Technology Stocks Whale Activit...   
2  2023-01-13T15:46:54Z  Top-Performing Hedge Fund In 2022 Has This Str...   
3  2023-01-13T14:19:06Z  Rosenblatt Maintains Buy on Apple, Lowers Pric...   
4  2023-01-13T14:01:54Z  Benzinga Pro's Top 5 Stocks To Watch For Frida...   
5  2023-01-13T06:00:13Z  iPhone's Long-Term Prospects Intact But Here's...   
6  2023-01-13T02:19:21Z  Why Apple CEO Tim Cook Is Staring At A Big Pay...   
7  2023-01-13T01:07:44Z  Tesla, Amazon, Apple, Bed Bath & Beyond, Marat...   

                                             symbols  
0  [AAPL, AI, AMZN, BBBY, COIN, CVNA, GLD, JPM, N...  
1  [AAPL, AMD, DELL, MARA, NVDA, QCOM, TACT, TXN,...  
2  [AAPL, ABBV, BAC, CRM, EQT, FXI, HYG, LLY, MSF...  
3                                             [AAPL]  
4                     

In [143]:
from openai import OpenAI

# Method 1
# Gather impact score based on news headline
def get_impact(headline, client=None, temperature=0):
    """Ask the chat model for a 1-100 impact score for one headline.

    Args:
        headline: Headline text inserted into the user message.
        client: Optional object exposing ``chat.completions.create``. When
            omitted a new ``OpenAI()`` client is created, as before; passing
            one lets callers reuse a single client across many headlines.
        temperature: Sampling temperature forwarded to the API. Defaults to
            0 to reduce run-to-run variation in the returned scores.

    Returns:
        int: The first integer found in the model's reply.

    Raises:
        ValueError: If the reply contains no integer.
    """
    import re  # local import so this cell does not depend on the import cell

    print("getting impact")
    if client is None:
        client = OpenAI()

    response = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        messages=[
            {"role": "system", "content": "Only respond with a number from 1-100 detailing the impact of the headline."},
            {"role": "user", "content": "Given the headline '" + headline + "', show me a number from 1-100 detailing the impact of this headline."}
        ],
        temperature=temperature,
    )
    reply = response.choices[0].message.content
    print(reply)

    # The model does not always answer with a bare number (e.g. "Score: 72."),
    # so pull the first run of digits out instead of calling int() on the text.
    match = re.search(r"\d+", reply or "")
    if match is None:
        raise ValueError("No impact score found in model reply: " + repr(reply))
    return int(match.group())

# Method 2
# Gather impact score list based on news headline
def get_list_impact(headline_lst, client=None, temperature=0):
    """Ask the chat model for 1-5 impact scores for a batch of headlines.

    Args:
        headline_lst: A single string containing all headlines (the caller
            decides how they are numbered and separated).
        client: Optional object exposing ``chat.completions.create``. When
            omitted a new ``OpenAI()`` client is created, as before.
        temperature: Sampling temperature forwarded to the API. Defaults to
            0 to reduce run-to-run variation in the returned scores.

    Returns:
        str: The model's reply text, unparsed. The reply is free-form text,
        so callers must validate it before treating it as a list of scores.
    """
    print("getting impact")
    if client is None:
        client = OpenAI()

    response = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        messages=[
            {"role": "system", "content": "Given the following list of headlines, provide an impact score from 1 to 5 for each one, where 1 represents minimal impact and 5 represents maximum impact. Assign one score per headline."},
            {"role": "user", "content": "Given the list of headlines are: '" + headline_lst + "', Please respond with a list of scores, one for each headline in order, reflecting their impact based on the information provided."}
        ],
        temperature=temperature,
    )
    reply = response.choices[0].message.content
    print(reply)
    return reply

Both methods (run in the cells below) proved to be extremely inconsistent, with consecutive trials on the same headlines returning different impact scores.
Additionally, the prompt used for Method 2 does not always return a list.

In [188]:
# Method 1: This would be expensive because you're repeatedly sending the 'system' and 'user' message

# One get_impact call per headline, in row order.
# Line below costs credits, be careful when running
# Iterating over the column's values (rather than looking up label i) keeps
# this working when df_cleaned does not have a default 0..n-1 index.
impact_lst = [get_impact(headline) for headline in df_cleaned['headline']]

print(impact_lst)

getting impact
72
getting impact
85
getting impact
78
getting impact
55
getting impact
78
getting impact
78
getting impact
78
getting impact
79
[72, 85, 78, 55, 78, 78, 78, 79]


In [145]:
# Method 2: Send one 'system' and 'user' message together with a list of headlines

# Number the headlines from 1, one per line. Iterating over the column's
# values (rather than looking up label i) keeps this working when df_cleaned
# does not have a default 0..n-1 index.
headline_lst = "".join(
    f"{position}. {headline}\n"
    for position, headline in enumerate(df_cleaned['headline'], start=1)
)

print(headline_lst)

# Line below costs credits, be careful when running

# impact_lst = get_list_impact(headline_lst)

1. Meta Needs To Slash Another 7.5K Jobs To Accommodate Metaverse Ambitions, Analyst Says
2. Spotify Likely For Upside From Gross Profit Leverage, Pricing Power From Music Subscriptions, Analyst Says
3. Dollar Weakness, Easing Of Supply Constraints Likely To Add To Better Margins For Apple, Analyst Says
4. The Unbelievable Story Of The 'Pokémon Go Grandpa,' The 74-Year-Old Cycling Around With 64 Devices Attached To His Bicycle
5. Apple Analyst Ming-Chi Kuo Earlier Tweeted "Anjie Technology Will Be The New Beneficiary Of The All-new Design Foldable iPad. There May Be No New iPad Releases In The Next 9-12 Months As The iPad Mini Refresh Is More Likely To Begin Mass Production In Q1 2024"
7. Masimo Wins IPR Patent Trial Vs Apple
8. Apple Expert Warns About 'Troll Account' Pushing 'Fake' iOS 17 Stories: 'Surprised At Reputable Sites Covering It'
9. Google Rolling Out Privacy Feature For Android Users That Already Exists On iPhones
10. Meta Quest Pro Available At $400 Discount Now — A Good 

In [195]:
# Add 'impact score' column to cleaned dataframe
# Start again from df so that re-running this cell always begins without the score column.
df_cleaned = df.drop(
    columns=['content', 'author', 'id', 'source', 'summary', 'updated_at', 'url', 'images']
)
# insert() modifies df_cleaned in place; the score column goes at position 3.
df_cleaned.insert(loc=3, column="Impact Score", value=impact_lst, allow_duplicates=True)

# df_impact is a second name for the same DataFrame object (no copy is made).
df_impact = df_cleaned
df_impact.head()


Unnamed: 0,created_at,headline,symbols,Impact Score
0,2023-01-13T21:27:51Z,Top 15 Trending Stocks On WallStreetBets As Of...,"[AAPL, AI, AMZN, BBBY, COIN, CVNA, GLD, JPM, N...",72
1,2023-01-13T17:41:11Z,10 Information Technology Stocks Whale Activit...,"[AAPL, AMD, DELL, MARA, NVDA, QCOM, TACT, TXN,...",85
2,2023-01-13T15:46:54Z,Top-Performing Hedge Fund In 2022 Has This Str...,"[AAPL, ABBV, BAC, CRM, EQT, FXI, HYG, LLY, MSF...",78
3,2023-01-13T14:19:06Z,"Rosenblatt Maintains Buy on Apple, Lowers Pric...",[AAPL],55
4,2023-01-13T14:01:54Z,Benzinga Pro's Top 5 Stocks To Watch For Frida...,"[AAPL, APRN, NFLX, PANW, WEN]",78


In [197]:
# Save cleaned dataframe with impact scores to CSV
# (written to impact.csv in the working directory)
df_impact.to_csv("impact.csv")