In [2]:
from alpaca.common.rest import RESTClient
from alpaca.data import StockHistoricalDataClient
from alpaca.trading import TradingClient
import pandas as pd
import datetime

import pprint

import os
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
class DataManager(object):
    historicaldata_client: None
    trading_client:None
    rest_client:None

    def __init__(self):
        self.historicaldata_client = StockHistoricalDataClient(os.getenv("ALPACA_API_KEY"), os.getenv("ALPACA_SECRET_KEY"))
        self.trading_client = TradingClient(os.getenv("ALPACA_API_KEY"), os.getenv("ALPACA_SECRET_KEY"))
        self.rest_client =RESTClient(base_url='https://data.alpaca.markets',api_version='v1beta1',api_key=os.getenv("ALPACA_API_KEY"), secret_key=os.getenv("ALPACA_SECRET_KEY"),)

    # Fetch historical news
    def get_news(self, symbols:pd.Series, start_datetime:datetime, end_datetime:datetime):
        utc_start_datetime = pd.to_datetime(start_datetime, utc=True)
        utc_end_datetime = pd.to_datetime(end_datetime, utc=True)
        
        news_list = []
        page_token = None
        while True:
            news_endpoint = '/news'
            parameters = {'start':utc_start_datetime.isoformat(),
                        'end':utc_end_datetime.isoformat(),
                        'page_token':page_token,
                        'symbols':symbols.to_list()
            }

            resp = self.rest_client.get(news_endpoint, parameters,)
            page_token = resp.get('next_page_token')
            temp_list = resp.get('news')
 
            news_list.extend(temp_list)
            if not page_token:
                break
            
        return news_list

In [4]:
data_manager = DataManager()

# News to retrieve. Only able to retrieve 1 stock at a time.
symbols = pd.Series(['AAPL'])

# Start and end datetime for the news retrieval
start_datetime = datetime.datetime(2023, 1, 13)
end_datetime = datetime.datetime(2023, 1, 14)

In [5]:
news_list = data_manager.get_news(symbols, start_datetime, end_datetime)
pprint.pprint(news_list)

[{'author': 'Michael Horton',
  'content': '',
  'created_at': '2023-01-13T21:27:51Z',
  'headline': 'Top 15 Trending Stocks On WallStreetBets As Of Friday, Jan. 13, '
              '2023 (Via Swaggy Stocks)',
  'id': 30418206,
  'images': [],
  'source': 'benzinga',
  'summary': '',
  'symbols': ['AAPL',
              'AI',
              'AMZN',
              'BBBY',
              'COIN',
              'CVNA',
              'GLD',
              'JPM',
              'NEGG',
              'NFLX',
              'NVDA',
              'QQQ',
              'SLV',
              'SPCE',
              'TSLA'],
  'updated_at': '2023-01-13T21:27:51Z',
  'url': 'https://www.benzinga.com/trading-ideas/23/01/30418206/top-15-trending-stocks-on-wallstreetbets-as-of-friday-jan-13-2023-via-swaggy-stocks'},
 {'author': 'Benzinga Insights',
  'content': '',
  'created_at': '2023-01-13T17:41:11Z',
  'headline': "10 Information Technology Stocks Whale Activity In Today's "
              'Session',
  'id': 

In [6]:
df = pd.DataFrame(news_list)
df.head()
print(df)

              author content            created_at  \
0     Michael Horton          2023-01-13T21:27:51Z   
1  Benzinga Insights          2023-01-13T17:41:11Z   
2          AJ Fabino          2023-01-13T15:46:54Z   
3  Benzinga Newsdesk          2023-01-13T14:19:06Z   
4  Benzinga Newsdesk          2023-01-13T14:01:54Z   
5   Shanthi Rexaline          2023-01-13T06:00:13Z   
6  Shivdeep Dhaliwal          2023-01-13T02:19:21Z   
7        Bhavik Nair          2023-01-13T01:07:44Z   

                                            headline        id  \
0  Top 15 Trending Stocks On WallStreetBets As Of...  30418206   
1  10 Information Technology Stocks Whale Activit...  30414128   
2  Top-Performing Hedge Fund In 2022 Has This Str...  30396479   
3  Rosenblatt Maintains Buy on Apple, Lowers Pric...  30409696   
4  Benzinga Pro's Top 5 Stocks To Watch For Frida...  30409286   
5  iPhone's Long-Term Prospects Intact But Here's...  30403694   
6  Why Apple CEO Tim Cook Is Staring At A Big Pay..

In [7]:
# Save raw CSV directly from Bezinga
df.to_csv("raw.csv")

In [8]:
# Remove unecessary columns
df_cleaned = df.drop(['content', 'author', 'id', 'source', 'summary', 'updated_at', 'url', 'images'], axis=1)
df_cleaned.head()
print(df_cleaned)

             created_at                                           headline  \
0  2023-01-13T21:27:51Z  Top 15 Trending Stocks On WallStreetBets As Of...   
1  2023-01-13T17:41:11Z  10 Information Technology Stocks Whale Activit...   
2  2023-01-13T15:46:54Z  Top-Performing Hedge Fund In 2022 Has This Str...   
3  2023-01-13T14:19:06Z  Rosenblatt Maintains Buy on Apple, Lowers Pric...   
4  2023-01-13T14:01:54Z  Benzinga Pro's Top 5 Stocks To Watch For Frida...   
5  2023-01-13T06:00:13Z  iPhone's Long-Term Prospects Intact But Here's...   
6  2023-01-13T02:19:21Z  Why Apple CEO Tim Cook Is Staring At A Big Pay...   
7  2023-01-13T01:07:44Z  Tesla, Amazon, Apple, Bed Bath & Beyond, Marat...   

                                             symbols  
0  [AAPL, AI, AMZN, BBBY, COIN, CVNA, GLD, JPM, N...  
1  [AAPL, AMD, DELL, MARA, NVDA, QCOM, TACT, TXN,...  
2  [AAPL, ABBV, BAC, CRM, EQT, FXI, HYG, LLY, MSF...  
3                                             [AAPL]  
4                     

In [9]:
from openai import OpenAI

# Method 1
# Gather impact score based on news headline
def get_impact(headline):
    print("getting impact")
    client = OpenAI()

    response = client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    messages=[
        {"role": "system", "content": "Only respond with a number from 1-100 detailing the impact of the headline."},
        {"role": "user", "content": "Given the headline '" + headline + "', show me a number from 1-100 detailing the impact of this headline."}
    ]
    )
    print(response.choices[0].message.content)
    return int(response.choices[0].message.content)

# Method 2
# Gather impact score list based on news headline
def get_list_impact(headline_lst):
    print("getting impact")
    client = OpenAI()

    response = client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    messages=[
        {"role": "system", "content": "Given the following list of headlines, provide an impact score from 1 to 5 for each one, where 1 represents minimal impact and 5 represents maximum impact. Assign one score per headline."},
        {"role": "user", "content": "Given the list of headlines are: '" + headline_lst + "', Please respond with a list of scores, one for each headline in order, reflecting their impact based on the information provided."}
    ]
    )
    print(response.choices[0].message.content)
    return (response.choices[0].message.content)

Both methods below are shown to be extremely inconsistent, with consecutive trials returning different impact scores.
Additionally, prompting for Method 2 does not always return a list.

In [10]:
# Method 1: This would be more expensive because you're repeatedly sending the 'system' and 'user' message

impact_lst = []

for i in range(len(df_cleaned)):
    headline = df_cleaned['headline'][i]

    # get impact from headline here
    # Line below costs credits, be careful when running
    
    impact_score = get_impact(headline)
    
    impact_lst += [impact_score]

print(impact_lst)

getting impact
62
getting impact
75
getting impact
83
getting impact
72
getting impact
82
getting impact
72
getting impact
75
getting impact
83
[62, 75, 83, 72, 82, 72, 75, 83]


In [11]:
# # Method 2: Send one 'system' and 'user' message together with a list of headlines

# headline_lst = ""

# for i in range(len(df_cleaned)):
#     headline = df_cleaned['headline'][i]
#     headline_lst += (str(i+1) + '. ' + headline + "\n")

# print(headline_lst)

# # Line below costs credits, be careful when running

# impact_lst = get_list_impact(headline_lst)

In [12]:
# Add 'impact score' column to cleaned dataframe
df_cleaned = df.drop(['content', 'author', 'id', 'source', 'summary', 'updated_at', 'url', 'images'], axis=1)
df_cleaned.insert(3, "Impact Score", impact_lst, True)

df_impact = df_cleaned
df_impact.head()


Unnamed: 0,created_at,headline,symbols,Impact Score
0,2023-01-13T21:27:51Z,Top 15 Trending Stocks On WallStreetBets As Of...,"[AAPL, AI, AMZN, BBBY, COIN, CVNA, GLD, JPM, N...",62
1,2023-01-13T17:41:11Z,10 Information Technology Stocks Whale Activit...,"[AAPL, AMD, DELL, MARA, NVDA, QCOM, TACT, TXN,...",75
2,2023-01-13T15:46:54Z,Top-Performing Hedge Fund In 2022 Has This Str...,"[AAPL, ABBV, BAC, CRM, EQT, FXI, HYG, LLY, MSF...",83
3,2023-01-13T14:19:06Z,"Rosenblatt Maintains Buy on Apple, Lowers Pric...",[AAPL],72
4,2023-01-13T14:01:54Z,Benzinga Pro's Top 5 Stocks To Watch For Frida...,"[AAPL, APRN, NFLX, PANW, WEN]",82


In [13]:
# Save cleaned dataframe with impact scores to CSV
df_impact.to_csv("impact.csv")

PermissionError: [Errno 13] Permission denied: 'impact.csv'

In [17]:
df_impact['created_at'] = pd.to_datetime(df_impact['created_at'])

# Then, format these datetime objects to the desired format
df_impact['formatted_time'] = df_impact['created_at'].dt.strftime('%Y-%m-%d %H:%M:%S')

In [18]:
df_impact

Unnamed: 0,created_at,headline,symbols,Impact Score,formatted_time
0,2023-01-13 21:27:51+00:00,Top 15 Trending Stocks On WallStreetBets As Of...,"[AAPL, AI, AMZN, BBBY, COIN, CVNA, GLD, JPM, N...",62,2023-01-13 21:27:51
1,2023-01-13 17:41:11+00:00,10 Information Technology Stocks Whale Activit...,"[AAPL, AMD, DELL, MARA, NVDA, QCOM, TACT, TXN,...",75,2023-01-13 17:41:11
2,2023-01-13 15:46:54+00:00,Top-Performing Hedge Fund In 2022 Has This Str...,"[AAPL, ABBV, BAC, CRM, EQT, FXI, HYG, LLY, MSF...",83,2023-01-13 15:46:54
3,2023-01-13 14:19:06+00:00,"Rosenblatt Maintains Buy on Apple, Lowers Pric...",[AAPL],72,2023-01-13 14:19:06
4,2023-01-13 14:01:54+00:00,Benzinga Pro's Top 5 Stocks To Watch For Frida...,"[AAPL, APRN, NFLX, PANW, WEN]",82,2023-01-13 14:01:54
5,2023-01-13 06:00:13+00:00,iPhone's Long-Term Prospects Intact But Here's...,"[AAPL, TSM]",72,2023-01-13 06:00:13
6,2023-01-13 02:19:21+00:00,Why Apple CEO Tim Cook Is Staring At A Big Pay...,[AAPL],75,2023-01-13 02:19:21
7,2023-01-13 01:07:44+00:00,"Tesla, Amazon, Apple, Bed Bath & Beyond, Marat...","[AAPL, AMZN, BBBY, BTCUSD, ETHUSD, MARA, TSLA]",83,2023-01-13 01:07:44


In [22]:
df_impact_dropped = df_impact.drop(['created_at', 'headline', 'symbols'], axis=1)

In [31]:
df_impact_dropped

Unnamed: 0,Impact Score,formatted_time
0,62,2023-01-13 21:27:51
1,75,2023-01-13 17:41:11
2,83,2023-01-13 15:46:54
3,72,2023-01-13 14:19:06
4,82,2023-01-13 14:01:54
5,72,2023-01-13 06:00:13
6,75,2023-01-13 02:19:21
7,83,2023-01-13 01:07:44


In [34]:
df_impact_dropped.to_csv("impact3.csv")