In [24]:
import datetime
import os
import time
import warnings

import pandas as pd
import requests
from IPython.core.display import HTML
# Inject a CSS rule so the notebook's .container element spans the full page width.
HTML("""
<style>
.container { width:100% !important; }
</style>
""")

### Go here to get API KEY
* https://www.alphavantage.co/support/#api-key

In [25]:
# Read the Alpha Vantage key from the ALPHAVANTAGE_API_KEY environment variable so the
# secret is never saved inside the notebook. The string fallback keeps the cell runnable;
# replace it (or set the variable) before calling the API.
api_key = os.environ.get("ALPHAVANTAGE_API_KEY", "PASTE_YOUR_API_KEY_HERE")

##### Helper Functions
* _get_data: get data from alpha vantage
* _get_label_sentiment: converts a sentiment score to its (detailed, coarse) label pair

In [26]:
def _get_data(symbols,time_from,time_to,api_key):
    """Fetch one NEWS_SENTIMENT response from Alpha Vantage.

    Args:
        symbols: Value sent as the ``tickers`` query parameter (e.g. ``'TSLA'``).
        time_from: Value sent as ``time_from``; callers in this notebook pass
            ``YYYYMMDDTHHMM`` strings.
        time_to: Value sent as ``time_to``; callers pass ``''`` or a
            ``YYYYMMDDTHHMM`` string.
        api_key: Alpha Vantage API key, sent as ``apikey``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or when the
            request exceeds the timeout.
    """
    # Let requests build and escape the query string instead of interpolating
    # raw values into the URL. Parameter order matches the original URL.
    params = {
        "function": "NEWS_SENTIMENT",
        "tickers": symbols,
        "time_from": time_from,
        "time_to": time_to,
        "limit": 1000,
        "apikey": api_key,
    }
    # Without an explicit timeout a stalled connection would block the cell forever.
    r = requests.get("https://www.alphavantage.co/query", params=params, timeout=30)
    # Surface HTTP-level failures here rather than as a confusing JSON decode error.
    r.raise_for_status()
    return r.json()

def _get_label_sentiment(x):
    if x <= -0.35:
        return 'Bearish','Bearish'
    elif -0.35 < x <= -0.15:
        return 'Somewhat-Bearish','Bearish'
    elif -0.15 < x < 0.15:
        return 'Neutral','Neutral'
    elif 0.15 <= x < 0.35:
        return 'Somewhat_Bullish','Bullish'
    else:  # x >= 0.35
        return 'Bullish','Bullish'

### Get dataset going backward in time
* Initially set time_to = '' meaning till current time
* Then set it to earliest time after each api call
* Set a time_from very far into past
* Eventually this won't work when we are done with all the data...

In [27]:
def get_dataset(time_from="20030410T0130",
                time_to='',
                MAX_API_CALLS_PER_DAY = 25, # Free tier only allows 25 API calls per day
                MAX_API_CALLS_PER_MIN = 5 # Free tier only allows 5 api calls per minute
               ):
    """Collect TSLA news-sentiment articles, paging backward in time.

    Calls ``_get_data`` repeatedly (using the module-level ``api_key``). After
    each call ``time_to`` is moved to the publish time of the last article in
    the returned feed, so the next call asks for older articles.

    Args:
        time_from: Lower bound passed to the API (``YYYYMMDDTHHMM``).
        time_to: Initial upper bound passed to the API; ``''`` is passed through as-is.
        MAX_API_CALLS_PER_DAY: Maximum number of API calls made by one invocation.
        MAX_API_CALLS_PER_MIN: A 60 second pause is inserted before every
            call whose number is a multiple of this value.

    Returns:
        A DataFrame indexed and sorted by ``time_published`` containing only
        articles whose title mentions tsla/tesla and whose only ticker is TSLA,
        with ``ticker_relevance_TSLA``, ``ticker_sentiment_TSLA``,
        ``num_tickers``, ``detailed_original_label`` and ``label`` columns
        added. If the API returned no articles at all, an empty DataFrame with
        an empty ``time_published`` index is returned and a warning is issued.
    """
    def _tsla_field(entries, field):
        # Value of `field` from the TSLA entry of one article's ticker list, or None if absent.
        for entry in entries:
            if entry['ticker'] == 'TSLA':
                return entry[field]
        return None

    pages = []
    data = None
    for call_number in range(1, MAX_API_CALLS_PER_DAY + 1):
        # Throttle using the caller-supplied per-minute limit (was hard-coded to 5).
        if MAX_API_CALLS_PER_MIN > 0 and call_number % MAX_API_CALLS_PER_MIN == 0:
            time.sleep(60)

        data = _get_data('TSLA', time_from, time_to, api_key)
        if 'feed' not in data or len(data['feed']) == 0:
            break
        feed = data['feed']
        pages.append(feed)
        # Take all the way up to last 2 since api only takes minute level granularity
        # NOTE(review): truncating the seconds may skip articles published earlier within
        # that same minute, depending on how the API treats time_to; confirm.
        next_time_to = feed[-1]['time_published'][:-2]
        # If the window did not move, further calls would refetch the same page
        # and only burn the remaining daily quota.
        if next_time_to == time_to:
            break
        time_to = next_time_to

    if not pages:
        # pd.concat([]) would raise "No objects to concatenate"; return an empty frame so
        # the incremental "fetch then combine" cells keep working when nothing new exists.
        warnings.warn(
            f"No articles were returned by the API; last response: {data!r}",
            stacklevel=2,
        )
        return pd.DataFrame(index=pd.Index([], name='time_published'))

    df = pd.concat([pd.DataFrame(feed) for feed in pages], ignore_index=True)

    # Extract TSLA specific relevance (we didn't use it in video)
    df['ticker_relevance_TSLA'] = pd.to_numeric(
        df['ticker_sentiment'].apply(lambda entries: _tsla_field(entries, 'relevance_score')),
        errors='coerce',
    ).astype(float)
    # Extract TSLA specific sentiment
    df['ticker_sentiment_TSLA'] = pd.to_numeric(
        df['ticker_sentiment'].apply(lambda entries: _tsla_field(entries, 'ticker_sentiment_score')),
        errors='coerce',
    ).astype(float)
    # Extract # of tickers
    df['num_tickers'] = df['ticker_sentiment'].apply(len)

    # Only take articles with TSLA in headline, exactly one ticker, and a usable TSLA score.
    keep = (
        df['title'].str.contains('tsla|tesla', case=False, na=False)
        & (df['num_tickers'] == 1)
        & df['ticker_sentiment_TSLA'].notna()
    )
    # .copy() so the column assignments below act on an independent frame, not a slice.
    df = df.loc[keep].copy()

    # Build the two label columns from plain lists; this also works when no rows remain.
    labels = [_get_label_sentiment(score) for score in df['ticker_sentiment_TSLA']]
    df['detailed_original_label'] = [detailed for detailed, _ in labels]
    df['label'] = [coarse for _, coarse in labels]

    # Drop duplicates, index by time published, and sort chronologically.
    return (
        df.drop_duplicates(subset=['summary'], keep='first')
          .set_index('time_published')
          .sort_index()
    )
    
        

### Run this to get dataset and save to CSV file
* Takes 5 mins LOL!
* You can stop here after saving the CSV and can run the colab notebook...

In [28]:
# Fetch with an empty time_to and persist the result; later cells reload this CSV
# using 'time_published' as the index column.
df = get_dataset(time_to='')
df.to_csv('tsla_sentiment.csv')

In [29]:
# Display the fetched DataFrame.
df

Unnamed: 0_level_0,title,url,authors,summary,banner_image,source,category_within_source,source_domain,topics,overall_sentiment_score,overall_sentiment_label,ticker_sentiment,ticker_relevance_TSLA,ticker_sentiment_TSLA,num_tickers,detailed_original_label,label
time_published,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
20240424T132555,Tesla Sent a Clear Message to Wall Street on T...,https://www.fool.com/investing/2024/04/24/tesl...,[Neil Rozenbaum],The year will be a defining moment for Tesla a...,https://g.foolcdn.com/editorial/images/774113/...,Motley Fool,,www.fool.com,"[{'topic': 'Earnings', 'relevance_score': '0.3...",0.211311,Somewhat-Bullish,"[{'ticker': 'TSLA', 'relevance_score': '0.9470...",0.947069,0.510862,1,Bullish,Bullish
20240424T134400,Tesla stock soars 14% after Elon Musk commits ...,https://markets.businessinsider.com/news/stock...,[Matthew Fox],"Tesla Stock Price up 14% on Q1 Earnings, Musk ...",,Business Insider,GoogleRSS,markets.businessinsider.com,"[{'topic': 'Earnings', 'relevance_score': '0.9...",0.069328,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.8078...",0.807885,0.081196,1,Neutral,Neutral
20240424T140000,Tesla Rises on Future EV Plans Despite Q1 Miss...,https://www.zacks.com/stock/news/2261222/tesla...,[Sweta Killa],Tesla (TSLA) misses estimates on both earnings...,https://staticx-tuner.zacks.com/images/article...,Zacks Commentary,,www.zacks.com,"[{'topic': 'Earnings', 'relevance_score': '0.8...",0.121240,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.4535...",0.453501,0.200087,1,Somewhat_Bullish,Bullish
20240424T140700,Tesla is headed for a new growth phase as Elon...,https://markets.businessinsider.com/news/stock...,[Jennifer Sor],"Elon Musk Finally the 'Adult in the Room,' Tes...",,Business Insider,GoogleRSS,markets.businessinsider.com,"[{'topic': 'Earnings', 'relevance_score': '0.6...",0.230267,Somewhat-Bullish,"[{'ticker': 'TSLA', 'relevance_score': '0.5197...",0.519739,0.292953,1,Somewhat_Bullish,Bullish
20240424T150314,Electric car Tesla may set up manufacturing pl...,https://www.business-standard.com/industry/aut...,[Surajeet Das Gupta],Tesla's plan to manufacture its affordable ele...,https://bsmedia.business-standard.com/_media/b...,Business Standard,GoogleRSS,www.business-standard.com,"[{'topic': 'Earnings', 'relevance_score': '0.4...",0.081131,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.2743...",0.274320,0.055671,1,Neutral,Neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20240505T070604,$299 Tesla Model 3 'Too Good' Lease Makes It C...,https://www.forbes.com/sites/brookecrothers/20...,[Brooke Crothers],Tesla is offering curiously cheap leases on it...,https://imageio.forbes.com/specials-images/ima...,Forbes,,www.forbes.com,"[{'topic': 'Earnings', 'relevance_score': '0.1...",0.178528,Somewhat-Bullish,"[{'ticker': 'TSLA', 'relevance_score': '0.5372...",0.537213,0.309625,1,Somewhat_Bullish,Bullish
20240505T081048,Indian EV startup extends offer to Tesla inter...,https://www.business-standard.com/india-news/i...,[Press Trust of India],Bengaluru-based electric vehicle startup Prava...,https://bsmedia.business-standard.com/_media/b...,Business Standard,GoogleRSS,www.business-standard.com,"[{'topic': 'Manufacturing', 'relevance_score':...",0.149167,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.7232...",0.723234,0.185315,1,Somewhat_Bullish,Bullish
20240505T133014,Russian Troops Enter US Military Base In Niger...,https://www.benzinga.com/news/24/05/38632362/r...,[Benzinga Neuro],The weekend was filled with intriguing stories...,https://cdn.benzinga.com/files/images/story/20...,Benzinga,News,www.benzinga.com,"[{'topic': 'Manufacturing', 'relevance_score':...",0.007254,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.1595...",0.159569,-0.168038,1,Somewhat-Bearish,Bearish
20240505T172611,Tesla pullback puts onus on other firms to bui...,https://www.business-standard.com/world-news/t...,[NYT],Another charging company is likely to take ove...,https://bsmedia.business-standard.com/_media/b...,Business Standard,GoogleRSS,www.business-standard.com,"[{'topic': 'Manufacturing', 'relevance_score':...",0.004840,Neutral,"[{'ticker': 'TSLA', 'relevance_score': '0.6184...",0.618426,0.093125,1,Neutral,Neutral


### If you want more data you can run this the next day starting at earliest publish date and going backwards in time.
* Load dataset so far 
* Find earliest published time
* Can give you duplicates when you are done with data. I didn't handle it in this code so drop duplicates maybe by URL,published_time,summary...

In [32]:
# Reload the saved dataset, sorted by publish time so the first index entry is the earliest.
df = pd.read_csv('tsla_sentiment.csv',index_col='time_published').sort_index()
# Earliest publish timestamp collected so far; used as the upper bound of the next fetch.
earliest_time_published = df.index[0]
earliest_time_published

'20240424T132555'

#### Get more data

In [33]:
# Strip the trailing two characters (the seconds) so time_to is a minute-level timestamp.
df_new = get_dataset(time_to=earliest_time_published[:-2])

### Combine the dataframes and save to a csv

In [34]:
# Put the newly fetched rows ahead of the saved ones; when a summary repeats,
# only its first occurrence is kept. Then overwrite the CSV.
df = pd.concat([df_new, df]).drop_duplicates(subset=['summary'], keep='first')
df.to_csv('tsla_sentiment.csv')

### When we run out of dates, or want to add new articles we don't have yet, we can go forward in time from the latest published time. We can run this every day or every few days and we should have mostly everything
* Find latest time
* We take nearest minute forward not backwards as we did when going backwards in time
* We set the time_from to this...

In [None]:
# Reload the saved dataset, sorted by publish time.
df = pd.read_csv('tsla_sentiment.csv',index_col='time_published').sort_index()

# After sorting, the last index entry is the most recent publish timestamp.
latest_time_published = df.index[-1]

# Drop the trailing two characters (seconds) and parse the rest as a minute-resolution datetime
dt = datetime.datetime.strptime(latest_time_published[:-2], '%Y%m%dT%H%M')

# Step forward to the following minute (this adds one minute; it does not round)
# NOTE(review): an article published later within the same minute as the latest saved
# one would fall before this timestamp; confirm that gap is acceptable.
rounded_dt = dt + datetime.timedelta(minutes=1)

# Format the advanced datetime back to the string format
next_timestamp = rounded_dt.strftime('%Y%m%dT%H%M')
 

### Get new data going forward from the latest published time

In [None]:
# Fetch articles from next_timestamp onward; time_to is left empty.
df_new = get_dataset(time_from=next_timestamp,time_to='')

### Combine the dataframes and save to a csv

In [None]:
# Append the newly fetched rows after the saved ones; when a summary repeats,
# only its first occurrence is kept. Then overwrite the CSV.
df = pd.concat([df, df_new]).drop_duplicates(subset=['summary'], keep='first')
df.to_csv('tsla_sentiment.csv')

### Explanation

In [36]:
# Fetch one raw response for TSLA (empty time_to) to inspect its structure.
data=_get_data('TSLA',"20030410T0130","",api_key)

In [38]:
# Top-level keys of the raw response.
data.keys()

dict_keys(['items', 'sentiment_score_definition', 'relevance_score_definition', 'feed'])

In [39]:
# The response's 'items' entry (it comes back as a string, see the output below).
data['items']

'692'

In [40]:
# The API's own description of how sentiment scores map to labels.
data['sentiment_score_definition']

'x <= -0.35: Bearish; -0.35 < x <= -0.15: Somewhat-Bearish; -0.15 < x < 0.15: Neutral; 0.15 <= x < 0.35: Somewhat_Bullish; x >= 0.35: Bullish'

In [41]:
# The API's own description of the relevance score range.
data['relevance_score_definition']

'0 < x <= 1, with a higher score indicating higher relevance.'

In [44]:
# Build a DataFrame from the response's 'feed' entries.
df=pd.DataFrame(data['feed'])

In [47]:
# Per-ticker sentiment entries attached to the first article in the feed.
df['ticker_sentiment'].iloc[0]

[{'ticker': 'TSLA',
  'relevance_score': '0.055236',
  'ticker_sentiment_score': '-0.023999',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'CRYPTO:BTC',
  'relevance_score': '0.164656',
  'ticker_sentiment_score': '-0.0489',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'CRYPTO:DOGE',
  'relevance_score': '0.110209',
  'ticker_sentiment_score': '-0.026718',
  'ticker_sentiment_label': 'Neutral'},
 {'ticker': 'CRYPTO:SOL',
  'relevance_score': '0.055236',
  'ticker_sentiment_score': '0.215945',
  'ticker_sentiment_label': 'Somewhat-Bullish'}]