# Setup

In [2]:
import subprocess
# Check if packages are installed
def check_packages():
    try:
        with open('requirements.txt', 'r') as file:
            packages = file.read().splitlines()
            print(f"Packages to check: {packages}")  # Debug print
            for package in packages:
                try:
                    __import__(package)
                    print(f"{package} is already installed.")
                except ImportError:
                    print(f"{package} is not installed. Installing...")
                    subprocess.check_call(['pip', 'install', package])
                    print(f"{package} has been installed.")
    except Exception as e:
        print(f"An error occurred: {e}")  # Print any other errors

# Call the function to check and install packages
check_packages()

Packages to check: ['requests', 'aiohttp', 'pandas', 'tweepy', 'praw', 'ccxt', 'transformers', 'vaderSentiment', 'textblob', 'flask', 'plotly', 'beautifulsoup4', 'datetime', 'torch ', 'torchvision ', 'torchaudio', 'transformers']
requests is already installed.
aiohttp is already installed.
pandas is already installed.
tweepy is already installed.
praw is already installed.
ccxt is already installed.
transformers is already installed.
vaderSentiment is already installed.
textblob is already installed.
flask is already installed.
plotly is already installed.
beautifulsoup4 is not installed. Installing...
beautifulsoup4 has been installed.
datetime is already installed.
torch  is not installed. Installing...
torch  has been installed.
torchvision  is not installed. Installing...
torchvision  has been installed.
torchaudio is already installed.
transformers is already installed.


In [3]:
from datetime import datetime, timedelta
import time
import requests
from dotenv import load_dotenv
import os
import pandas as pd
load_dotenv()

True

# Get News Articles

## News API

In [6]:
def standardize_date_format(df, date_column='date'):
    """Converts and standardizes the date format in a DataFrame column to 'YYYY-MM-DD HH:MM:SS'."""
    df[date_column] = pd.to_datetime(df[date_column]).dt.strftime('%Y-%m-%d %H:%M:%S')
    return df

In [7]:
from news_api import NewsAPI

LAST_WEEK = (datetime.today() - timedelta(days=7)).strftime('%Y-%m-%d')
TWO_DAYS_AGO = (datetime.today() - timedelta(days=2)).strftime('%Y-%m-%d')
YESTERDAY = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')
TODAY = datetime.today().strftime('%Y-%m-%d')

# Create an instance of the NewsAPI
news_api = NewsAPI(api_key=os.getenv('news_api'))

# Fetch news articles about Bitcoin from a specific date range
bitcoin_news_df = news_api.get_news(topic='bitcoin', from_date=LAST_WEEK, to_date=TODAY)
bitcoin_news_df = standardize_date_format(bitcoin_news_df, 'date')

# Check if the DataFrame is not empty
if not bitcoin_news_df.empty:
    # Print the headlines along with the date and description
    print(f"Here are the articles about Bitcoin from the last week until today:\n")
    for index, row in bitcoin_news_df.iterrows():
        print(f"Date: {row['date']}")
        print(f"Headline: {row['headline']}")
        print(f"Description: {row['description']}\n")
else:
    print("No articles found for the given topic and date range.")

# Append the DataFrame to an existing CSV, without including the header again
bitcoin_news_df.to_csv('data/crypto_news.csv', mode='a', header=False, index=False)

Found 3 articles for topic: bitcoin
Here are the articles about Bitcoin from the last week until today:

Date: 2024-04-02 15:59:57
Headline: Bitcoin drops 8% amid ETF outflows and Fed cut apprehension
Description: Bitcoin has dropped 8% since Sunday night, as demand for spot exchange-traded funds slows, and hopes dwindle that the Federal Reserve will cut interest rates soon.

Date: 2024-04-03 17:07:49
Headline: ‘We’re not believers’: Goldman Sachs doubles down on crypto skepticism despite Wall Street embracing Bitcoin ETFs
Description: “We do not think [crypto] is an investment asset class," Sharmin Mossavar-Rahmani, chief investment officer of Goldman Sachs Wealth Management, said this week.

Date: 2024-04-01 10:00:00
Headline: Future of Finance: Coinbase’s Alesia Haas on why Ethereum isn’t a security and the crypto industry rebuilding trust after the FTX ‘travesty’
Description: Haas also discussed the crypto bull market, Sam Bankman-Fried’s sentencing, and what drew her to the world 

## seekalpha api

In [9]:
from seekingalpha import CryptoNewsSentiment

# Example usage
# Please replace 'your_api_key_here' with your actual RapidAPI key for Seeking Alpha.
# seekalpha_api = "your_api_key_here"
seekalpha_api = os.getenv('seeking_alpha')
sentiment_analysis = CryptoNewsSentiment(seekalpha_api)

### Date
# Get today's date with time set to 00:00:00 for the start of the day
start_of_today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

# Get the end of today by adding 1 day to the start of today and subtracting 1 second
end_of_today = start_of_today + timedelta(days=1) - timedelta(seconds=1)

# Convert to Unix timestamp
start_today_unix = int(time.mktime(start_of_today.timetuple()))
end_today_unix = int(time.mktime(end_of_today.timetuple()))

# use these timestamps as arguments for `since` and `until`
crypto_news_df = sentiment_analysis.get_crypto_news(since=start_today_unix, until=end_today_unix)
crypto_news_df = standardize_date_format(crypto_news_df, 'date')
# Check if the DataFrame is not empty
if not crypto_news_df.empty:
    # Print the headlines along with the date and description
    print(f"Here are the articles found:\n")
    for index, row in crypto_news_df.iterrows():
        print(f"Date: {row['date']}")
        print(f"Headline: {row['headline']}")
        print(f"Description: {row['description']}\n")
else:
    print("No articles found for the given topic and date range.")
    
# Append the DataFrame to an existing CSV, without including the header again
try:
    existing_data = pd.read_csv('data/crypto_news.csv')
except FileNotFoundError:
    existing_data = pd.DataFrame(columns=['date', 'headline', 'description'])

# Combine the new data with the existing data
combined_data = pd.concat([existing_data, bitcoin_news_df]).drop_duplicates()

# Write the combined data back to the CSV
combined_data.to_csv('data/crypto_news.csv', index=False)

print("The CSV file has been updated.")

No articles found for the given topic and date range.
The CSV file has been updated.


# Get stock prices

## Alphavantage

In [None]:
from alpha_vantage import AlphaVantage
ALPHA_API_KEY = os.getenv('stock_api')
STOCK_ENDPOINT = 'https://www.alphavantage.co/query'
av = AlphaVantage(api_key=ALPHA_API_KEY, endpoint=STOCK_ENDPOINT)


## MarketStack API

In [4]:
from marketstack import StockPriceFetcher

#----- Stock Price Fetcher ------
fetcher = StockPriceFetcher('msft')  # Fetch end of day data for Microsoft
fetcher.fetch_price()

{'pagination': {'limit': 100, 'offset': 0, 'count': 100, 'total': 252}, 'data': {'name': 'Microsoft Corporation', 'symbol': 'MSFT', 'country': None, 'has_intraday': False, 'has_eod': True, 'eod': [{'open': 421.05, 'high': 421.87, 'low': 419.14, 'close': 420.72, 'volume': 21711778.0, 'adj_high': 421.87, 'adj_low': 419.12, 'adj_close': 420.72, 'adj_open': 420.96, 'adj_volume': 21871161.0, 'split_factor': 1.0, 'dividend': 0.0, 'symbol': 'MSFT', 'exchange': 'XNAS', 'date': '2024-03-28T00:00:00+0000'}, {'open': 424.2, 'high': 424.4, 'low': 419.01, 'close': 421.43, 'volume': 16679715.0, 'adj_high': 424.45, 'adj_low': 419.01, 'adj_close': 421.43, 'adj_open': 424.44, 'adj_volume': 16704978.0, 'split_factor': 1.0, 'dividend': 0.0, 'symbol': 'MSFT', 'exchange': 'XNAS', 'date': '2024-03-27T00:00:00+0000'}, {'open': 425.61, 'high': 425.99, 'low': 421.35, 'close': 421.65, 'volume': 16690800.0, 'adj_high': 425.99, 'adj_low': 421.35, 'adj_close': 421.65, 'adj_open': 425.61, 'adj_volume': 16725647.0, 

# Get Crypto Tickers
## Crypto API

In [1]:
from crypto_api import CryptoAPI

api = CryptoAPI()
api.get_crypto_listings()
api.save_crypto_listings_to_csv()

Number of cryptocurrencies: 3119
CSV file saved successfully


# Sentiment

In [5]:
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, Dataset
import torch

# Assuming mock_data.csv is your data file
data_path = 'data/crypto_news.csv'

# Load data
df = pd.read_csv(data_path)

# Initialize tokenizer and model
tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')

# Define a function to predict sentiment
def predict_sentiment(text):
    # Disable gradient computation for inference
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        # Detach the tensor and convert it to a numpy array
        predictions = predictions.detach().numpy()
    return predictions

# Adding a column for sentiment
df['sentiment'] = df.apply(lambda row: predict_sentiment(row['description']), axis=1)

# Display the dataframe
df

#TODO append sentiment column to .csv

                    date                                           headline  \
0    2024-04-05 15:06:24  Bitcoin off to a slow start in Q2, falls 2.6% ...   
1    2024-04-05 10:04:54  We're seeing new money come into Bitcoin – TD ...   
2    2024-04-05 09:25:39        Core Scientific mines 903 bitcoins in March   
3    2024-04-05 04:45:36  Argo Blockchain mines 103 bitcoins in March, a...   
4    2024-04-05 02:57:21            HIVE Digital mines 224 bitcoin in March   
5   2024-04-02T15:59:57Z  Bitcoin drops 8% amid ETF outflows and Fed cut...   
6   2024-04-03T17:07:49Z  ‘We’re not believers’: Goldman Sachs doubles d...   
7   2024-04-01T10:00:00Z  Future of Finance: Coinbase’s Alesia Haas on w...   
8    2024-04-02 15:59:57  Bitcoin drops 8% amid ETF outflows and Fed cut...   
9    2024-04-03 17:07:49  ‘We’re not believers’: Goldman Sachs doubles d...   
10   2024-04-01 10:00:00  Future of Finance: Coinbase’s Alesia Haas on w...   

                                   sentiment  
0   