In [50]:
from openai import OpenAI
import pandas as pd
from datasets import Dataset
import os
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

In [51]:
# Load the pre-processed crypto news articles (columns: title, text, source_name, date).
df = pd.read_parquet('data/2.cryptonews_processed.parquet')
# Smoke-test toggle: uncomment to restrict the run to the first ten articles.
# df = df[0:10].reset_index(drop=True)
print(df.shape)
df.head()

(155376, 4)


Unnamed: 0,title,text,source_name,date
0,2021 Bitcoin Price Predictions: Is The Massive...,As the bitcoin price hovers under the psycholo...,Forbes,2021-01-02 00:20:00+00:00
1,Will Central Banks Hold Bitcoin in 2021?,Central banks (CB) will hold bitcoin sooner or...,BeInCrypto,2021-01-01 20:31:35+00:00
2,"Bitcoin Is Digital Social Justice, feat. Tyron...",The podcaster and CEO of Onramp Invest discuss...,Coindesk,2021-01-01 19:15:02+00:00
3,Bitcoin hits all-time high against gold as hav...,"BTC has hit another milestone, this time again...",Cointelegraph,2021-01-01 18:52:00+00:00
4,"The Last Time This Indicator Flashed, Bitcoin ...",Bitcoin has been facing some turbulence as of ...,Bitcoinist,2021-01-01 18:00:00+00:00


In [4]:
# os.environ['deepseek_api_key'] = 'YOUR_KEY'
# The key is read from the environment so that it never has to be stored in the notebook.
cryptonews_api_key = os.getenv('deepseek_api_key')
if not cryptonews_api_key:
  # Fail fast: with api_key=None the OpenAI client falls back to OPENAI_API_KEY, which would
  # send an unrelated credential to the DeepSeek endpoint (or fail later with a vaguer error).
  raise RuntimeError("Environment variable 'deepseek_api_key' is not set.")
# DeepSeek exposes an OpenAI-compatible API, so the OpenAI client is pointed at its base URL.
client = OpenAI(api_key=cryptonews_api_key, base_url="https://api.deepseek.com")

# System prompt sent with every request. It defines the six aspects to score and the JSON
# object the model must return; the keys listed here become the aspect columns downstream.
system_prompt = """
You are an expert cryptocurrency analyst.
Your task is to analyze crypto-related news headlines and articles to perform Aspect-Based Sentiment Analysis (ABSA).
Focus on the following aspects:

1. **price_prediction**: Sentiment related to price predictions, whale movements, large transactions, trading, technical indicators.
2. **economy**: Sentiment related to macroeconomic conditions, macroeconomic indicators.
3. **regulation**: Sentiment related to regulatory news or government policies.
4. **technology**: Sentiment related to blockchain technology, protocol upgrades, innovations.
5. **adoption**: Sentiment related to crypto adoptions, ETFs, crypto projects, business developments, partnerships, popularity.
6. **cybersecurity**: Sentiment related to cybersecurity, hacks, crime, money laundering.

For each aspect, provide a sentiment score on a scale of -1 to +1, where:
- -1 = Strongly Negative
- 0 = Neutral or Not mentioned
- +1 = Strongly Positive
- round to 1 decimal place

Output the results in JSON format with the following structure:
{
  "price_prediction": [score],
  "economy": [score],
  "regulation": [score],
  "technology": [score],
  "adoption": [score],
  "cybersecurity": [score]
}
"""

def analyze_aspects(example):
  """Ask the chat model for aspect-based sentiment scores of a single article.

  Args:
    example: Record supporting ``example['title']`` and ``example['text']``.

  Returns:
    ``{"absa": parsed}`` where ``parsed`` is the JSON-decoded model reply, or
    ``{"absa": None}`` if the request or the parsing fails. Failures are printed
    rather than raised so that one bad article does not abort a batch.
  """
  try:
    article = f"Title: {example['title']}\nText: {example['text']}"
    completion = client.chat.completions.create(
      model="deepseek-chat",
      messages=[
        dict(role="system", content=system_prompt),
        dict(role="user", content=article),
      ],
      response_format=dict(type='json_object'),
    )
    parsed = json.loads(completion.choices[0].message.content)
  except Exception as err:
    print(f"Error processing example: {example['title']}\n{err}")
    return {"absa": None}
  return {"absa": parsed}

def analyze_aspects_parallel(df, max_workers=64, output_path="data/3.deepseek_absa_output.json", resume=True):
  """Score every row of ``df`` concurrently and append the results to a JSON-lines file.

  Each completed row is written as ``{"index": <row label>, "absa": <scores or None>}``
  on its own line, in completion order (not row order).

  Args:
    df: DataFrame whose rows are passed to ``analyze_aspects``.
    max_workers: Size of the thread pool issuing the API requests.
    output_path: JSON-lines file the results are appended to.
    resume: When true, rows whose index label is already recorded in ``output_path``
      are skipped, so an interrupted or repeated run neither repeats the API calls
      nor appends duplicate records. Rows recorded with ``"absa": null`` also count
      as recorded; delete those lines (or pass ``resume=False``) to retry them.
  """
  already_done = set()
  if resume and os.path.exists(output_path):
    with open(output_path) as existing:
      for line in existing:
        try:
          already_done.add(json.loads(line)["index"])
        except (ValueError, KeyError, TypeError):
          # Blank, truncated or otherwise unusable line: treat the row as not recorded.
          continue

  with open(output_path, "a") as file, ThreadPoolExecutor(max_workers=max_workers) as executor:
    future_to_index = {
      executor.submit(analyze_aspects, row): idx
      for idx, row in df.iterrows()
      if idx not in already_done
    }
    for future in as_completed(future_to_index):
      idx = future_to_index[future]
      try:
        result = future.result()
        record = json.dumps({"index": idx, "absa": result["absa"]})
      except Exception as e:
        print(f"Error processing result: {e}")
        record = json.dumps({"index": idx, "absa": None})
      file.write(record + "\n")
      # Flush per record so finished work survives a kernel interrupt or crash.
      file.flush()

# Score every article; results are appended to data/3.deepseek_absa_output.json as they complete.
analyze_aspects_parallel(df)

-----------------------------------

In [52]:
# Read back the JSON-lines results written by analyze_aspects_parallel
# (one {"index": ..., "absa": ...} record per line, in completion order).
df_output = pd.read_json("data/3.deepseek_absa_output.json", lines=True)
# The results file is opened in append mode, so repeated runs can record the same article
# more than once; keep only the latest record per article so the merge stays one-to-one.
df_output = df_output.drop_duplicates(subset='index', keep='last').set_index('index')
df_output.head()

Unnamed: 0_level_0,absa
index,Unnamed: 1_level_1
49,"{'price_prediction': 1.0, 'economy': 0.0, 'reg..."
18,"{'price_prediction': 1.0, 'economy': 0.0, 'reg..."
26,"{'price_prediction': 0.8, 'economy': 0, 'regul..."
19,"{'price_prediction': 0.8, 'economy': 0.3000000..."
55,"{'price_prediction': 0, 'economy': 0, 'regulat..."


In [53]:
# Attach each article's ABSA result by matching the article's row label to the recorded "index".
# The default inner join keeps only articles that have a record in df_output.
df = df.merge(df_output, left_index=True, right_on="index")
df.head()

Unnamed: 0_level_0,title,text,source_name,date,absa
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2021 Bitcoin Price Predictions: Is The Massive...,As the bitcoin price hovers under the psycholo...,Forbes,2021-01-02 00:20:00+00:00,"{'price_prediction': 0.5, 'economy': 0, 'regul..."
1,Will Central Banks Hold Bitcoin in 2021?,Central banks (CB) will hold bitcoin sooner or...,BeInCrypto,2021-01-01 20:31:35+00:00,"{'price_prediction': 0, 'economy': 0.5, 'regul..."
2,"Bitcoin Is Digital Social Justice, feat. Tyron...",The podcaster and CEO of Onramp Invest discuss...,Coindesk,2021-01-01 19:15:02+00:00,"{'price_prediction': 0, 'economy': 0.300000000..."
3,Bitcoin hits all-time high against gold as hav...,"BTC has hit another milestone, this time again...",Cointelegraph,2021-01-01 18:52:00+00:00,"{'price_prediction': 1.0, 'economy': 0, 'regul..."
4,"The Last Time This Indicator Flashed, Bitcoin ...",Bitcoin has been facing some turbulence as of ...,Bitcoinist,2021-01-01 18:00:00+00:00,"{'price_prediction': 0.30000000000000004, 'eco..."


In [54]:
# Expand each article's ABSA dict into one column per aspect.
absa_columns = pd.json_normalize(df['absa'].tolist())
# json_normalize on a plain list numbers its rows 0..n-1, whereas df is indexed by the original
# article labels after the merge. Re-use df's index so that the label-aligned column assignment
# that follows pairs every score with its own article, even when some articles have no result.
absa_columns.index = df.index
absa_columns = absa_columns.round(1)
absa_columns.head()

Unnamed: 0,price_prediction,economy,regulation,technology,adoption,cybersecurity
0,0.5,0.0,0,0.0,0.0,0
1,0.0,0.5,0,0.0,0.7,0
2,0.0,0.3,0,0.5,0.7,0
3,1.0,0.0,0,0.0,0.0,0
4,0.3,0.0,0,0.0,0.0,0


In [55]:
# Add one column per aspect to df. pandas aligns this assignment on index labels, not on row position.
df[absa_columns.columns] = absa_columns
df.head()

Unnamed: 0_level_0,title,text,source_name,date,absa,price_prediction,economy,regulation,technology,adoption,cybersecurity
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,2021 Bitcoin Price Predictions: Is The Massive...,As the bitcoin price hovers under the psycholo...,Forbes,2021-01-02 00:20:00+00:00,"{'price_prediction': 0.5, 'economy': 0, 'regul...",0.5,0.0,0,0.0,0.0,0
1,Will Central Banks Hold Bitcoin in 2021?,Central banks (CB) will hold bitcoin sooner or...,BeInCrypto,2021-01-01 20:31:35+00:00,"{'price_prediction': 0, 'economy': 0.5, 'regul...",0.0,0.5,0,0.0,0.7,0
2,"Bitcoin Is Digital Social Justice, feat. Tyron...",The podcaster and CEO of Onramp Invest discuss...,Coindesk,2021-01-01 19:15:02+00:00,"{'price_prediction': 0, 'economy': 0.300000000...",0.0,0.3,0,0.5,0.7,0
3,Bitcoin hits all-time high against gold as hav...,"BTC has hit another milestone, this time again...",Cointelegraph,2021-01-01 18:52:00+00:00,"{'price_prediction': 1.0, 'economy': 0, 'regul...",1.0,0.0,0,0.0,0.0,0
4,"The Last Time This Indicator Flashed, Bitcoin ...",Bitcoin has been facing some turbulence as of ...,Bitcoinist,2021-01-01 18:00:00+00:00,"{'price_prediction': 0.30000000000000004, 'eco...",0.3,0.0,0,0.0,0.0,0


In [56]:
# The raw dict column is redundant once its values have been expanded into per-aspect columns.
df = df.drop(columns=['absa'])

In [57]:
# Mark the score columns with an "_absa" suffix; the price_prediction aspect is stored as "tanalysis_absa".
df = df.rename(columns={
  'price_prediction': 'tanalysis_absa',
  **{aspect: f'{aspect}_absa'
     for aspect in ('economy', 'regulation', 'technology', 'adoption', 'cybersecurity')},
})

In [58]:
# Preview the frame after dropping the raw dict column and renaming the aspect columns.
df.head()

Unnamed: 0_level_0,title,text,source_name,date,tanalysis_absa,economy_absa,regulation_absa,technology_absa,adoption_absa,cybersecurity_absa
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,2021 Bitcoin Price Predictions: Is The Massive...,As the bitcoin price hovers under the psycholo...,Forbes,2021-01-02 00:20:00+00:00,0.5,0.0,0,0.0,0.0,0
1,Will Central Banks Hold Bitcoin in 2021?,Central banks (CB) will hold bitcoin sooner or...,BeInCrypto,2021-01-01 20:31:35+00:00,0.0,0.5,0,0.0,0.7,0
2,"Bitcoin Is Digital Social Justice, feat. Tyron...",The podcaster and CEO of Onramp Invest discuss...,Coindesk,2021-01-01 19:15:02+00:00,0.0,0.3,0,0.5,0.7,0
3,Bitcoin hits all-time high against gold as hav...,"BTC has hit another milestone, this time again...",Cointelegraph,2021-01-01 18:52:00+00:00,1.0,0.0,0,0.0,0.0,0
4,"The Last Time This Indicator Flashed, Bitcoin ...",Bitcoin has been facing some turbulence as of ...,Bitcoinist,2021-01-01 18:00:00+00:00,0.3,0.0,0,0.0,0.0,0


In [60]:
absa_cols = ['tanalysis_absa', 'economy_absa', 'regulation_absa', 'technology_absa', 'adoption_absa', 'cybersecurity_absa']
# Force every score column to a numeric dtype. Values that cannot be parsed, and missing
# values, become 0 — the same value the prompt uses for "neutral or not mentioned".
df = df.assign(**{
  name: pd.to_numeric(df[name], errors='coerce').fillna(0)
  for name in absa_cols
})
df.head()

Unnamed: 0_level_0,title,text,source_name,date,tanalysis_absa,economy_absa,regulation_absa,technology_absa,adoption_absa,cybersecurity_absa
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,2021 Bitcoin Price Predictions: Is The Massive...,As the bitcoin price hovers under the psycholo...,Forbes,2021-01-02 00:20:00+00:00,0.5,0.0,0.0,0.0,0.0,0.0
1,Will Central Banks Hold Bitcoin in 2021?,Central banks (CB) will hold bitcoin sooner or...,BeInCrypto,2021-01-01 20:31:35+00:00,0.0,0.5,0.0,0.0,0.7,0.0
2,"Bitcoin Is Digital Social Justice, feat. Tyron...",The podcaster and CEO of Onramp Invest discuss...,Coindesk,2021-01-01 19:15:02+00:00,0.0,0.3,0.0,0.5,0.7,0.0
3,Bitcoin hits all-time high against gold as hav...,"BTC has hit another milestone, this time again...",Cointelegraph,2021-01-01 18:52:00+00:00,1.0,0.0,0.0,0.0,0.0,0.0
4,"The Last Time This Indicator Flashed, Bitcoin ...",Bitcoin has been facing some turbulence as of ...,Bitcoinist,2021-01-01 18:00:00+00:00,0.3,0.0,0.0,0.0,0.0,0.0


In [62]:
# Persist the articles together with their numeric per-aspect sentiment scores.
df.to_parquet('data/3b.cryptonews_absa.parquet')