In [47]:
!pip install yfinance



In [48]:
import yfinance as yf
import pandas as pd

# Stock Data

**Stocks:** \$ONCO, \$CNEY, \$TNXP, \$APLD, \$KTTA

**Export data to `.csv`**

In [49]:
# Symbols whose price history is downloaded and combined below.
tickers = [
    "ONCO",
    "CNEY",
    "TNXP",
    "APLD",
    "KTTA",
]

# Date window (YYYY-MM-DD strings) handed to yfinance's `history(start=..., end=...)`.
start_date, end_date = "2023-10-01", "2024-09-26"

In [50]:
# Per-ticker frames keyed by symbol; each frame keeps only Close and Volume,
# renamed to "<TICKER>_Close" / "<TICKER>_Volume" so the columns stay unique
# once the frames are placed side by side.
data = {}

for ticker in tickers:
    price_history = yf.Ticker(ticker).history(start=start_date, end=end_date)

    column_names = {'Close': f'{ticker}_Close', 'Volume': f'{ticker}_Volume'}
    stock_data = price_history[['Close', 'Volume']].rename(columns=column_names)
    data[ticker] = stock_data

# Column-wise join on the date index; join='inner' keeps only the dates that
# are present for every ticker.
per_ticker_frames = list(data.values())
combined_data = pd.concat(per_ticker_frames, axis=1, join='inner')

In [51]:
# Persist the combined Close/Volume table (the date index is written as the
# first CSV column), then print its first rows as a quick sanity check.
csv_path = "../data/combined_stock_data.csv"
combined_data.to_csv(csv_path)

preview = combined_data.head()
print(preview)

                           ONCO_Close  ONCO_Volume  CNEY_Close  CNEY_Volume  \
Date                                                                          
2023-10-02 00:00:00-04:00   20.719999         1098        3.57         8450   
2023-10-03 00:00:00-04:00   19.719999         1805        3.39        26667   
2023-10-04 00:00:00-04:00   17.240000          938        3.45        68543   
2023-10-05 00:00:00-04:00   17.280001          755        3.63        44113   
2023-10-06 00:00:00-04:00   17.200001         1453        3.60        19830   

                           TNXP_Close  TNXP_Volume  APLD_Close  APLD_Volume  \
Date                                                                          
2023-10-02 00:00:00-04:00   17.280001        11594       6.130      3041100   
2023-10-03 00:00:00-04:00   16.959999         9038       5.380      4787800   
2023-10-04 00:00:00-04:00   18.240000        18309       5.475      2217300   
2023-10-05 00:00:00-04:00   18.879999        26206 

### ONCO

**Sector:** Healthcare

**Industry:** Biotechnology

**Website:** https://onconetix.gcs-web.com

In [52]:
# yfinance handle for ONCO (Onconetix, Inc.).
onco = yf.Ticker("ONCO")

# Last expression of the cell, so the notebook displays it: the company
# metadata dict (address, website, industry, sector, business summary, ...).
onco.info

{'address1': '201 East Fifth Street',
 'address2': 'Suite 1900',
 'city': 'Cincinnati',
 'state': 'OH',
 'zip': '45202',
 'country': 'United States',
 'phone': '513 620 4101',
 'website': 'https://onconetix.gcs-web.com',
 'industry': 'Biotechnology',
 'industryKey': 'biotechnology',
 'industryDisp': 'Biotechnology',
 'sector': 'Healthcare',
 'sectorKey': 'healthcare',
 'sectorDisp': 'Healthcare',
 'longBusinessSummary': "Onconetix, Inc., a biotechnology company, focuses on the research, development, and commercialization of solutions for men's health and oncology. It offers Entadfi, an FDA-approved, once daily pill that combines finasteride and tadalafil for the treatment of benign prostatic hyperplasia; and Proclarix, an in vitro protein-based blood diagnostic test for prostate cancer. The company was formerly known as Blue Water Biotech, Inc. and changed its name to Onconetix, Inc. in December 2023. Onconetix, Inc. was incorporated in 2018 and is headquartered in Cincinnati, Ohio.",


In [53]:
# Freshness check: request only the latest daily bar (period="1d") for ONCO
# and print it, so the most recent trading date available can be read off.
onco_stock_data = yf.download("ONCO", period="1d")

print(onco_stock_data.head())

[*********************100%***********************]  1 of 1 completed

            Open  High     Low  Close  Adj Close  Volume
Date                                                    
2024-10-04  4.42   4.8  4.3589    4.7        4.7   63903





### CNEY

**Sector:** Basic Materials

**Industry:** Specialty Chemicals

**Website:** https://www.cneny.com

In [54]:
# yfinance handle for CNEY (CN Energy Group. Inc.).
cney = yf.Ticker("CNEY")

# Last expression of the cell, so the notebook displays it: the company
# metadata dict (address, website, industry, sector, business summary, ...).
cney.info

{'address1': 'Building 2-B',
 'address2': 'Room 206 No. 268 Shiniu Road Liandu District',
 'city': 'Lishui',
 'country': 'China',
 'phone': '86 571 8755 5823',
 'website': 'https://www.cneny.com',
 'industry': 'Specialty Chemicals',
 'industryKey': 'specialty-chemicals',
 'industryDisp': 'Specialty Chemicals',
 'sector': 'Basic Materials',
 'sectorKey': 'basic-materials',
 'sectorDisp': 'Basic Materials',
 'longBusinessSummary': "CN Energy Group. Inc., through its subsidiaries, engages in the manufacture and supply of wood-based activated carbon primarily in China. The company's activated carbon is used in pharmaceutical manufacturing, industrial manufacturing, water purification, environmental protection, and food and beverage production. It also engages in the generation and supply of biomass electricity; production of steam for heating; sale of minerals, stone, metal materials, construction materials, wood, chemical materials and products, rubber products, and paper products; manage

In [55]:
# Freshness check: request only the latest daily bar (period="1d") for CNEY
# and print it, so the most recent trading date available can be read off.
cney_stock_data = yf.download("CNEY", period="1d")

print(cney_stock_data.head())

[*********************100%***********************]  1 of 1 completed

              Open    High   Low   Close  Adj Close  Volume
Date                                                       
2024-10-04  0.7527  0.7724  0.72  0.7352     0.7352  590753





### TNXP

**Sector:** Healthcare

**Industry:** Biotechnology

**Website:** https://www.tonixpharma.com

In [56]:
# yfinance handle for TNXP (Tonix Pharmaceuticals Holding Corp.).
tnxp = yf.Ticker("TNXP")
# Last expression of the cell, so the notebook displays it: the company
# metadata dict (address, website, industry, sector, business summary, ...).
tnxp.info

{'address1': '26 Main Street',
 'address2': 'Suite 101',
 'city': 'Chatham',
 'state': 'NJ',
 'zip': '07928',
 'country': 'United States',
 'phone': '862 799 8599',
 'fax': '862 904 8163',
 'website': 'https://www.tonixpharma.com',
 'industry': 'Biotechnology',
 'industryKey': 'biotechnology',
 'industryDisp': 'Biotechnology',
 'sector': 'Healthcare',
 'sectorKey': 'healthcare',
 'sectorDisp': 'Healthcare',
 'longBusinessSummary': "Tonix Pharmaceuticals Holding Corp., a biopharmaceutical company, focuses on developing, discovering, commercializing, and licensing therapeutics to treat and prevent human disease and alleviate suffering. It markets Zembrace SymTouch and Tosymra for the treatment of acute migraine with or without aura in adults. Its portfolio focuses on central nervous system disorders, as well as rare disease, immunology, and infectious disease product candidates. The company's priority is to submit a New Drug Application (NDA) to the FDA for TNX-102 SL (cyclobenzaprine HC

In [57]:
# Freshness check: request only the latest daily bar (period="1d") for TNXP
# and print it, so the most recent trading date available can be read off.
tnxp_stock_data = yf.download("TNXP", period="1d")

print(tnxp_stock_data.head())

[*********************100%***********************]  1 of 1 completed

              Open    High    Low  Close  Adj Close    Volume
Date                                                         
2024-10-04  0.1264  0.1458  0.123  0.141      0.141  19882964





### APLD

**Sector:** Technology

**Industry:** Information Technology Services

**Website:** https://applieddigital.com

In [58]:
# yfinance handle for APLD (Applied Digital Corporation).
apld = yf.Ticker("APLD")
# Last expression of the cell, so the notebook displays it: the company
# metadata dict (address, website, industry, sector, business summary, ...).
apld.info

{'address1': '3811 Turtle Creek Boulevard',
 'address2': 'Suite 2100',
 'city': 'Dallas',
 'state': 'TX',
 'zip': '75219',
 'country': 'United States',
 'phone': '214 427 1704',
 'website': 'https://applieddigital.com',
 'industry': 'Information Technology Services',
 'industryKey': 'information-technology-services',
 'industryDisp': 'Information Technology Services',
 'sector': 'Technology',
 'sectorKey': 'technology',
 'sectorDisp': 'Technology',
 'longBusinessSummary': 'Applied Digital Corporation designs, develops, and operates digital infrastructure solutions and cloud services high-performance computing (HPC) and artificial intelligence industries in North America. It operates through three segments: Data Center Hosting Business, Cloud Services Business, and HPC Hosting Business. The company offers infrastructure services to crypto mining customers; and GPU computing solutions for critical workloads related to AI, machine learning, and other HPC tasks. It also engages in the desi

In [59]:
# Freshness check: request only the latest daily bar (period="1d") for APLD
# and print it, so the most recent trading date available can be read off.
apld_stock_data = yf.download("APLD", period="1d")

print(apld_stock_data.head())

[*********************100%***********************]  1 of 1 completed

            Open  High   Low   Close  Adj Close    Volume
Date                                                     
2024-10-04  8.05  8.26  7.35  7.4695     7.4695  10943795





### KTTA

**Sector:** Healthcare

**Industry:** Biotechnology

**Website:** https://www.pasithea.com

In [60]:
# yfinance handle for KTTA (Pasithea Therapeutics Corp.).
ktta = yf.Ticker("KTTA")
# Last expression of the cell, so the notebook displays it: the company
# metadata dict (address, website, industry, sector, business summary, ...).
ktta.info

{'address1': '1111 Lincoln Road',
 'address2': 'Suite 500',
 'city': 'Miami Beach',
 'state': 'FL',
 'zip': '33139',
 'country': 'United States',
 'phone': '702 514 4174',
 'website': 'https://www.pasithea.com',
 'industry': 'Biotechnology',
 'industryKey': 'biotechnology',
 'industryDisp': 'Biotechnology',
 'sector': 'Healthcare',
 'sectorKey': 'healthcare',
 'sectorDisp': 'Healthcare',
 'longBusinessSummary': 'Pasithea Therapeutics Corp., a biotechnology company, engages in discovery, research, and development of treatments for central nervous system disorders, RASopathies, and other diseases. Its lead product candidate PAS-004, a next-generation macrocyclic mitogen-activated protein kinase, or MEK inhibitor for use in the treatment of a range of RASopathies, including neurofibromatosis type 1 oncology indications. The company intends to develop PAS-003, to treat amyotrophic lateral sclerosis; and PAS-001, to treat schizophrenia. Pasithea Therapeutics Corp. was incorporated in 2020 a

In [61]:
# Freshness check: request only the latest daily bar (period="1d") for KTTA
# and print it, so the most recent trading date available can be read off.
ktta_stock_data = yf.download("KTTA", period="1d")
print(ktta_stock_data.head())


[*********************100%***********************]  1 of 1 completed

             Open  High   Low   Close  Adj Close  Volume
Date                                                    
2024-10-04  5.125  5.45  5.11  5.3644     5.3644   17578





# Sentiment Data

#### Alpha Vantage Market News & Sentiment 

In [16]:
import requests
import csv
from textblob import TextBlob
from datetime import datetime

import os

# Alpha Vantage credentials and endpoint selector.
# SECURITY: the key is read from the ALPHAVANTAGE_API_KEY environment variable
# instead of being committed in the notebook. Any key that was previously
# hard-coded here should be treated as leaked and rotated.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY', '')
if not api_key:
    print('WARNING: ALPHAVANTAGE_API_KEY is not set; Alpha Vantage requests will not be authorised.')
function = 'NEWS_SENTIMENT'

# List of stock ticker symbols
tickers = ['ONCO', 'CNEY', 'TNXP', 'APLD', 'KTTA']

# Same window as the price download, as YYYY-MM-DD strings.
start_date = "2023-10-01"
end_date = "2024-09-26"

# Convert the window to the YYYYMMDDTHHMM form used for the time_from/time_to
# query parameters: the start is pinned to 00:00 and the end to 23:59 so both
# boundary days are covered in full.
time_from = datetime.strptime(start_date, "%Y-%m-%d").strftime("%Y%m%dT0000")
time_to = datetime.strptime(end_date, "%Y-%m-%d").strftime("%Y%m%dT2359")

def get_sentiment(text):
    """Score a passage of text with TextBlob and return its polarity.

    Parameters
    ----------
    text : str
        Passage to analyse (the fetch loop passes each article's summary).

    Returns
    -------
    The ``sentiment.polarity`` value TextBlob computes for ``text``
    (documented by TextBlob as a float in [-1.0, 1.0]).
    """
    return TextBlob(text).sentiment.polarity

import os

# Refuse to call the API without a key: the request could not succeed and the
# loop would otherwise overwrite every output file with a header-only CSV.
if not api_key:
    raise RuntimeError('api_key is empty; provide an Alpha Vantage API key before fetching news sentiment')

# Create the output folder up front so opening the CSV for writing cannot fail
# on a fresh checkout.
os.makedirs('../data/unprocessed data', exist_ok=True)

# For each ticker: query the NEWS_SENTIMENT endpoint for the configured window,
# score every article summary with get_sentiment, and write one CSV per ticker.
# NOTE(review): the endpoint caps the number of articles per request (see the
# `limit` parameter in the Alpha Vantage docs); a year-long window may be
# truncated — confirm whether a larger limit or paging is needed.
for ticker in tickers:
    # Let requests build and URL-encode the query string, bound the wait with a
    # timeout, and surface HTTP errors instead of parsing an error page.
    response = requests.get(
        'https://www.alphavantage.co/query',
        params={
            'function': function,
            'tickers': ticker,
            'apikey': api_key,
            'time_from': time_from,
            'time_to': time_to,
        },
        timeout=30,
    )
    response.raise_for_status()
    data = response.json()

    # A payload without 'feed' usually means the API returned a message (rate
    # limit, bad key, no articles) rather than data. Only the field names are
    # printed because the message text may echo the API key.
    if 'feed' not in data:
        print(f'WARNING: no "feed" in Alpha Vantage response for {ticker} (fields: {sorted(data)}); writing a header-only file')

    articles = data.get('feed', [])

    processed_articles = []

    for article in articles:
        title = article.get('title', 'N/A')
        # Guard against an explicit null summary, which TextBlob cannot score.
        summary = article.get('summary', '') or ''
        time_published = article.get('time_published', 'N/A')

        sentiment = get_sentiment(summary)

        processed_articles.append([title, summary, sentiment, time_published])

    filename = f'../data/unprocessed data/{ticker}_news_sentiment.csv'

    # newline='' is required by the csv module so rows are not double-spaced.
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        writer.writerow(['Title', 'Summary', 'Sentiment', 'Date Published'])

        writer.writerows(processed_articles)

    print(f'Sentiment data for {ticker} within date range saved to {filename}')


Sentiment data for ONCO within date range saved to ../data/ONCO_news_sentiment.csv
Sentiment data for CNEY within date range saved to ../data/CNEY_news_sentiment.csv
Sentiment data for TNXP within date range saved to ../data/TNXP_news_sentiment.csv
Sentiment data for APLD within date range saved to ../data/APLD_news_sentiment.csv
Sentiment data for KTTA within date range saved to ../data/KTTA_news_sentiment.csv
