## Getting Stock Market Data
We download historical stock market data using the yfinance library, which retrieves the data from Yahoo Finance. The data were downloaded for the 12 biotechnology companies this study focuses on, and cover the same time windows for which the tweets were previously downloaded.


In [2]:
#!pip install yfinance
import yfinance as yf
from datetime import datetime, timedelta
import pandas as pd
import pytz


In [3]:
def download_stock_data(ticker: str, start_date: str, start_time: str, end_date: str, end_time: str, interval: str = '1h') -> pd.DataFrame:
    """
    Download historical stock market data for a specific company.

    Each date/time pair is joined with a single space and parsed into a naive
    ``datetime`` (no time zone is attached in this function); both values are
    then passed to ``yf.download`` as the ``start`` and ``end`` bounds.

    Parameters:
        ticker (str): The ticker symbol of the company
        start_date (str): The start date of the data in the format 'yyyy-mm-dd'
        start_time (str): The start time of the data in the format 'hh:mm'
        end_date (str): The end date of the data in the format 'yyyy-mm-dd'
        end_time (str): The end time of the data in the format 'hh:mm'
        interval (str): The interval of the data, default is '1h'

    Returns:
        pd.DataFrame: Dataframe containing the historical stock market data,
        exactly as returned by ``yf.download``

    Raises:
        ValueError: If a date/time pair does not match '%Y-%m-%d %H:%M'
    """
    timestamp_format = '%Y-%m-%d %H:%M'

    # NOTE(review): the bounds are naive datetimes. The callers treat them as
    # New York time, but nothing here enforces that -- confirm how yfinance
    # interprets naive bounds for non-US tickers such as "BAYN.DE".
    start_datetime = datetime.strptime(' '.join((start_date, start_time)), timestamp_format)
    end_datetime = datetime.strptime(' '.join((end_date, end_time)), timestamp_format)

    # Download the historical data for the requested window and bar size
    return yf.download(ticker, interval=interval, start=start_datetime, end=end_datetime)


In [4]:
# Example usage: hourly data for the "BAYN.DE" ticker over a two-week window
data = download_stock_data("BAYN.DE", "2022-01-31", "09:55", "2022-02-14", "8:55", '1h')
data
# Bayer data
# Earlier direct yfinance call, kept for reference only (not executed):
#yf.download("BAYN ", interval="1h", start="2022-01-01", end="2022-01-07")


[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-31 09:00:00+01:00,54.240002,54.279999,53.619999,53.959999,53.959999,0
2022-01-31 10:00:00+01:00,53.959999,54.000000,53.560001,53.630001,53.630001,459186
2022-01-31 11:00:00+01:00,53.610001,53.610001,53.240002,53.400002,53.400002,572286
2022-01-31 12:00:00+01:00,53.389999,53.590000,53.340000,53.590000,53.590000,358229
2022-01-31 13:00:00+01:00,53.580002,53.830002,53.500000,53.830002,53.830002,370137
...,...,...,...,...,...,...
2022-02-11 13:00:00+01:00,53.889999,54.020000,53.770000,53.830002,53.830002,157213
2022-02-11 14:00:00+01:00,53.830002,54.189999,53.820000,54.099998,54.099998,177538
2022-02-11 15:00:00+01:00,54.099998,54.500000,54.080002,54.250000,54.250000,228115
2022-02-11 16:00:00+01:00,54.270000,54.340000,54.139999,54.189999,54.189999,205418


In [6]:
# Load hourly stock data for the companies we are studying (COVID-era window).
# `companies` holds the display names; `ticker` holds the matching symbols.
companies = ["Johnson&Johnson", "Eli Lilly", "Novo Nordisk", "AbbVie", "Merck", "Pfizer", "Roche","AstraZeneca", "Novartis", "Moderna" , "Bayer", "WuXi Biologics"]
ticker = ["JNJ","LLY","NVO","ABBV","MRK","PFE","RHHBY","AZN","NVS","MRNA", "BAYN.DE","WX"]

# The download window is defined once so that the bounds passed to the
# downloader and the window columns written to each CSV cannot drift apart.
WINDOW_START_DATE, WINDOW_START_TIME = "2023-02-01", "00:45"
WINDOW_END_DATE, WINDOW_END_TIME = "2023-03-20", "19:44"
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M'

# The window bounds are treated as New York time and shifted by a fixed
# 5 hours to obtain the "Swiss time" labels. These values do not depend on
# the ticker, so they are computed once, outside the loop.
# NOTE(review): a fixed +5h offset ignores daylight-saving changes, and this
# window spans the US switch in March 2023 -- confirm the offset is intended
# for both bounds (pytz is imported in this notebook but not used here).
start_ny_time = datetime.strptime(f"{WINDOW_START_DATE} {WINDOW_START_TIME}", TIMESTAMP_FORMAT)
end_ny_time = datetime.strptime(f"{WINDOW_END_DATE} {WINDOW_END_TIME}", TIMESTAMP_FORMAT)
start_swiss_time = start_ny_time + timedelta(hours=5)
end_swiss_time = end_ny_time + timedelta(hours=5)
start_swiss_label = start_swiss_time.strftime(TIMESTAMP_FORMAT)
end_swiss_label = end_swiss_time.strftime(TIMESTAMP_FORMAT)

for tick in ticker:
    data = download_stock_data(tick, WINDOW_START_DATE, WINDOW_START_TIME, WINDOW_END_DATE, WINDOW_END_TIME, '1h')
    # Every row carries the same window bounds (constant columns).
    data["start_swiss_time"] = start_swiss_label
    data["end_swiss_time"] = end_swiss_label
    print (data)
    # One CSV per ticker, written to the current working directory.
    data.to_csv(f'{tick}_stockMarket_round2_feb1-mar19.csv')

# NOTE: The New York stock market is open from 9:30 to 16:00 local time, so
# hourly bars for the US-listed tickers start at 09:30.
    

[*********************100%***********************]  1 of 1 completed
                                 Open        High         Low       Close  \
Datetime                                                                    
2023-02-01 09:30:00-05:00  162.559998  164.059998  162.399994  162.830002   
2023-02-01 10:30:00-05:00  162.830002  164.270004  162.830002  164.100006   
2023-02-01 11:30:00-05:00  164.089996  164.648697  163.960007  164.029999   
2023-02-01 12:30:00-05:00  164.029999  164.244003  163.669998  163.710007   
2023-02-01 13:30:00-05:00  163.669998  164.429993  163.610001  164.035004   
...                               ...         ...         ...         ...   
2023-03-01 11:30:00-05:00  152.339996  152.690002  152.199997  152.220001   
2023-03-01 12:30:00-05:00  152.220001  152.404999  152.039993  152.070007   
2023-03-01 13:30:00-05:00  152.080002  152.229996  151.919998  152.003998   
2023-03-01 14:30:00-05:00  152.000000  152.399994  151.979996  152.279999   
2023-03

[*********************100%***********************]  1 of 1 completed
                                 Open        High         Low       Close  \
Datetime                                                                    
2023-02-01 09:30:00-05:00  106.970001  107.139999  106.360001  106.635002   
2023-02-01 10:30:00-05:00  106.620003  106.964996  106.529999  106.745003   
2023-02-01 11:30:00-05:00  106.745003  107.050003  106.620003  106.805000   
2023-02-01 12:30:00-05:00  106.809998  107.019997  106.680000  106.904999   
2023-02-01 13:30:00-05:00  106.900002  107.010002  106.300003  106.330002   
...                               ...         ...         ...         ...   
2023-03-01 11:30:00-05:00  107.109901  107.315002  106.650002  106.790001   
2023-03-01 12:30:00-05:00  106.800003  106.860001  106.330002  106.389999   
2023-03-01 13:30:00-05:00  106.400002  106.680000  106.269997  106.470001   
2023-03-01 14:30:00-05:00  106.449997  106.870003  106.430000  106.809998   
2023-03

[*********************100%***********************]  1 of 1 completed
                                Open       High        Low      Close  \
Datetime                                                                
2023-02-01 09:30:00-05:00  88.199997  88.580002  87.324997  87.434998   
2023-02-01 10:30:00-05:00  87.434998  88.230003  87.279999  87.750000   
2023-02-01 11:30:00-05:00  87.739998  87.779999  87.309998  87.309998   
2023-02-01 12:30:00-05:00  87.300003  87.449997  87.190002  87.237000   
2023-02-01 13:30:00-05:00  87.230003  87.379997  87.029999  87.089996   
...                              ...        ...        ...        ...   
2023-03-01 11:30:00-05:00  83.760002  84.019997  83.760002  83.955002   
2023-03-01 12:30:00-05:00  83.959999  83.991402  83.894997  83.959999   
2023-03-01 13:30:00-05:00  83.959999  83.965202  83.860001  83.860001   
2023-03-01 14:30:00-05:00  83.839996  83.930000  83.805000  83.855003   
2023-03-01 15:30:00-05:00  83.855003  83.879997  83.779