In [1]:
import datetime
import os
import urllib.parse

import pandas as pd
import requests
from dateutil.relativedelta import relativedelta
from pytrends.request import TrendReq

In [2]:
def get_stock_data(ticker_code:str,
                   start_date:str,
                   end_date:str,
                   api_key:str):
    """Download price history for one ticker and return it as a DataFrame.

    The request goes to the eodhistoricaldata.com ``/api/eod/`` endpoint for
    ``<ticker_code>.US``; the ``.US`` suffix is hard-coded.

    Parameters
    ----------
    ticker_code : str
        Ticker symbol without the exchange suffix (e.g. ``'MSFT'``).
    start_date, end_date : str
        Sent as the ``from`` / ``to`` query parameters.
        NOTE(review): the notebook passes 'YYYY-MM-DD' strings -- confirm the
        API accepts no other format.
    api_key : str
        Sent as the ``api_token`` query parameter.

    Returns
    -------
    pandas.DataFrame
        ``pd.DataFrame`` built directly from the decoded JSON response.
    """
    query_string = urllib.parse.urlencode({
        'from': start_date,
        'to': end_date,
        'period': 'd',   # presumably "daily" bars -- confirm against the API docs
        'fmt': 'json',
        'api_token': api_key,
    })
    endpoint = f'https://eodhistoricaldata.com/api/eod/{ticker_code}.US?'

    payload = requests.get(endpoint + query_string).json()
    return pd.DataFrame(payload)

In [3]:
def get_google_search(company_ticker, timeframe=None):
    """Fetch Google Trends interest-over-time for a single keyword.

    Parameters
    ----------
    company_ticker
        Keyword sent to Google Trends (the notebook passes the stock ticker).
    timeframe : str, optional
        Timeframe string handed to ``pytrends.build_payload``. When omitted,
        the notebook-level ``start_date`` and ``end_date`` globals are used,
        formatted as ``'<start_date> <end_date>'`` -- this is what the function
        always did before the parameter existed, so existing one-argument
        calls behave the same.

    Returns
    -------
    pandas.DataFrame
        Columns ``date`` (datetime) and ``google_count`` (the keyword's
        interest column, renamed). Empty, but with those two columns, when
        pytrends returns no rows.

    Raises
    ------
    NameError
        If ``timeframe`` is omitted and ``start_date`` / ``end_date`` have not
        been defined in the notebook yet.
    """
    if timeframe is None:
        # Backward-compatible default: fall back to the notebook globals.
        timeframe = f'{start_date} {end_date}'

    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload([company_ticker], cat=0, timeframe=timeframe)
    interest = pytrends.interest_over_time()

    if interest.empty:
        # NOTE(review): pytrends appears to return a bare empty frame when there
        # is no data; reset_index() would then yield no 'date' column and the
        # lookup below would raise KeyError. Return a typed empty frame instead
        # so downstream merges on 'date' still work.
        return pd.DataFrame({'date': pd.Series(dtype='datetime64[ns]'),
                             'google_count': pd.Series(dtype='float64')})

    interest = interest.reset_index()
    interest['date'] = pd.to_datetime(interest['date'])
    # errors='ignore' keeps this working if the 'isPartial' flag column is absent.
    interest = interest.drop(columns='isPartial', errors='ignore')
    return interest.rename(columns={str(company_ticker): 'google_count'})

In [4]:
def filter_data(df:pd.DataFrame,
                strt_date:str,
                end_date:str) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``date`` lies in ``[strt_date, end_date]``.

    The ``date`` column is parsed with ``pd.to_datetime`` on a copy, so the
    caller's frame is left untouched (previously its ``date`` column was
    overwritten as a side effect). Both bounds are inclusive.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``date`` column that ``pd.to_datetime`` can parse.
    strt_date, end_date : str
        Inclusive lower / upper bounds compared against the parsed dates.

    Returns
    -------
    pandas.DataFrame
        New frame with a datetime ``date`` column and a fresh 0..n-1 index.
    """
    parsed = df.copy()
    parsed['date'] = pd.to_datetime(parsed['date'])
    in_window = (parsed['date'] >= strt_date) & (parsed['date'] <= end_date)
    return parsed[in_window].reset_index(drop=True)

In [5]:
def get_news_sentiment(stock, start_date, end_date, api_key):
    """Fetch news-sentiment records for ``<stock>.US`` as a DataFrame.

    Calls the eodhistoricaldata.com ``/api/sentiments`` endpoint and reads the
    list stored under the ``'<stock>.US'`` key of the JSON response.

    Parameters
    ----------
    stock : str
        Ticker without exchange suffix; sent as the ``s`` query parameter.
    start_date, end_date : str
        Sent as ``from`` / ``to``. ``start_date`` must be parseable by
        ``pd.to_datetime`` because it dates the placeholder row (see Returns).
    api_key : str
        Sent as ``api_token``.

    Returns
    -------
    pandas.DataFrame
        The response records with ``count`` renamed to ``news_count`` and
        ``normalized`` renamed to ``news_sentiment``, and ``date`` parsed to
        datetime. When there are no records (empty list, missing key, or a
        non-dict payload) a single placeholder row dated ``start_date`` with
        zero count and zero sentiment is returned, so a later merge on
        ``date`` still has matching columns and dtype.
    """
    url = 'https://eodhistoricaldata.com/api/sentiments?'
    params = {'s':stock,
              'from':start_date,
              'to':end_date,
              'api_token':api_key}
    company_key = f'{stock}.US'
    news_url = url + urllib.parse.urlencode(params)

    payload = requests.get(news_url).json()
    # The key may be absent, or the payload may not be a dict at all.
    records = payload.get(company_key, []) if isinstance(payload, dict) else []

    if records:
        df = pd.DataFrame(records).rename(columns={'count':'news_count',
                                                   'normalized':'news_sentiment'})
        df['date'] = pd.to_datetime(df['date'])
    else:
        # Values are wrapped in lists: building a DataFrame from all-scalar
        # values raises ValueError. The date is parsed so its dtype matches
        # the non-empty branch.
        df = pd.DataFrame({'date':[pd.to_datetime(start_date)],
                           'news_count':[0],
                           'news_sentiment':[0]})
    return df

In [6]:
def get_tweet_sentiment(stock, start_date, end_date, api_key):
    """Fetch tweet-sentiment records for ``<stock>.US`` as a DataFrame.

    Calls the eodhistoricaldata.com ``/api/tweets-sentiments`` endpoint and
    reads the list stored under the ``'<stock>.US'`` key of the JSON response.

    Parameters
    ----------
    stock : str
        Ticker without exchange suffix; sent as the ``s`` query parameter.
    start_date, end_date : str
        Sent as ``from`` / ``to``. ``start_date`` must be parseable by
        ``pd.to_datetime`` because it dates the placeholder row (see Returns).
    api_key : str
        Sent as ``api_token``.

    Returns
    -------
    pandas.DataFrame
        The response records with ``count`` renamed to ``tweet_count`` and
        ``normalized`` renamed to ``tweet_sentiment``, and ``date`` parsed to
        datetime. When there are no records (empty list, missing key, or a
        non-dict payload) a single placeholder row dated ``start_date`` with
        zero count and zero sentiment is returned, so a later merge on
        ``date`` still has matching columns and dtype.
    """
    url = 'https://eodhistoricaldata.com/api/tweets-sentiments?'
    params = {'s':stock,
              'from':start_date,
              'to':end_date,
              'api_token':api_key}
    company_key = f'{stock}.US'
    tweet_url = url + urllib.parse.urlencode(params)

    payload = requests.get(tweet_url).json()
    # The key may be absent, or the payload may not be a dict at all.
    records = payload.get(company_key, []) if isinstance(payload, dict) else []

    if records:
        df = pd.DataFrame(records).rename(columns={'count':'tweet_count',
                                                   'normalized':'tweet_sentiment'})
        df['date'] = pd.to_datetime(df['date'])
    else:
        # Values are wrapped in lists: building a DataFrame from all-scalar
        # values raises ValueError. The date is parsed so its dtype matches
        # the non-empty branch.
        df = pd.DataFrame({'date':[pd.to_datetime(start_date)],
                           'tweet_count':[0],
                           'tweet_sentiment':[0]})
    return df

In [7]:
#company_names = dict({'Apple':'AAPL', 
#                 'Amazon':'AMZN',
#                 'Microsoft':'MSFT'})

# The API token is read from the environment so it is never stored in the
# notebook. A token that was previously committed here should be treated as
# leaked and rotated.
api_key = os.environ.get('EODHD_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the EODHD_API_KEY environment variable to your "
        "eodhistoricaldata.com API token before running this notebook."
    )

# Analysis window: the last LOOKBACK_DAYS days up to now, as 'YYYY-MM-DD'
# strings. The clock is read once so both bounds derive from the same instant.
LOOKBACK_DAYS = 30
_now = datetime.datetime.now()
start_date = (_now - datetime.timedelta(days=LOOKBACK_DAYS)).strftime('%Y-%m-%d')
end_date = _now.strftime('%Y-%m-%d')

# Sentiment columns produced by get_news_sentiment / get_tweet_sentiment.
filler_columns = ['news_count','news_sentiment','tweet_count','tweet_sentiment']

In [14]:
# Ticker analysed by the cells below.
company = 'MSFT'
# Price history for the configured window (start_date .. end_date).
df = get_stock_data(company, start_date, end_date, api_key)
# Google Trends interest for the ticker keyword; renamed to 'google_count'
# inside get_google_search.
ggl_df = get_google_search(company)

In [15]:
# Per-source frames, all restricted to / requested for the same window.
price_df = filter_data(df, start_date, end_date)
news_df = get_news_sentiment(company, start_date, end_date, api_key)
tweet_df = get_tweet_sentiment(company, start_date, end_date, api_key)

# Build the combined frame in one chain (no in-place mutation, so re-running
# the cell is idempotent):
#   1. outer-join trends with prices on 'date', then forward-fill the gaps
#      (.ffill() replaces the deprecated fillna(method='ffill'));
#   2. outer-join news and tweet sentiment and zero-fill what is still missing;
#   3. keep the sentiment columns plus 'google_count' by NAME rather than by
#      position, so extra or reordered price columns cannot shift the selection;
#   4. sort chronologically and index by date.
base_df = (
    pd.merge(ggl_df, price_df, on='date', how='outer')
    .ffill()
    .merge(news_df, on='date', how='outer')
    .merge(tweet_df, on='date', how='outer')
    .fillna(0)
    .loc[:, ['date', *filler_columns, 'google_count']]
    .sort_values(by='date')
    .set_index('date')
)

In [16]:
# Show the merged, date-indexed frame as the cell's output.
base_df

Unnamed: 0_level_0,news_count,news_sentiment,tweet_count,tweet_sentiment,google_count
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-11-07,16,0.2763,784,0.0904,84.0
2022-11-08,17,0.3831,634,0.1008,70.0
2022-11-09,14,0.5022,374,0.088,66.0
2022-11-10,23,0.6817,549,0.1675,100.0
2022-11-11,10,0.7513,628,0.1826,81.0
2022-11-12,2,0.999,412,0.1236,23.0
2022-11-13,2,0.498,486,0.0968,14.0
2022-11-14,15,0.1716,543,0.1345,69.0
2022-11-15,8,0.4694,708,0.1508,75.0
2022-11-16,26,0.6112,623,0.135,68.0
