# UBS Stock Data

In [3]:
import pandas as pd
import os
#%pip install pytrends
import pytrends
from pytrends.request import TrendReq
#pip install pageviewapi
import pageviewapi
#%pip install yfinance
import yfinance as yf

In [4]:
def _parse_yyyymmdd(value, label):
    """Turn a "YYYYMMDD" string into a pandas Timestamp, failing loudly on bad input."""
    try:
        return pd.to_datetime(str(value), format="%Y%m%d")
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f'{label} must be a date written as "YYYYMMDD", got {value!r}'
        ) from exc


def _daily_news_interest(kw_list, start_date, end_date):
    """Fetch hourly Google Trends (news) interest and average it per calendar day."""
    # Named trend_client so the imported `pytrends` module is not shadowed.
    trend_client = TrendReq(hl='en-US', tz=360, retries=10)
    # hour_start=1 / hour_end=23, cat, geo, gprop and sleep are the values this
    # notebook has always requested; they are kept so new pulls stay comparable.
    hourly = trend_client.get_historical_interest(
        kw_list,
        year_start=start_date.year, month_start=start_date.month,
        day_start=start_date.day, hour_start=1,
        year_end=end_date.year, month_end=end_date.month,
        day_end=end_date.day, hour_end=23,
        cat=0, geo='', gprop='news', sleep=60,
    )
    if hourly is None or hourly.empty:
        raise RuntimeError(
            "Google Trends returned no data for the requested keywords and dates"
        )

    # Drop the bookkeeping flag by name; dropping "the last column" would remove
    # a keyword column if the flag were ever missing.
    hourly = hourly.drop(columns="isPartial", errors="ignore")
    # Timestamps can repeat in the returned frame (presumably where request
    # windows overlap); keep the first row for each timestamp.
    hourly = hourly[~hourly.index.duplicated(keep="first")].sort_index()
    # Mean of the hourly scores per day; days without samples become NaN rows.
    return hourly.resample("D").mean()


def _price_history(ticker, start_date, end_date):
    """Fetch daily price history for `ticker`, including `end_date` itself."""
    # yfinance treats `end` as exclusive, so ask for one extra day to keep the
    # last requested day in the result.
    prices = pd.DataFrame(
        yf.Ticker(ticker).history(start=start_date, end=end_date + pd.Timedelta(days=1))
    )
    # A timezone-aware index cannot be aligned with the timezone-naive trends
    # index, so strip the zone while keeping the local calendar date.
    if isinstance(prices.index, pd.DatetimeIndex) and prices.index.tz is not None:
        prices.index = prices.index.tz_localize(None)
    return prices


def _wikipedia_daily_views(titles, start, end):
    """Fetch daily English-Wikipedia page views, one column per article title."""
    views_by_title = {}
    for title in titles:
        response = pageviewapi.per_article('en.wikipedia', title, start, end,
                                           access='all-access', agent='all-agents',
                                           granularity='daily')
        items = pd.DataFrame(dict(response)["items"])
        items["timestamp"] = pd.to_datetime(items["timestamp"], format="%Y%m%d%H")
        views_by_title[title] = items.set_index("timestamp")["views"]
    if not views_by_title:
        return pd.DataFrame()
    # Concatenate on the union of dates so an article with missing days cannot
    # truncate the other articles' series.
    return pd.concat(views_by_title, axis=1)


def Big_scraper(kw_list_1, kw_list_2, ticker, start, end):

    """
    Description:
    ------------

    Builds one daily table for a stock from three sources:

    1. Google Trends "Interest Over Time" for news search, fetched hourly and
       averaged to one score per keyword per day.
    2. Daily price history for the ticker from yfinance.
    3. Daily page views of English-Wikipedia articles.

    The Google Trends days form the rows; prices and page views are left-joined
    onto them, so non-trading days have NaN price columns.

    NOTE(review): the notebook output shows connection errors being printed
    while the call still returned a frame, so failed Google Trends request
    windows appear to be skipped rather than raised. Check the result for
    missing or NaN days after a long pull.


    input:
    -----
    kw_list_1: List of up to 5 keywords whose Google Trends news interest is
             pulled for the given dates.

    kw_list_2: List of Wikipedia article titles (any length) whose daily page
             views are pulled. May be empty, in which case no page-view columns
             are added.

    ticker: the ticker abbreviation of the desired stock, entered as an
             all-capitalized string, for example Apple Inc. would be "AAPL".

    start: first day of the desired timeline. Must be entered as "YYYYMMDD".

    end: last day of the desired timeline (inclusive). Must be entered as
             "YYYYMMDD" and must not be earlier than start.

    return:
    -------

    combined: a dataframe indexed by day containing

    the mean daily Google Trends news score of each keyword in kw_list_1,

    the stock columns returned by yfinance (in this notebook: Open, High, Low,
    Close, Volume, Dividends, Stock Splits),

    and the daily Wikipedia page views of each title in kw_list_2.

    When a name appears in both kw_list_1 and kw_list_2 (e.g. "UBS"), the
    Google Trends column gets the suffix _x and the Wikipedia column gets the
    suffix _y; all other columns keep their plain names.

    raises:
    -------
    ValueError: if start or end is not a valid "YYYYMMDD" date, or end is
             earlier than start.
    RuntimeError: if Google Trends returns no data at all.
    """

    start_date = _parse_yyyymmdd(start, "start")
    end_date = _parse_yyyymmdd(end, "end")
    if end_date < start_date:
        raise ValueError(f"end ({end}) must not be earlier than start ({start})")

    combined = _daily_news_interest(kw_list_1, start_date, end_date).merge(
        _price_history(ticker, start_date, end_date),
        left_index=True, right_index=True, how="left",
    )

    if kw_list_2:
        wiki_views = _wikipedia_daily_views(
            kw_list_2, start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d")
        )
        # Explicit suffixes document the existing naming of colliding columns.
        combined = combined.merge(
            wiki_views, left_index=True, right_index=True, how="left",
            suffixes=("_x", "_y"),
        )

    return combined

In [5]:
# Google Trends search terms (passed as the first argument of Big_scraper).
kw_list_1 = [
    "UBS",
    "UBS Financial Services Inc.",
    "UBS Investment Bank",
    "UBS Global Wealth Management",
    "UBS Asset Management",
]
# Wikipedia article titles whose daily page views are pulled (second argument).
kw_list_2 = [
    "UBS",
    "Union Bank of Switzerland",
    "UBS tax evasion controversies",
    "Banking in Switzerland",
]

In [18]:
# Pull calendar year 2021 for the UBS ticker.
UBS = Big_scraper(
    kw_list_1,
    kw_list_2,
    ticker="UBS",
    start="20210101",
    end="20211231",
)

HTTPSConnectionPool(host='trends.google.com', port=443): Max retries exceeded with url: /trends/api/explore?hl=en-US&tz=360&req=%7B%22comparisonItem%22%3A+%5B%7B%22keyword%22%3A+%22UBS%22%2C+%22time%22%3A+%222021-10-15T01+2021-10-22T01%22%2C+%22geo%22%3A+%22%22%7D%2C+%7B%22keyword%22%3A+%22UBS+Financial+Services+Inc.%22%2C+%22time%22%3A+%222021-10-15T01+2021-10-22T01%22%2C+%22geo%22%3A+%22%22%7D%2C+%7B%22keyword%22%3A+%22UBS+Investment+Bank%22%2C+%22time%22%3A+%222021-10-15T01+2021-10-22T01%22%2C+%22geo%22%3A+%22%22%7D%2C+%7B%22keyword%22%3A+%22UBS+Global+Wealth+Management%22%2C+%22time%22%3A+%222021-10-15T01+2021-10-22T01%22%2C+%22geo%22%3A+%22%22%7D%2C+%7B%22keyword%22%3A+%22UBS+Asset+Management%22%2C+%22time%22%3A+%222021-10-15T01+2021-10-22T01%22%2C+%22geo%22%3A+%22%22%7D%5D%2C+%22category%22%3A+0%2C+%22property%22%3A+%22news%22%7D (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fde7eebc0a0>: Failed to establish a new connection: [Errno 8] nodename n

In [23]:
# Preview the first five rows of the frame returned by the pull above.
UBS.head(n=5)

Unnamed: 0_level_0,UBS_x,UBS Financial Services Inc.,UBS Investment Bank,UBS Global Wealth Management,UBS Asset Management,Open,High,Low,Close,Volume,Dividends,Stock Splits,UBS_y,Union Bank of Switzerland,UBS tax evasion controversies,Banking in Switzerland
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2021-01-01,3.391304,0.0,0.0,0.0,0.0,,,,,,,,1044,69,22,365
2021-01-02,3.0,0.0,0.0,0.0,0.0,,,,,,,,1085,111,17,392
2021-01-03,3.958333,0.0,0.0,0.0,0.0,,,,,,,,1311,91,11,397
2021-01-04,6.666667,0.0,0.0,0.0,0.0,13.726173,13.741383,13.469521,13.583589,4974100.0,0.0,0.0,1521,107,30,385
2021-01-05,0.0,0.0,0.0,0.0,0.0,13.75469,13.963814,13.735678,13.887769,3584400.0,0.0,0.0,1460,98,28,356


In [26]:
# Persist the frame currently held in `UBS`; the file name labels it as 2021 data.
UBS.to_csv(path_or_buf="2021_UBS_Data.csv")

In [6]:
# Pull calendar year 2019 for the UBS ticker.
# NOTE(review): the next cell reassigns `UBS`, so this frame is discarded
# unless it is saved or copied to another name first.
UBS = Big_scraper(
    kw_list_1,
    kw_list_2,
    ticker="UBS",
    start="20190101",
    end="20191231",
)

In [7]:
# Pull calendar year 2020 for the UBS ticker.
# NOTE(review): the next cell reassigns `UBS`, so this frame is discarded
# unless it is saved or copied to another name first.
UBS = Big_scraper(
    kw_list_1,
    kw_list_2,
    ticker="UBS",
    start="20200101",
    end="20201231",
)

In [8]:
# Pull 2022-01-01 through 2022-03-31 for the UBS ticker.
# NOTE(review): the next cell reassigns `UBS`, so this frame is discarded
# unless it is saved or copied to another name first.
UBS = Big_scraper(
    kw_list_1,
    kw_list_2,
    ticker="UBS",
    start="20220101",
    end="20220331",
)

In [9]:
# Pull the whole 2019-01-01 .. 2022-03-31 range in one go; from here on `UBS`
# holds this multi-year frame.
UBS = Big_scraper(
    kw_list_1,
    kw_list_2,
    ticker="UBS",
    start="20190101",
    end="20220331",
)

HTTPSConnectionPool(host='trends.google.com', port=443): Max retries exceeded with url: /trends/api/explore?hl=en-US&tz=360&req=%7B%22comparisonItem%22%3A+%5B%7B%22keyword%22%3A+%22UBS%22%2C+%22time%22%3A+%222020-09-29T01+2020-10-06T01%22%2C+%22geo%22%3A+%22%22%7D%2C+%7B%22keyword%22%3A+%22UBS+Financial+Services+Inc.%22%2C+%22time%22%3A+%222020-09-29T01+2020-10-06T01%22%2C+%22geo%22%3A+%22%22%7D%2C+%7B%22keyword%22%3A+%22UBS+Investment+Bank%22%2C+%22time%22%3A+%222020-09-29T01+2020-10-06T01%22%2C+%22geo%22%3A+%22%22%7D%2C+%7B%22keyword%22%3A+%22UBS+Global+Wealth+Management%22%2C+%22time%22%3A+%222020-09-29T01+2020-10-06T01%22%2C+%22geo%22%3A+%22%22%7D%2C+%7B%22keyword%22%3A+%22UBS+Asset+Management%22%2C+%22time%22%3A+%222020-09-29T01+2020-10-06T01%22%2C+%22geo%22%3A+%22%22%7D%5D%2C+%22category%22%3A+0%2C+%22property%22%3A+%22news%22%7D (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fc6a9fd61c0>: Failed to establish a new connection: [Errno 8] nodename n

In [10]:
# `UBS` holds the single 2019-01-01 .. 2022-03-31 pull at this point, so writing
# it unchanged under three year-named files would produce three identical CSVs.
# Slice by calendar year (partial-string indexing on the date index) so each
# file contains only the rows its name promises.
for year in ("2019", "2020", "2022"):
    UBS.loc[year].to_csv(f"{year}_UBS_Data.csv")

In [None]:
# Write only the 2022 rows: `UBS` holds the multi-year pull (20190101-20220331),
# so saving it whole under this name would mislabel the file.
UBS.loc["2022"].to_csv("2022_UBS_Data.csv")