In [2]:
## importing necessary libraries
import json ## decodes the JSON stored in the page's data-react-props attribute
import re ## regular expressions, used to strip markup out of the timestamps
import time ## time tracker
from random import randrange ##  allows us to randomize numbers library

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [3]:
## This link brings us to the first page of Coinbase's status history (previous incidents).
## Before we scrape all 37 pages, we first need to identify what we are targeting.
url = "https://status.coinbase.com/history?page=1"

In [4]:
## Pulling down the webpage.
## The timeout stops this cell from hanging forever if the server never answers.
response = requests.get(url, timeout=30)
## 200 means the page came back fine.
print(response.status_code)

200


In [5]:
## Making some soup: parsing the downloaded HTML with Python's built-in parser so we can search it.
soup = BeautifulSoup(response.text, "html.parser")

In [19]:
## The names of previous incidents live in a div tagged data-react-class="HistoryIndex".
## Grabbing that Incident History div (find gives back None when the page has no such div).
incident_div = soup.find("div", attrs={"data-react-class": "HistoryIndex"})

In [9]:
## The incident data sits in the div's "data-react-props" attribute as one long string.
## 1. keep only the text between the opening of the "months" list and its closing brackets
## 2. drop the curly brackets that group the data by month and by incident, which are not needed
## 3. split what is left into separate key:value strings
months_marker = '"months":['
raw_props = incident_div["data-react-props"]
start = raw_props.find(months_marker) + len(months_marker)
end = raw_props.find('}]}]', start)
s = raw_props[start:end].replace("{","").replace("}","")
incident_list = s.split(',"')

In [10]:
## Cleaning pass: every (old, new) pair below is applied, in this order, to each value.
## It strips the leftover quotes, the escaped <var> markup wrapped around dates and times,
## and the list brackets that survived the split.
replacements = [
    ('"', ""),
    ("\\u003cvar data-var=\'date\'\\u003e", ""),
    ("\\u003c/var\\u003e", ""),
    ("\\u003cvar data-var=\'time\'\\u003e", ""),
    ("incidents:[", ""),
    ("PDT]", "PDT"),
    ("PST]", "PST"),
]
clean_incidents = []
for item in incident_list:
    for old, new in replacements:
        item = item.replace(old, new)
    clean_incidents.append(item)

## grabbing year (one value per month shown on the page)
year = "year:"
incident_year = [int(entry.replace(year, "")) for entry in clean_incidents if year in entry]
print(incident_year)

## Dropping the values that describe a month rather than an incident: the month's name,
## its year, the day it starts on and its number of days.
## Only the months seen on this page are listed; the final scrape needs all twelve.
month_level_markers = (
    ":October",
    ":September",
    ":August",
    ":December",
    ":January",
    ":November",
    "days:",
    "starts_on",
    "year:",
)
cleaner_incidents = [
    entry for entry in clean_incidents
    if not any(marker in entry for marker in month_level_markers)
]

[2022, 2022, 2022]


In [11]:
## name list: every value carrying the "name:" key, with the key itself removed
name = "name:"
incident_names = [entry.replace(name, "") for entry in cleaner_incidents if name in entry]

In [12]:
## message list: every value carrying the "message:" key, with the key itself removed
message = "message:"
incident_messages = [entry.replace(message, "") for entry in cleaner_incidents if message in entry]

In [13]:
## code list: every value carrying the "code:" key, with the key itself removed
code = "code:"
incident_codes = [entry.replace(code, "") for entry in cleaner_incidents if code in entry]

In [14]:
## impact list: every value carrying the "impact:" key, with the key itself removed
impact = "impact:"
incident_impacts = [entry.replace(impact, "") for entry in cleaner_incidents if impact in entry]

In [15]:
## timestamp list: every value carrying the "timestamp:" key, with the key itself removed
timestamp = "timestamp:"
incident_timestamps = [entry.replace(timestamp, "") for entry in cleaner_incidents if timestamp in entry]

In [16]:
## creating year column
## Each page shows three months, and incident_year holds the year of each of them, in page order.
## When January shares a page with December and November it is the first month listed,
## so a timestamp mentioning "Jan" takes the first year and every other timestamp takes the third.
## NOTE: this relies on incident_year having three entries, and it only covers that
## January/December/November layout.
jan = "Jan"
incident_years = [
    incident_year[0] if jan in stamp else incident_year[2]
    for stamp in incident_timestamps
]

In [17]:
## creating month column
## The abbreviations are tried in calendar order and the first one found anywhere in the
## timestamp decides the month; a timestamp without any of them gets the text "None".
month_lookup = [
    ("Jan", "January"),
    ("Feb", "February"),
    ("Mar", "March"),
    ("Apr", "April"),
    ("May", "May"),
    ("Jun", "June"),
    ("Jul", "July"),
    ("Aug", "August"),
    ("Sep", "September"),
    ("Oct", "October"),
    ("Nov", "November"),
    ("Dec", "December"),
]
incident_months = [
    next((full_name for short, full_name in month_lookup if short in stamp), "None")
    for stamp in incident_timestamps
]

In [18]:
## compiling data into df
## The seven lists are walked in step, one row per incident; zip stops at the shortest list.
columns = ["Name", "Impact", "Timestamp", "Message", "Code", "Year", "Month"]
incident_rows = zip(
    incident_names,
    incident_impacts,
    incident_timestamps,
    incident_messages,
    incident_codes,
    incident_years,
    incident_months,
)
incident_records = [dict(zip(columns, row)) for row in incident_rows]
df = pd.DataFrame.from_dict(incident_records)
df

Unnamed: 0,Name,Impact,Timestamp,Message,Code,Year,Month
0,Order cancellations and order executions may b...,none,"Oct 14, 07:47 - Oct 17, 07:49 PDT",This incident has been resolved.,sf51lgl4lwlm,2022,October
1,Outage for PayID payments in Australia,critical,"Oct 13, 21:05 - Oct 14, 01:47 PDT",This incident has been resolved.,dy8mlp6wq4t6,2022,October
2,We're currently experiencing degraded performa...,none,"Oct 13, 11:43 - 15:54 PDT",This incident has been resolved.,93705zkdr4lf,2022,October
3,HBAR deposit address generation disabled,maintenance,"Oct 12, 12:19 - 18:24 PDT",This incident has been resolved.,f41j926st8d9,2022,October
4,[Scheduled] Scheduled Maintenance: Coinbase.com,maintenance,"Oct 12, 13:01 - 14:00 PDT",The scheduled maintenance has been completed.,kvhcxpbvx01k,2022,October
...,...,...,...,...,...,...,...
62,"Coinbase Pay sends for BCH, ETC, AVAX unavailable",major,"Aug 2, 13:26 - 14:37 PDT",This incident has been resolved.,c19pn5pm8h5c,2022,August
63,[Scheduled] Scheduled Maintenance: Coinbase.com,maintenance,"Aug 2, 13:00 - 14:00 PDT",The scheduled maintenance has been completed.,7p3rfn60hdph,2022,August
64,Advanced Trade RARE Delays,none,"Aug 2, 10:29 - 10:42 PDT",This incident has been resolved.,hm05n4rk2ytc,2022,August
65,Elevated Order Entry Errors,minor,"Jul 27, 12:45 - Aug 2, 10:32 PDT",This incident has been resolved.,37x8txryt05y,2022,July


In [149]:
##Now lets make a function that does all of that for us!
# Month abbreviations as they appear in incident timestamps -> (month number, full name).
_MONTH_ABBREVIATIONS = {
    "Jan": (1, "January"),
    "Feb": (2, "February"),
    "Mar": (3, "March"),
    "Apr": (4, "April"),
    "May": (5, "May"),
    "Jun": (6, "June"),
    "Jul": (7, "July"),
    "Aug": (8, "August"),
    "Sep": (9, "September"),
    "Oct": (10, "October"),
    "Nov": (11, "November"),
    "Dec": (12, "December"),
}
# Full month name -> month number, used for the month each group of incidents is listed under.
_MONTH_NUMBERS = {full_name: number for number, full_name in _MONTH_ABBREVIATIONS.values()}
# Matches whichever month abbreviation comes first in a timestamp.
_MONTH_PATTERN = re.compile("|".join(_MONTH_ABBREVIATIONS))
# Matches the <var data-var='date'> / <var data-var='time'> / </var> markup inside timestamps.
_VAR_TAG_PATTERN = re.compile(r"</?var\b[^>]*>")


def _findMonths(node):
    '''
    Return the first list stored under a "months" key anywhere in the decoded props, or None.
    '''
    if isinstance(node, dict):
        months = node.get("months")
        if isinstance(months, list):
            return months
        children = list(node.values())
    elif isinstance(node, list):
        children = node
    else:
        return None
    for child in children:
        found = _findMonths(child)
        if found is not None:
            return found
    return None


def _startMonth(timestamp):
    '''
    Return (month number, full month name) for the first month named in the timestamp,
    or (None, "None") when the timestamp names no month.
    '''
    found = _MONTH_PATTERN.search(timestamp)
    if found is None:
        return (None, "None")
    return _MONTH_ABBREVIATIONS[found.group(0)]


def compileIncidents(response):
    '''
    Turn one status history page into a dataframe with one row per incident.

    Input: the response of a history page request (only response.text is read).
    Output: df with the columns Name, Impact, Timestamp, Message, Code, Year, Month.
            Month is the first month named in the timestamp and Year is the year that
            month falls in. A page without incidents gives an empty df with the same columns.
    Raises: ValueError when the page has no div with data-react-class="HistoryIndex".

    The incidents are stored as JSON in the div's data-react-props attribute, so they are
    decoded with json.loads instead of being cut apart as text; names and messages that
    contain quotes, commas or curly brackets therefore come through unchanged.
    NOTE(review): the key names used below ("months", "name", "year", "incidents" and the
    five incident keys) are taken from the text markers the earlier cells searched for;
    confirm them against a live page.
    '''
    columns = ["Name", "Impact", "Timestamp", "Message", "Code", "Year", "Month"]
    soup = BeautifulSoup(response.text, "html.parser")
    incident_div = soup.find("div",{'data-react-class': 'HistoryIndex'})
    if incident_div is None:
        raise ValueError('the page has no div with data-react-class="HistoryIndex"')
    props = json.loads(incident_div["data-react-props"])

    incident_dict = []
    for month in _findMonths(props) or []:
        listed_year = month.get("year")
        listed_month = _MONTH_NUMBERS.get(month.get("name"))
        for incident in month.get("incidents") or []:
            timestamp = _VAR_TAG_PATTERN.sub("", incident.get("timestamp") or "")
            start_month, month_name = _startMonth(timestamp)
            year = int(listed_year) if listed_year is not None else None
            # An incident can be listed under a later month than the one it started in
            # (e.g. "Jul 27 ... - Aug 2 ..." shown with August). If the starting month comes
            # later in the calendar than the month it is listed under, it started the year
            # before (e.g. "Dec 31 ... - Jan 1 ..." listed under January).
            # Presumably an incident is never listed under a month before it started -- TODO confirm.
            if (year is not None and start_month is not None
                    and listed_month is not None and start_month > listed_month):
                year -= 1
            incident_dict.append({
                "Name":incident.get("name", ""),
                "Impact":incident.get("impact", ""),
                "Timestamp":timestamp,
                "Message":incident.get("message", ""),
                "Code":incident.get("code", ""),
                "Year":year,
                "Month":month_name
            })
    df = pd.DataFrame(incident_dict, columns=columns)
    return df
    

In [150]:
##creating function to generate links
base_url = "https://status.coinbase.com/history?page="
def generateLinks(base_url,total_pages):
    '''
    Build the list of page links: the base url followed by each page number,
    counting from 1 up to and including total_pages.
    '''
    return [f"{base_url}{page}" for page in range(1, total_pages + 1)]

In [151]:
##creating snoozer
def sleepyTime():
    '''
    Pause for a random whole number of seconds, from 30 up to 59, and say how long.
    '''
    pause_seconds = randrange(30, 60)
    print(f"snoozing for {pause_seconds} second before next scrape")
    time.sleep(pause_seconds)

In [152]:
##creating list processor
def processList(all_dfs, file_name):
    '''
    Combine the per-page dataframes into one and save it as a CSV.

    Input: list of dataframes (may be empty), name or path of the CSV file to write.
    Output: the combined df, with its rows renumbered from 0.
            An empty list gives an empty df, and the CSV is still written.
    '''
    frames = list(all_dfs)
    # pd.concat raises a ValueError when it is handed nothing to combine, which would
    # throw away a whole run in which no page could be scraped.
    if frames:
        df = pd.concat(frames, ignore_index = True)
    else:
        df = pd.DataFrame()
    df.to_csv(file_name, encoding = "UTF-8", index = False)
    print(f"{file_name} is in you current folder")
    return df

In [153]:
##creating scraper function
def myScraper(url,total_pages,file_name):
    '''
    Scrape every history page, combine the incidents and save them as a CSV.

    Input: base url (the page number gets added to its end), total pages, and file name.
    Output: Final df, list of broken links.

    A link counts as broken when the request fails (no connection, timeout, ...) or when
    the page answers with anything other than status 200. Broken links are skipped so the
    rest of the scrape still finishes.
    '''
    my_links = generateLinks(url,total_pages)
    all_dfs = []
    busted_links = []

    for position, link in enumerate(my_links, start=1):
        print(f"scraping {position} of {len(my_links)}")
        print(f"scraping {link}")
        try:
            # the timeout keeps one unresponsive page from stalling the whole run
            response = requests.get(link, timeout=30)
        except requests.RequestException as error:
            print(f"{link} could not be fetched: {error}")
            busted_links.append(link)
        else:
            if response.status_code == 200:
                df = compileIncidents(response)
                all_dfs.append(df)
            else:
                print(f"{link} returned a busted link with response {response.status_code}")
                busted_links.append(link)
        # snooze between pages, but not after the last one
        if position < len(my_links):
            sleepyTime()
    final_df = processList(all_dfs,file_name)
    print("all done!")
    return (final_df,busted_links)


In [154]:
url = "https://status.coinbase.com/history?page="
total_pages = 37
file_name = "Coinbase-Incidents.csv"
## keeping the list of broken links as well, so pages that did not scrape can be retried
test_df, busted_links = myScraper(url,total_pages,file_name)
test_df

scraping 1 of 37
scraping https://status.coinbase.com/history?page=1
snoozing for 31 second before next scrape
scraping 2 of 37
scraping https://status.coinbase.com/history?page=2
snoozing for 50 second before next scrape
scraping 3 of 37
scraping https://status.coinbase.com/history?page=3
snoozing for 34 second before next scrape
scraping 4 of 37
scraping https://status.coinbase.com/history?page=4
snoozing for 58 second before next scrape
scraping 5 of 37
scraping https://status.coinbase.com/history?page=5
snoozing for 57 second before next scrape
scraping 6 of 37
scraping https://status.coinbase.com/history?page=6
snoozing for 42 second before next scrape
scraping 7 of 37
scraping https://status.coinbase.com/history?page=7
snoozing for 57 second before next scrape
scraping 8 of 37
scraping https://status.coinbase.com/history?page=8
snoozing for 43 second before next scrape
scraping 9 of 37
scraping https://status.coinbase.com/history?page=9
snoozing for 53 second before next scrape
s

Unnamed: 0,Name,Impact,Timestamp,Message,Code,Year,Month
0,Hedera send/receive delayed,none,"Oct 12, 10:13 PDT",We are currently investigating this issue.,j5bs06khzm2x,2022,October
1,Delayed Cardano (ADA) deposits and withdrawals,minor,"Oct 11, 11:23 - Oct 12, 06:59 PDT",This incident has been resolved.,jnd10j7lqff8,2022,October
2,Outage for PayID payments in Australia,critical,"Oct 12, 02:35 PDT",We're aware of an issue with our 3rd-party pay...,k92hmdlrldb2,2022,October
3,[Scheduled] Optimism scheduled maintenance,maintenance,"Oct 11, 11:00 - 11:30 PDT",The scheduled maintenance has been completed.,r820kxfvtbc5,2022,October
4,Delayed Bitcoin (BTC) Sends and Receives,none,"Oct 9, 20:50 - 22:58 PDT",This incident has been resolved.,hwl4m7f0cs92,2022,October
...,...,...,...,...,...,...,...
910,Pending transfers,none,"Dec 24, 14:37 - Dec 25, 07:20 PST",This incident has been resolved.,xm70l90z4j6r,2013,December
911,Login issues on some browsers.,none,"Dec 20, 16:24 - 16:31 PST",This incident has been resolved.,t2r1xq9lzyt6,2013,December
912,Outage,none,"Dec 10, 20:06 - 20:06 PST",Apologies for the downtime - service has been ...,rp11hz18xb28,2013,December
913,Outage,minor,"Dec 6, 16:40 - 18:09 PST",This incident has been resolved.,wvls2y6scbry,2013,December


In [156]:
## incidents whose impact is "critical"
test_df.loc[test_df["Impact"] == "critical"]

Unnamed: 0,Name,Impact,Timestamp,Message,Code,Year,Month
2,Outage for PayID payments in Australia,critical,"Oct 12, 02:35 PDT",We're aware of an issue with our 3rd-party pay...,k92hmdlrldb2,2022,October
57,Coinbase.com is experiencing connectivity issues,critical,"Aug 3, 13:31 - 14:34 PDT",This incident has been resolved.,h5y9db55bwcq,2022,August
102,Coinbase.com is experiencing connectivity issues,critical,"Jun 21, 00:09 - 01:30 PDT",This incident has been resolved.,mj0ysm01x2nc,2022,June
209,iDEAL and Sofort payments delayed,critical,"Jan 11, 09:34 - 15:36 PST",This incident has been resolved.,rvmx7h9p4jqq,2022,January
239,Sofort and iDEAL Payments Currently Unavailable,critical,"Dec 7, 09:08 - 15:24 PST",This incident has been resolved.,01dwynn34xl0,2021,December
266,Coinbase Card Swipe issues,critical,"Oct 27, 12:38 - 14:31 PDT",This incident has been resolved.,fjtngsrsgx88,2021,October
267,Coinbase.com is experiencing connectivity issues,critical,"Oct 27, 07:18 - 11:13 PDT",This incident has been resolved.,v5dx8y2mtqx2,2021,October
414,"Card purchases, card withdrawals, and PayPal p...",critical,"Jun 3, 12:32 - 13:35 PDT",This incident has been resolved.,9l6j38p5mfs1,2021,June
417,SEPA Payment Service Outage,critical,"Jun 1, 16:35 - 18:38 PDT",This incident has been resolved.,0t5ln2lzhrdk,2021,June
433,Ethereum Berlin Network Upgrade,critical,"Apr 15, 04:50 - 15:27 PDT",This incident has been resolved.,w4fmfqypt630,2021,April


In [157]:
## incidents whose impact is "major"
test_df.loc[test_df["Impact"] == "major"]

Unnamed: 0,Name,Impact,Timestamp,Message,Code,Year,Month
8,"ACH withdrawals, deposits, and buys failures",major,"Oct 2, 03:57 - 09:41 PDT",This incident has been resolved.,b45vlknj3cml,2022,October
42,International USDC Trading,major,"Jul 19, 09:08 - Aug 23, 07:06 PDT",This incident has been resolved.,j49qg8j0h8hp,2022,July
58,"Coinbase Pay sends for BCH, ETC, AVAX unavailable",major,"Aug 2, 13:26 - 14:37 PDT",This incident has been resolved.,c19pn5pm8h5c,2022,August
92,Coinbase.com is experiencing connectivity issues,major,"Jun 27, 14:27 - 20:47 PDT",This incident has been resolved.,7bh5w0jqf4h3,2022,June
94,Coinbase Pay crypto sends are failing at high ...,major,"Jun 26, 09:58 - Jun 27, 10:20 PDT",This incident has been resolved.,k94rnj3nvkqh,2022,June
...,...,...,...,...,...,...,...
879,We're experiencing a high number of requests,major,"Dec 23, 11:36 - 11:41 PST",This incident has been resolved.,6ppglr77hl8c,2015,December
886,Coinbase Exchange Offline,major,"Sep 2, 16:41 - 16:55 PDT",Exchange is back up.,p8t93bjt6cvv,2015,September
890,Website outage,major,"Jul 13, 04:22 - 05:25 PDT",This incident has been resolved.,jqw9bv00vf2q,2015,July
891,Network Connectivity Issues,major,"Jul 6, 14:22 - 14:54 PDT",We have mitigated issues caused by our upstrea...,mwyzfbr3mvjb,2015,July


In [158]:
## every incident named exactly "Coinbase.com is experiencing connectivity issues"
test_df.loc[test_df["Name"] == "Coinbase.com is experiencing connectivity issues"]

Unnamed: 0,Name,Impact,Timestamp,Message,Code,Year,Month
57,Coinbase.com is experiencing connectivity issues,critical,"Aug 3, 13:31 - 14:34 PDT",This incident has been resolved.,h5y9db55bwcq,2022,August
92,Coinbase.com is experiencing connectivity issues,major,"Jun 27, 14:27 - 20:47 PDT",This incident has been resolved.,7bh5w0jqf4h3,2022,June
101,Coinbase.com is experiencing connectivity issues,major,"Jun 23, 11:08 - 11:50 PDT",This incident has been resolved.,wzv8ht040xyn,2022,June
102,Coinbase.com is experiencing connectivity issues,critical,"Jun 21, 00:09 - 01:30 PDT",This incident has been resolved.,mj0ysm01x2nc,2022,June
130,Coinbase.com is experiencing connectivity issues,minor,"Apr 25, 01:13 - 01:51 PDT",This incident has been resolved.,c6h0mn0262hq,2022,April
248,Coinbase.com is experiencing connectivity issues,major,"Nov 23, 16:21 - 17:56 PST",This incident has been resolved.,zsz65cz5clzy,2021,November
265,Coinbase.com is experiencing connectivity issues,major,"Oct 27, 12:40 - 14:54 PDT",This incident has been resolved.,5tv5swyq8jg3,2021,October
267,Coinbase.com is experiencing connectivity issues,critical,"Oct 27, 07:18 - 11:13 PDT",This incident has been resolved.,v5dx8y2mtqx2,2021,October
511,Coinbase.com is experiencing connectivity issues,minor,"Jan 7, 07:50 - 22:13 PST",This incident has been resolved.,3yd0mclly9b4,2021,January
525,Coinbase.com is experiencing connectivity issues,none,"Dec 16, 06:45 - 08:27 PST",This incident has been resolved.,93smh99lxtxz,2020,December
