In [1]:
# author: Ashu Tayal
# Wed, Mar 17 2021
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time 
from selenium import webdriver
import os
import csv

# Discovercard.com

In [2]:
# discovercard.com
url = 'https://www.discovercard.com/application/website/ratesrewards?srcCde=GEGX&8&irgwc=1&gclid=_yy6dgezjcckfqlsgkk0sohzidf2xpmp3vbymrxs100&sid=0&pid=156490&aid=568217&source=Affiliates&sku=109'
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'lxml')

In [3]:
def summary(name):
    table = pd.DataFrame(columns=["Variable",name])

    variables = {'Annual Fee': fee_annual,
                 'Regular Puchase APR': apr_purchase, 
                 'Regular Balance Transfer APR': apr_transfer,
                 'Cash Advance APR': apr_cash,
                 'Cash Advance Fee': fee_cash}

    for key, value in variables.items():
        table = table.append({'Variable': key,
                              name: value}, ignore_index = True)
    return(table)

In [4]:
# Interest Rates
fgir = soup.find_all('div', class_ = 'flex-grid interest-row')

In [5]:
fgir[0] # Purchases APR

<div class="flex-grid interest-row">
<h5 class="col-small">Annual Percentage Rate (APR) for Purchases</h5>
<div class="col-medium">
<p class="apr-value purchase-rate-apr apr-value-big">22.99%</p>
<p>This APR will vary with the market based on the Prime Rate.</p>
</div>
</div>

In [6]:
apr_purchase = soup.find("p", {"class" : "apr-value purchase-rate-apr apr-value-big"}).text
apr_purchase

'22.99%'

In [7]:
fgir[1] # Balance Transfer APR

<div class="flex-grid interest-row">
<h5 class="col-small">APR for Balance Transfers</h5>
<div class="col-medium apr-for-balance-transfers">
<p>
<b class="balance-transfer-rate">10.99%</b>
                        intro APR for 6 months from date of first
                        transfer, for transfers that post to your account
                        by June 10, 2021.
                    </p>
<p class="indented-paragraph">
                        After the intro APR expires, your APR will be <b class="after-the-intro-apr-expires-apr">22.99%</b> This APR
                        will vary with the market
                        based on the Prime Rate.
                    </p>
</div>
</div>

In [8]:
apr_transfer = soup.find("b", {"class" : "balance-transfer-rate"}).text
apr_transfer

'10.99%'

In [9]:
fgir[2] # Cash Advanced APR

<div class="flex-grid interest-row">
<h5 class="col-small">APR for Cash Advances</h5>
<div class="col-medium">
<p><b class="cash-rate">24.99%</b></p>
<p class="indented-paragraph">This APR will vary with the market based on the Prime Rate.</p>
</div>
</div>

In [10]:
apr_cash = soup.find("b", {"class" : "cash-rate"}).text
apr_cash

'24.99%'

In [11]:
# Fees
fees = soup.find_all('div', class_ = 'flex-grid fee-row')

In [12]:
fees[0] # Annual Fees

<div class="flex-grid fee-row">
<div class="col-small">Annual Fee</div>
<div class="col-medium"><b>None</b></div>
</div>

In [13]:
fee_annual = fees[0].find("div", {"class" : "col-medium"}).text
fee_annual

'None'

In [14]:
transfer_fee = soup.find_all('div', class_ = 'flex-grid fee-row balance-transfer-fee') # Balance Transfer Fee
transfer_fee = transfer_fee[0].find('p').text.strip().replace('\n', ' ').replace('  ', '')
transfer_fee

'Intro fee of 3% of the amount of each transfer for transfers that post to your account by June 10, 2021 with the 10.99% intro APR balance transfer offer described above. After that, 5% of the amount of each transfer.'

In [15]:
fees[2] # Cash Advance Fee (Intro)

<div class="flex-grid fee-row">
<div class="col-small">Cash Advance</div>
<div class="col-medium">
                Either <b>$10</b> or <b>5%</b> of the amount of each cash advance, whichever is greater.
            </div>
</div>

In [16]:
fee_cash = fees[2].find("div", {"class" : "col-medium"}).text
fee_cash = fee_cash.strip()
fee_cash

'Either $10 or 5% of the amount of each cash advance, whichever is greater.'

In [17]:
discover = summary('discovercard.com')
discover

Unnamed: 0,Variable,discovercard.com
0,Annual Fee,
1,Regular Puchase APR,22.99%
2,Regular Balance Transfer APR,10.99%
3,Cash Advance APR,24.99%
4,Cash Advance Fee,Either $10 or 5% of the amount of each cash ad...


# Credit Karma Website

In [18]:
# creditkarma.com

url = 'https://www.creditkarma.com/creditcard/CCDiscover06'
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'lxml')

'$0' in soup # prints true if '$0' found in soup, shows beautifulsoup didn't work

False

In [19]:
# Trying Selenium

url = "https://www.creditkarma.com/creditcard/CCDiscover06"
driver = webdriver.Chrome('/Users/ashutayal/downloads/chromedriver')
driver.get(url)
time.sleep(5) 
html = driver.page_source 
soup = BeautifulSoup(html, "html.parser")
test = '$0' in soup.text # Testing to see if scrape successful
driver.close()
test # prints true if '$0' found in soup

True

In [20]:
# Fees & Rates
fees = soup.find_all('dl', class_ = 'flex justify-between f5 lh-copy bb b--light-gray mv0 mv1 pv2 ckoffer-bn--last-of-type')
fees

[<dl class="flex justify-between f5 lh-copy bb b--light-gray mv0 mv1 pv2 ckoffer-bn--last-of-type"><dt class="ttc ck-black-70"><span><span>Annual fee</span></span></dt><dd class="tr"><span><span>$0</span></span></dd></dl>,
 <dl class="flex justify-between f5 lh-copy bb b--light-gray mv0 mv1 pv2 ckoffer-bn--last-of-type"><dt class="ttc ck-black-70"><span><span>Intro purchase APR</span></span></dt><dd class="tr"><span><span>N/A</span></span></dd></dl>,
 <dl class="flex justify-between f5 lh-copy bb b--light-gray mv0 mv1 pv2 ckoffer-bn--last-of-type"><dt class="ttc ck-black-70"><span><span>Regular purchase APR</span></span></dt><dd class="tr"><span><span>22.99% Variable</span></span></dd></dl>,
 <dl class="flex justify-between f5 lh-copy bb b--light-gray mv0 mv1 pv2 ckoffer-bn--last-of-type"><dt class="ttc ck-black-70"><span><span>Intro balance transfer APR</span></span></dt><dd class="tr"><span><span>10.99% for 6 Months</span></span></dd></dl>,
 <dl class="flex justify-between f5 lh-copy

In [21]:
# Annual Fee
fee_annual = fees[0].find_all("span")[3].text
fee_annual

'$0'

In [22]:
# Regular Purchase APR
apr_purchase = fees[2].find_all("span")[3].text
apr_purchase

'22.99% Variable'

In [23]:
# Balance Transfer APR
apr_transfer = fees[4].find_all("span")[3].text
apr_transfer

'22.99% Variable'

In [24]:
# Cash Advance APR
apr_cash = fees[5].find_all("span")[3].text
apr_cash

'24.99% Variable'

In [25]:
# Cash Advance Fee
fee_cash = fees[6].find_all("span")[3].text
fee_cash

'See Terms*'

In [26]:
creditkarma = summary('creditkarma.com')
creditkarma

Unnamed: 0,Variable,creditkarma.com
0,Annual Fee,$0
1,Regular Puchase APR,22.99% Variable
2,Regular Balance Transfer APR,22.99% Variable
3,Cash Advance APR,24.99% Variable
4,Cash Advance Fee,See Terms*


# Google Sheet

In [27]:
# Google Sheet

url = 'https://docs.google.com/spreadsheets/d/1sEMFJbVyFeEsy87fRRe9BzHMDfi7SssrSywH7W02JSw/edit?usp=sharing'
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'lxml')

tables = soup.find_all("table")

In [28]:
# source: https://stackoverflow.com/questions/61152242/how-to-obtain-data-from-a-public-google-sheets-using-python
index = 0
for table in tables:
    with open(str(index) + ".csv", "w") as f:
        wr = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC, lineterminator='\n')
        wr.writerows([[td.text for td in row.find_all("td")] for row in table.find_all("tr")])
    index = index + 1

# outputs csv to WD as '0.csv'

In [29]:
path = os.getcwd()
sheet = pd.read_csv(os.path.join(path,'0.csv'))
sheet = sheet.head(1) # write and read successful
sheet

Unnamed: 0,CatalogItemId,Name,CurrentPrice,CurrentPriceDescription,Currency,IntroPurchaseApr,IntroPurchaseAprType,IntroPurchaseAprDescription,IntroPurchaseAprPeriod,IntroPurchaseAprPeriodType,...,CashAdvanceFeeDescription,LatePaymentFee,LatePaymentFeeDescription,ReturnCheckFee,ForeignTransactionFee,ForeignTransactionFeeDescription,MinimumDeposit,RewardsType,RewardsRate,RewardsRate2
0,109.0,Discover it® Secured Credit Card,0.0,$0,USD,,,,,,...,,,,,0.0,,200.0,Cashback,2%,1%


In [30]:
fee_annual = sheet['CurrentPriceDescription'][0]
fee_annual

'$0'

In [31]:
apr_purchase = sheet['NonIntroPurchaseAprDescription'][0]
apr_purchase

'22.99% Variable APR'

In [32]:
fee_cash = sheet['CashAdvanceFeeDescription'][0]
fee_cash

nan

In [33]:
apr_cash = sheet['CashAdvanceAprDescription'][0]
apr_cash

'24.99% Variable APR'

In [34]:
apr_transfer = sheet['NonIntroTransferAprDescription'][0]
apr_transfer

'22.99% Variable APR'

In [35]:
googlesheets = summary('googlesheets')
googlesheets

Unnamed: 0,Variable,googlesheets
0,Annual Fee,$0
1,Regular Puchase APR,22.99% Variable APR
2,Regular Balance Transfer APR,22.99% Variable APR
3,Cash Advance APR,24.99% Variable APR
4,Cash Advance Fee,


In [36]:
# join all tables
summary_table = pd.merge(pd.merge(creditkarma, discover, on = 'Variable'), googlesheets, on = 'Variable')
summary_table

Unnamed: 0,Variable,creditkarma.com,discovercard.com,googlesheets
0,Annual Fee,$0,,$0
1,Regular Puchase APR,22.99% Variable,22.99%,22.99% Variable APR
2,Regular Balance Transfer APR,22.99% Variable,10.99%,22.99% Variable APR
3,Cash Advance APR,24.99% Variable,24.99%,24.99% Variable APR
4,Cash Advance Fee,See Terms*,Either $10 or 5% of the amount of each cash ad...,


In [37]:
# Output to csv
summary_table.to_csv('final_df.csv') # outputs to WD
