In [100]:
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup


import os

class MySelenium:
    """Explicit-wait helpers shared by the browser automation classes.

    The methods read ``self.driver``, which this class never assigns; the
    subclasses in this file set it when they start the browser.
    """

    def wait_presence_of_element_located(self, expected_conditions, wait_time=10):
        """Wait until the located element is present and return it.

        Args:
            expected_conditions: Locator tuple such as ``(By.ID, 'some-id')``.
            wait_time: Timeout passed to ``WebDriverWait``.

        Returns:
            Whatever ``WebDriverWait.until`` returns for the
            ``presence_of_element_located`` condition.
        """
        waiter = WebDriverWait(self.driver, wait_time)
        condition = EC.presence_of_element_located(expected_conditions)
        return waiter.until(condition)

    def wait_element_to_be_clickable(self, expected_conditions, wait_time=10):
        """Wait until the located element is clickable and return it.

        Args:
            expected_conditions: Locator tuple such as ``(By.ID, 'some-id')``.
            wait_time: Timeout passed to ``WebDriverWait``.

        Returns:
            Whatever ``WebDriverWait.until`` returns for the
            ``element_to_be_clickable`` condition.
        """
        waiter = WebDriverWait(self.driver, wait_time)
        condition = EC.element_to_be_clickable(expected_conditions)
        return waiter.until(condition)
    

class GoogleMail(MySelenium):
    """Drive the Gmail web UI: sign in, search for a mail and download a file.

    Several locators match on Korean link/button text, so the methods expect
    the Gmail interface to be displayed in Korean.
    """

    def connect_gmail(self):
        """Start Chrome with the bundled chromedriver and open Gmail."""
        self.driver = webdriver.Chrome(executable_path='./lib/chromedriver')
        self.driver.set_window_position(1280, 668)
        self.driver.set_window_size(1280, 668)
        self.driver.get('https://mail.google.com')

    def close_browser(self):
        """End the WebDriver session."""
        # quit() (rather than close()) ends the whole session instead of
        # only closing the current window.
        self.driver.quit()

    def input_id(self, my_id=None):
        """Type the account id into the sign-in form.

        Args:
            my_id: Account id. When omitted, the ``GOOGLE_ID`` environment
                variable is read at call time.

        Raises:
            KeyError: If ``my_id`` is omitted and ``GOOGLE_ID`` is not set.
        """
        # Resolved here instead of in the signature: a default argument is
        # evaluated once, when the class is defined, which made the class
        # unusable without the variable even for explicit callers.
        if my_id is None:
            my_id = os.environ['GOOGLE_ID']
        self.wait_presence_of_element_located((By.ID, 'identifierId')).send_keys(my_id)

    def next_id(self):
        """Click the button that submits the account id."""
        self.wait_presence_of_element_located((By.ID, 'identifierNext')).click()

    def input_pw(self, my_password=None):
        """Type the account password into the sign-in form.

        Args:
            my_password: Password. When omitted, the ``GOOGLE_PW``
                environment variable is read at call time.

        Raises:
            KeyError: If ``my_password`` is omitted and ``GOOGLE_PW`` is not
                set.
        """
        if my_password is None:
            my_password = os.environ['GOOGLE_PW']
        self.wait_presence_of_element_located((By.NAME, 'password')).send_keys(my_password)

    def next_pw(self):
        """Click the Korean-labelled "next" button that submits the password."""
        self.wait_element_to_be_clickable((By.XPATH, "//span[text()='다음']")).click()

    def input_search_text(self, search_text):
        """Replace the search box content with ``search_text`` and press Enter.

        The text is also stored on ``self.search_text`` so that
        ``click_mail_list`` can report which search found nothing.
        """
        self.search_text = search_text
        input_text = self.wait_presence_of_element_located((By.XPATH, '//*[@id="aso_search_form_anchor"]/div/input'))
        input_text.clear()
        input_text.send_keys(search_text, Keys.ENTER)

    def click_mail_list(self):
        """Click the mail row targeted by the result-list locator.

        Returns:
            True when the element appeared within 3 seconds and was clicked,
            False (after printing a message) when the wait timed out.
        """
        try:
            # NOTE(review): absolute XPath tied to the current Gmail page
            # layout; it will stop matching if the markup changes.
            mail_list = self.wait_presence_of_element_located((By.XPATH, '/html/body/div[7]/div[3]/div/div[2]/div[1]/div[2]/div/div/div/div/div[2]/div/div[1]/div/div[2]/div[4]/div[1]/div/table/tbody/tr/td[6]/div[1]/div/div[2]/span/span'), 3)
            mail_list.click()
            return True
        except TimeoutException:
            print('can\'t find', self.search_text)
            return False

    def click_view_mail(self):
        """Click the Korean-labelled "view entire message" link."""
        self.wait_presence_of_element_located((By.XPATH, '//a[text() = "전체 메일 보기"]')).click()

    def download_file(self):
        """Click the download link in the second window, then close it.

        Control is handed back to the first window afterwards.

        Raises:
            TimeoutException: If no second window shows up within 10 seconds
                or the download link never appears.
        """
        # The second window may not be registered yet when this is called;
        # wait for it instead of failing with IndexError on window_handles[1].
        WebDriverWait(self.driver, 10).until(lambda drv: len(drv.window_handles) > 1)
        self.driver.switch_to.window(self.driver.window_handles[1])
        self.wait_presence_of_element_located((By.XPATH, '//a[contains(text(), "다운로드")]')).click()
        self.driver.execute_script("window.close()")
        self.driver.switch_to.window(self.driver.window_handles[0])

    def click_gmail_home(self):
        """Click the Gmail logo that links to ``#inbox``."""
        gmail_home = self.wait_presence_of_element_located((By.XPATH, '//a[@href="#inbox"][@title="Gmail"]/img[1]'))
        gmail_home.click()
        

class KakaoCard(MySelenium):
    """Open a password-protected card statement HTML file and read its rows."""

    def open_browser(self):
        """Start Chrome with the bundled chromedriver."""
        self.driver = webdriver.Chrome(executable_path='./lib/chromedriver')
        self.header = ['card_number']

    def close_browser(self):
        """End the WebDriver session."""
        # quit() (rather than close()) ends the whole session instead of
        # only closing the current window.
        self.driver.quit()

    def input_url(self, url):
        """Open a local file in the browser.

        Args:
            url: File path; ``'file://'`` is prepended, so an absolute path
                such as ``/Users/emflant/Downloads/KB_20181105.html`` yields
                ``file:///Users/emflant/Downloads/KB_20181105.html``.
        """
        self.driver.get('file://' + url)

    def input_pw(self):
        """Type the statement password into ``#pwd1`` and press Enter.

        Raises:
            KeyError: If the ``KAKKO_MAIL_PW`` environment variable is unset.
        """
        pwd1 = self.wait_presence_of_element_located((By.XPATH, '//*[@id="pwd1"]'))
        pwd1.send_keys(os.environ['KAKKO_MAIL_PW'], Keys.ENTER)

    def get_list(self, result_type='tuple'):
        """Return the statement table as a list of tuples, one per ``<tr>``.

        Each tuple holds the stripped text of the row's ``<td>`` cells; per
        the original note the columns are card number / date of use /
        status / merchant name / amount / cashback.

        Args:
            result_type: Accepted for interface compatibility; currently
                unused, rows are always tuples.

        Returns:
            list[tuple[str, ...]]: One tuple per table row (empty for a row
            without ``<td>`` cells).
        """
        table = self.wait_presence_of_element_located((By.XPATH, '//*[@id="mArticle"]/div[3]/table/tbody'))
        soup = BeautifulSoup(table.get_attribute('outerHTML'), 'html.parser')

        # Query the already-parsed tags directly; re-serialising every row
        # and cell into a new BeautifulSoup document gives the same text at
        # much higher cost.
        return [
            tuple(td.get_text().strip() for td in tr.find_all('td'))
            for tr in soup.find_all('tr')
        ]
    
class HanaCard(MySelenium):
    """Open a password-protected Hana card statement file and read its rows."""

    def open_browser(self):
        """Start Chrome with the bundled chromedriver."""
        self.driver = webdriver.Chrome(executable_path='./lib/chromedriver')
        self.header = ['card_number']

    def close_browser(self):
        """End the WebDriver session."""
        # quit() (rather than close()) ends the whole session instead of
        # only closing the current window.
        self.driver.quit()

    def input_url(self, url):
        """Open a local file in the browser.

        Args:
            url: File path; ``'file://'`` is prepended, so an absolute path
                such as ``/Users/emflant/Downloads/KB_20181105.html`` yields
                ``file:///Users/emflant/Downloads/KB_20181105.html``.
        """
        self.driver.get('file://' + url)

    def input_pw(self):
        """Type the statement password into ``#password`` and press Enter.

        Raises:
            KeyError: If the ``KAKKO_MAIL_PW`` environment variable is unset.
        """
        pwd1 = self.wait_presence_of_element_located((By.XPATH, '//*[@id="password"]'))
        # NOTE(review): reads the same KAKKO_MAIL_PW variable as KakaoCard;
        # confirm that sharing the password is intended.
        pwd1.send_keys(os.environ['KAKKO_MAIL_PW'], Keys.ENTER)

    def click_menu(self):
        """Click the menu image located under the element with id ``1``."""
        menu = self.wait_presence_of_element_located((By.XPATH, '//*[@id="1"]/li/a[2]/img'))
        menu.click()

    def get_list(self, result_type='tuple'):
        """Return the statement rows as 12-element tuples.

        Each tuple starts with the inner HTML of the period cell, followed by
        the stripped text of the row's 11 ``<td>`` cells. The first cell text
        is prefixed with an apostrophe (presumably so a spreadsheet keeps the
        value as text; confirm). Rows with any other cell count are skipped.

        Args:
            result_type: Accepted for interface compatibility; currently
                unused, rows are always tuples.

        Returns:
            list[tuple[str, ...]]: The matching rows in document order.
        """
        period = self.wait_presence_of_element_located((By.XPATH,'//*[@id="tab2"]/table/tbody/tr[4]/td/table/tbody/tr[3]/td/table/tbody/tr[2]/td[3]/div'))
        table = self.wait_presence_of_element_located((By.XPATH,'//*[@id="tab2"]/table/tbody/tr[4]/td/table/tbody/tr[5]/td/table/tbody/tr[2]/td/table/tbody'))
        soup = BeautifulSoup(table.get_attribute('outerHTML'), 'html.parser')

        # Read the period once: every get_attribute call is a round trip to
        # the browser and the value is the same for all rows.
        period_text = period.get_attribute('innerHTML')

        card_list = []
        for tr in soup.find_all('tr'):
            cells = [td.get_text().strip() for td in tr.find_all('td')]

            # Keep only rows that produce 12 elements (period + 11 cells).
            if len(cells) != 11:
                continue

            cells[0] = '\'' + cells[0]
            card_list.append(tuple([period_text] + cells))

        return card_list
    

In [2]:
# from GoogleMail import GoogleMail
# Sign in to Gmail and, for each month from March to December 2018, search
# for the statement mail and download its file.
gm = GoogleMail()
gm.connect_gmail()

try:
    gm.input_id()
    gm.next_id()
    gm.input_pw()
    gm.next_pw()

    for i in range(3, 13):
        search_text = 'subject:(이용내역서(2018년' + str(i).zfill(2) + '월))'
        gm.input_search_text(search_text)

        # No mail for this month: click_mail_list already printed a notice.
        if not gm.click_mail_list():
            continue

        gm.click_view_mail()
        gm.download_file()
        gm.click_gmail_home()
finally:
    # Close the browser even when a step above raises.
    gm.close_browser()

can't find subject:(이용내역서(2018년03월))
can't find subject:(이용내역서(2018년04월))
can't find subject:(이용내역서(2018년05월))
can't find subject:(이용내역서(2018년12월))


In [57]:
# Read the downloaded Kakao card statement HTML files and collect their
# transaction rows into `result`.
from pathlib import Path
from pprint import pprint
import csv
import os

kc = KakaoCard()
kc.open_browser()

p = Path(os.environ['DOWNLOAD_FOLDER'])
result = []

try:
    # iterdir() yields entries in arbitrary order; sorting makes the row
    # order reproducible (and, given the date in the name, chronological).
    for child1 in sorted(p.iterdir()):
        file_name = child1.name

        # Only statement files named like KB_20181105.html (16 characters).
        if not (file_name.endswith('html') and file_name.startswith('KB_') and len(file_name) == 16):
            continue

        kc.input_url(str(child1))
        kc.input_pw()
        # Extend in place instead of rebuilding the list for every file.
        result.extend(kc.get_list())
finally:
    # Close the browser even when one of the files fails to load.
    kc.close_browser()

In [31]:
# Save the collected Kakao card transactions (`result`) to an Excel workbook.

import xlwings as xw


app = xw.App(visible=False)
try:
    wb = app.books.add()
    sht = wb.sheets['Sheet1']

    # Header row (Korean column titles), then the data rows starting at A2.
    sht.range('A1').value = ['카드번호', '이용일', '정상구분', '가맹점명', '금액', '캐시백']
    sht.range('A2').value = result

    wb.save(r'/Users/emflant/Downloads/kakko_card.xlsx')
finally:
    # The Excel instance is hidden, so make sure it is terminated even when
    # writing or saving fails; otherwise the process is left running.
    app.kill()

In [None]:
# Alternative export: write the collected Kakao card transactions (`result`)
# to a UTF-8 CSV file in the download folder.

csv_header = ['카드번호', '이용일', '정상구분', '가맹점명', '금액', '캐시백']

with open(os.environ['DOWNLOAD_FOLDER']+'/kakko_card.csv', 'w', newline='', encoding='utf-8') as csv_out:
    writer = csv.writer(csv_out)
    writer.writerow(csv_header)
    writer.writerows(result)

In [101]:
# Read the downloaded Hana card statement HTML files (for example
# hanacard_20180621.html) and collect their transaction rows into `hc_result`.
from pathlib import Path
from pprint import pprint
import csv
import os

hc = HanaCard()
hc.open_browser()

p = Path(os.environ['DOWNLOAD_FOLDER'])
hc_result = []

try:
    # iterdir() yields entries in arbitrary order; sorting makes the row
    # order reproducible (and, given the date in the name, chronological).
    for child1 in sorted(p.iterdir()):
        file_name = child1.name

        # Only statement files named like hanacard_20180621.html (22 characters).
        if not (file_name.endswith('html') and file_name.startswith('hanacard_') and len(file_name) == 22):
            continue

        hc.input_url(str(child1))
        hc.input_pw()
        # Extend in place instead of rebuilding the list for every file.
        hc_result.extend(hc.get_list())
finally:
    # Close the browser even when one of the files fails to load.
    hc.close_browser()

In [103]:
# Save the collected Hana card transactions (`hc_result`) to an Excel workbook.

import xlwings as xw

app = xw.App(visible=False)
try:
    wb = app.books.add()
    sht = wb.sheets['Sheet1']

    # Header row (Korean column titles), then the data rows starting at A2.
    sht.range('A1').value = ('이용기간', '이용일자', '이용가맹점', '이용금액', '할부기간', '회차', '원금', '수수료', '이용해택', '해택금액', '결재후잔액', '포인트')
    sht.range('A2').value = hc_result

    wb.save(r'/Users/emflant/Downloads/hana_card.xlsx')
finally:
    # The Excel instance is hidden, so make sure it is terminated even when
    # writing or saving fails; otherwise the process is left running.
    app.kill()

In [84]:
import xlwings as xw

# Scratch cell: start an Excel application through xlwings with visible=False.
app = xw.App(visible=False)

In [85]:
# Scratch cell: add a new workbook to the application started earlier.
wb = app.books.add()

In [86]:
# Scratch cell: look up the sheet named 'Sheet1' in the new workbook.
sht = wb.sheets['Sheet1']

In [87]:
# Scratch cell: write a string with a leading apostrophe to A1, presumably to
# check that Excel keeps "2018.01.31" as text rather than converting it
# (TODO confirm).
sht.range('A1').value = '\'2018.01.31'