In [1]:
from __future__ import print_function
import os.path
import pymongo
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from datetime import datetime, date, timedelta
from sshtunnel import SSHTunnelForwarder

In [2]:
class mongodb_tunnel:
    """Insert documents into a MongoDB instance reached through an SSH tunnel.

    On construction an ``SSHTunnelForwarder`` to port 27017 on the remote
    host's loopback interface is started and a ``pymongo.MongoClient`` is
    pointed at the tunnel's local port.

    ``insert_many`` closes both the client and the tunnel when it finishes,
    so an instance is meant for a single insert; create a new instance for
    further inserts.
    """

    def __init__(self, host, usr, passwd,
                 host_address:str='127.0.0.1'):
        """Open the SSH tunnel and create the MongoDB client.

        Args:
            host: SSH gateway, passed as the first argument to
                ``SSHTunnelForwarder``.
            usr: SSH user name.
            passwd: SSH password.
            host_address: Local address the MongoDB client connects to
                (the local end of the tunnel).
        """
        self.server = SSHTunnelForwarder(
            host,
            ssh_username=usr,
            ssh_password=passwd,
            remote_bind_address=('127.0.0.1', 27017)
        )

        self.server.start()

        try:
            self.client = pymongo.MongoClient(
                host=host_address,
                port=self.server.local_bind_port
            )
        except Exception:
            # Do not leave the tunnel running when the client cannot be built.
            self.server.close()
            raise

    def close(self):
        """Close the MongoDB client and then the SSH tunnel.

        The tunnel is closed even if closing the client raises.
        """
        try:
            self.client.close()
        finally:
            self.server.close()

    def insert_many(self, db_name, collection_name, document_list):
        """Insert ``document_list`` into ``db_name.collection_name``.

        Insert errors are printed rather than raised. Errors raised while
        looking up the database or collection (for example an invalid name)
        still propagate to the caller. In every case the client and the
        tunnel are closed before this method returns or raises.

        Args:
            db_name: Name of the target database.
            collection_name: Name of the target collection.
            document_list: Documents to insert. An empty or ``None`` value is
                reported and skipped without calling the database.

        Returns:
            None
        """
        try:
            # Best-effort start, kept from the original flow: the tunnel is
            # normally already running after __init__. A failure is reported
            # instead of being silently swallowed; the insert below will then
            # report its own error.
            try:
                self.server.start()
            except Exception as e:
                print("Could not start the SSH tunnel")
                print(e)

            if not document_list:
                print("No documents to insert")
                return

            collection = self.client[db_name][collection_name]

            try:
                collection.insert_many(document_list)
                print("success")
            except Exception as e:
                print("Got an error inserting documents")
                print(e)
        finally:
            # Always release the client and the tunnel, including when the
            # database/collection lookup above raises.
            self.close()

In [3]:
def gmail_authentication(token_path:str='token.json', credentials_path:str='credentials.json'):
    """Return an authorised Gmail API (v1) client.

    Cached user credentials are loaded from ``token_path`` when that file
    exists. If there are no credentials, or they are not valid, they are
    either refreshed (expired credentials that carry a refresh token) or
    obtained through the local-server OAuth flow configured by
    ``credentials_path``. The resulting credentials are then written back to
    ``token_path``.

    Args:
        token_path: File used to cache the user's authorised credentials.
        credentials_path: OAuth client-secrets file used for the interactive
            flow.

    Returns:
        The object returned by ``build('gmail', 'v1', credentials=...)``.
    """
    SCOPES = ['https://mail.google.com/']
    creds = None
    if os.path.exists(token_path):
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(credentials_path, SCOPES)
            creds = flow.run_local_server(port=0)
        # Write the cache to the same path it is read from, so that a custom
        # token_path is honoured on the next call.
        with open(token_path, 'w') as token:
            token.write(creds.to_json())
    return build('gmail', 'v1', credentials=creds)

In [5]:
class gmail_info:
    """Fetch metadata for recent Gmail messages of the authorised user."""

    def __init__(self):
        """Authenticate with the default token/credentials paths."""
        self.service = gmail_authentication()

    def _list_page(self, query, page_token=None):
        """Request one page of message ids matching ``query``."""
        params = {'userId': "me", 'q': query, 'maxResults': None}
        if page_token:
            params['pageToken'] = page_token
        return self.service.users().messages().list(**params).execute()

    def get_mail_id(self):
        """List the ids of messages matched by the one-day date query.

        All result pages are followed via ``nextPageToken`` and merged into
        the first response, so the returned dict has the same shape as a
        single ``messages.list`` response with every matching id under
        ``'messages'``.

        Returns:
            The merged response dict, or ``None`` when a request fails (the
            error is printed).
        """
        # Intended for use on a GMT+0900 (Tokyo/Seoul standard time) basis.
        # NOTE(review): date.today() is the local date of the machine running
        # the notebook, and the Gmail search guide describes how plain dates
        # in a query are interpreted with respect to time zones — confirm the
        # window matches the intended day; epoch seconds may be needed.
        today_date = date.today()
        query = f"before: {today_date} after: {today_date - timedelta(days=1)}"
        try:
            messages_id = self._list_page(query)
            page_token = messages_id.get('nextPageToken')
            while page_token:
                page = self._list_page(query, page_token)
                messages_id.setdefault('messages', []).extend(page.get('messages', []))
                page_token = page.get('nextPageToken')
            # Every page has been consumed, so the token is no longer meaningful.
            messages_id.pop('nextPageToken', None)
            return messages_id
        except Exception as e:
            print("authentication failed")
            print(e)
            return None

    def get_message_info(self):
        """Fetch id, labels, snippet and headers for each listed message.

        Each header is flattened into the result dict under the key
        ``'headers.<name>'``; when a header name occurs more than once only
        the last value is kept. Messages whose details cannot be fetched or
        parsed are reported and skipped.

        Returns:
            A list of dicts (one per message), or ``None`` when listing
            failed or no message matched.
        """
        messages_id = self.get_mail_id()
        if messages_id is None:
            # get_mail_id has already reported the failure.
            return None

        # The 'messages' key can be absent even when the size estimate is
        # non-zero, so both conditions are checked.
        messages = messages_id.get('messages') or []
        if messages_id.get('resultSizeEstimate') == 0 or not messages:
            print("No update found")
            return None

        message_result = []
        for message in messages:
            try:
                msg = self.service.users().messages().get(userId='me', id=message['id']).execute()
                tmp_dict = {
                    'id' : msg.get('id'),
                    'labelIds' : msg.get('labelIds'),
                    'snippet' : msg.get('snippet')
                }
                for item in msg['payload']['headers']:
                    tmp_dict['headers.'+item['name']] = item['value']
                message_result.append(tmp_dict)

            except Exception as e:
                print("error in importing messages metadata")
                print(e)
        return message_result
        

In [6]:
# Build a Gmail API client directly, then create the gmail_info helper, which
# performs its own gmail_authentication() call in __init__.
# NOTE(review): `service` is not referenced by any later cell shown in this
# notebook — confirm whether this separate authentication is still needed.
service = gmail_authentication()
tmp = gmail_info()

In [7]:
# Display the Gmail API client object held by the helper.
tmp.service

<googleapiclient.discovery.Resource at 0x106c8ea30>

In [8]:
# Fetch the per-message metadata dicts produced by get_message_info().
documents = tmp.get_message_info()

In [9]:
# Display the fetched message metadata.
# NOTE(review): the rendered output contains real e-mail addresses and mail
# headers — consider clearing or redacting it before sharing the notebook.
documents

[{'id': '186c8af5f6a46932',
  'labelIds': ['CATEGORY_PROMOTIONS', 'UNREAD', 'INBOX'],
  'snippet': '2023-03-10 이수민님, 채용정보 검색 결과입니다. 저장된 나의 검색1의 공고 검색 결과입니다. 검색결과가 최근 찾으시는 공고와 다를 경우 설정하신 검색조건을 업데이트 해보세요 건설근로자공제회03/20 [전체] 2023년 제1차 채용 공고 삼성전자(주)03/15 2023년 대학생 인턴 모집 안내(DX부문) 삼성전자(주)03/15 2023년 상반기 3급',
  'headers.Delivered-To': 'sooh0601@gmail.com',
  'headers.Received': 'from sri-mailsend2a.saramin.co.kr (sri-mailsend2a.saramin.co.kr. [110.45.178.132])        by mx.google.com with SMTP id x11-20020a63cc0b000000b004fc25858c33si343176pgf.506.2023.03.09.15.23.00        for <sooh0601@gmail.com>;        Thu, 09 Mar 2023 15:23:01 -0800 (PST)',
  'headers.X-Google-Smtp-Source': 'AK7set/NVptn+dRt1IS8/omrlXF6JGLqc66F4BcYw4GF+1x/nTYBpjFPqRAm0eCGhTtgeT+iuH3a',
  'headers.X-Received': 'by 2002:a17:90b:1d0a:b0:237:9858:ebbf with SMTP id on10-20020a17090b1d0a00b002379858ebbfmr24449970pjb.30.1678404181889;        Thu, 09 Mar 2023 15:23:01 -0800 (PST)',
  'headers.ARC-Seal': 'i=1; a=rsa-sha256; t=1678

In [47]:
# SSH connection settings for the MongoDB host. They are read from environment
# variables so that no credentials are stored in the notebook itself.
host = os.environ.get("MONGODB_SSH_HOST")
usr = os.environ.get("MONGODB_SSH_USER")
passwd = os.environ.get("MONGODB_SSH_PASSWORD")

# Fail with a clear message before any connection attempt when a setting is
# missing.
_missing_settings = [
    name
    for name, value in (
        ("MONGODB_SSH_HOST", host),
        ("MONGODB_SSH_USER", usr),
        ("MONGODB_SSH_PASSWORD", passwd),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Set these environment variables before running this cell: "
        + ", ".join(_missing_settings)
    )

tunnel = mongodb_tunnel(
    host = host,
    usr = usr,
    passwd = passwd
)

In [48]:
# Target database and collection names. Both are left as None here —
# presumably placeholders to be replaced with real names before running.
db_name = None
collection_name = None

# Insert the message dicts stored in `documents` (assigned in an earlier cell
# from tmp.get_message_info()) through the tunnel created above.
tunnel.insert_many(
    db_name=db_name,
    collection_name=collection_name,
    document_list=documents
)