Skip to content

Commit

Permalink
update Google Sheets Source after review #2
Browse files Browse the repository at this point in the history
  • Loading branch information
yevhenii-ldv committed Dec 30, 2020
1 parent 44847d8 commit 7180c59
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,32 +22,35 @@
SOFTWARE.
"""

from typing import Dict, List

import backoff
from apiclient import discovery, errors
from requests.status_codes import codes as status_codes
from googleapiclient import errors
from requests import codes as status_codes

from .helpers import SCOPES, Helpers


def error_handler(error):
    """Giveup predicate for the backoff decorators: return True when the call should NOT be retried.

    :param error: exception exposing ``resp.status`` (the HTTP status of the failed response).
    :return: False only when the status is TOO_MANY_REQUESTS, True for every other status.
    """
    rate_limited = error.resp.status == status_codes.TOO_MANY_REQUESTS
    return not rate_limited


class GoogleSheetsClient:
    """Wrapper around an authenticated Sheets resource that executes requests with retry.

    Every API method is wrapped in ``backoff.on_exception`` with exponential wait on
    ``errors.HttpError`` for at most 60 seconds. ``error_handler`` is the giveup predicate,
    so any error whose status is not TOO_MANY_REQUESTS is raised immediately.
    """

    def __init__(self, credentials: Dict[str, str], scopes: List[str] = SCOPES):
        """Authenticate and keep the resulting sheets resource on ``self.client``.

        :param credentials: credentials mapping forwarded to ``Helpers.get_authenticated_sheets_client``.
        :param scopes: OAuth scopes to request; defaults to the module-level ``SCOPES``.
        """
        self.client = Helpers.get_authenticated_sheets_client(credentials, scopes)

    @backoff.on_exception(backoff.expo, errors.HttpError, max_time=60, giveup=error_handler)
    def get(self, **kwargs):
        """Execute ``get`` on the underlying resource with the given keyword arguments and return the response."""
        return self.client.get(**kwargs).execute()

    @backoff.on_exception(backoff.expo, errors.HttpError, max_time=60, giveup=error_handler)
    def create(self, **kwargs):
        """Execute ``create`` on the underlying resource with the given keyword arguments and return the response."""
        return self.client.create(**kwargs).execute()

    @backoff.on_exception(backoff.expo, errors.HttpError, max_time=60, giveup=error_handler)
    def get_values(self, **kwargs):
        """Execute ``values().batchGet`` with the given keyword arguments and return the response."""
        return self.client.values().batchGet(**kwargs).execute()

    @backoff.on_exception(backoff.expo, errors.HttpError, max_time=60, giveup=error_handler)
    def update_values(self, **kwargs):
        """Execute ``values().batchUpdate`` with the given keyword arguments and return the response."""
        return self.client.values().batchUpdate(**kwargs).execute()
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ def __init__(self):

def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
# Check involves verifying that the specified spreadsheet is reachable with our credentials.
client = Helpers.get_authenticated_sheets_client(json.loads(config["credentials_json"]))
client = GoogleSheetsClient(json.loads(config["credentials_json"]))
spreadsheet_id = config["spreadsheet_id"]
try:
# Attempt to get first row of sheet
GoogleSheetsClient.get(client, spreadsheetId=spreadsheet_id, includeGridData=False, ranges="1:1")
client.get(spreadsheetId=spreadsheet_id, includeGridData=False, ranges="1:1")
except errors.HttpError as err:
reason = str(err)
# Give a clearer message if it's a common error like 404.
Expand All @@ -64,13 +64,11 @@ def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
return AirbyteConnectionStatus(status=Status.SUCCEEDED)

def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
client = Helpers.get_authenticated_sheets_client(json.loads(config["credentials_json"]))
client = GoogleSheetsClient(json.loads(config["credentials_json"]))
spreadsheet_id = config["spreadsheet_id"]
try:
logger.info(f"Running discovery on sheet {spreadsheet_id}")
spreadsheet_metadata = Spreadsheet.parse_obj(
GoogleSheetsClient.get(client, spreadsheetId=spreadsheet_id, includeGridData=False)
)
spreadsheet_metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False))
sheet_names = [sheet.properties.title for sheet in spreadsheet_metadata.sheets]
streams = []
for sheet_name in sheet_names:
Expand All @@ -88,7 +86,7 @@ def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
def read(
self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
) -> Generator[AirbyteMessage, None, None]:
client = Helpers.get_authenticated_sheets_client(json.loads(config["credentials_json"]))
client = GoogleSheetsClient(json.loads(config["credentials_json"]))

sheet_to_column_name = Helpers.parse_sheet_and_column_names_from_catalog(catalog)
spreadsheet_id = config["spreadsheet_id"]
Expand All @@ -106,7 +104,7 @@ def read(
range = f"{sheet}!{row_cursor}:{row_cursor + ROW_BATCH_SIZE}"
logger.info(f"Fetching range {range}")
row_batch = SpreadsheetValues.parse_obj(
GoogleSheetsClient.get_values(client, spreadsheetId=spreadsheet_id, ranges=range, majorDimension="ROWS")
client.get_values(spreadsheetId=spreadsheet_id, ranges=range, majorDimension="ROWS")
)
row_cursor += ROW_BATCH_SIZE + 1
# there should always be one range since we requested only one
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,27 @@
from typing import Dict, FrozenSet, Iterable, List

from airbyte_protocol import AirbyteRecordMessage, AirbyteStream, ConfiguredAirbyteCatalog
from apiclient import discovery
from google.oauth2 import service_account
from googleapiclient import discovery

from .client import GoogleSheetsClient
from .models.spreadsheet import RowData, Spreadsheet

SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly", "https://www.googleapis.com/auth/drive.readonly"]


class Helpers(object):
@staticmethod
def get_authenticated_sheets_client(credentials: Dict[str, str], scopes: List[str] = SCOPES) -> discovery.Resource:
    """Build the Sheets v4 API and return its ``spreadsheets()`` resource.

    :param credentials: credentials mapping forwarded to ``get_authenticated_google_credentials``.
    :param scopes: OAuth scopes to request; defaults to the module-level ``SCOPES``.
    :return: the ``spreadsheets()`` resource of the built "sheets" v4 service.
    """
    creds = Helpers.get_authenticated_google_credentials(credentials, scopes)
    return discovery.build("sheets", "v4", credentials=creds).spreadsheets()

@staticmethod
def get_authenticated_drive_client(credentials: Dict[str, str], scopes: List[str] = SCOPES) -> discovery.Resource:
    """Build and return the Drive v3 API service.

    :param credentials: credentials mapping forwarded to ``get_authenticated_google_credentials``.
    :param scopes: OAuth scopes to request; defaults to the module-level ``SCOPES``.
    :return: the built "drive" v3 service resource.
    """
    creds = Helpers.get_authenticated_google_credentials(credentials, scopes)
    return discovery.build("drive", "v3", credentials=creds)

@staticmethod
def get_authenticated_google_credentials(credentials: Dict[str, str], scopes: List[str] = SCOPES):
    """Create service-account credentials from an already-parsed credentials mapping.

    :param credentials: service account info passed to ``Credentials.from_service_account_info``.
    :param scopes: OAuth scopes to attach to the credentials; defaults to the module-level ``SCOPES``.
    :return: the ``service_account.Credentials`` object.
    """
    return service_account.Credentials.from_service_account_info(credentials, scopes=scopes)

@staticmethod
Expand Down Expand Up @@ -87,10 +86,8 @@ def get_formatted_row_values(row_data: RowData) -> List[str]:
return [value.formattedValue for value in row_data.values]

@staticmethod
def get_first_row(client: discovery.Resource, spreadsheet_id: str, sheet_name: str) -> List[str]:
spreadsheet = Spreadsheet.parse_obj(
GoogleSheetsClient.get(client, spreadsheetId=spreadsheet_id, includeGridData=True, ranges=f"{sheet_name}!1:1")
)
def get_first_row(client, spreadsheet_id: str, sheet_name: str) -> List[str]:
spreadsheet = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=True, ranges=f"{sheet_name}!1:1"))

# There is only one sheet since we are specifying the sheet in the requested ranges.
returned_sheets = spreadsheet.sheets
Expand Down Expand Up @@ -134,7 +131,7 @@ def row_data_to_record_message(sheet_name: str, cell_values: List[str], column_i

@staticmethod
def get_available_sheets_to_column_index_to_name(
client: discovery.Resource, spreadsheet_id: str, requested_sheets_and_columns: Dict[str, FrozenSet[str]]
client, spreadsheet_id: str, requested_sheets_and_columns: Dict[str, FrozenSet[str]]
) -> Dict[str, Dict[int, str]]:
available_sheets = Helpers.get_sheets_in_spreadsheet(client, spreadsheet_id)

Expand All @@ -151,8 +148,8 @@ def get_available_sheets_to_column_index_to_name(
return available_sheets_to_column_index_to_name

@staticmethod
def get_sheets_in_spreadsheet(client, spreadsheet_id: str) -> List[str]:
    """Return the titles of the sheets in the given spreadsheet.

    :param client: object whose ``get(**kwargs)`` returns the spreadsheet response directly
        (no ``.execute()`` is called on the result here).
    :param spreadsheet_id: ID of the spreadsheet to inspect.
    :return: one title per sheet, in the order the response lists them.
    """
    # Grid data is not requested: only the sheet properties are read below.
    spreadsheet_metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False))
    return [sheet.properties.title for sheet in spreadsheet_metadata.sheets]

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
from typing import Dict

from airbyte_protocol import ConfiguredAirbyteCatalog, ConnectorSpecification
from apiclient import discovery
from base_python_test import StandardSourceTestIface
from google_sheets_source.client import GoogleSheetsClient
from google_sheets_source.helpers import Helpers
Expand Down Expand Up @@ -62,7 +61,7 @@ def get_catalog(self) -> ConfiguredAirbyteCatalog:
def setup(self) -> None:
Path(self._get_tmp_dir()).mkdir(parents=True, exist_ok=True)

sheets_client = Helpers.get_authenticated_sheets_client(self._get_creds(), SCOPES)
sheets_client = GoogleSheetsClient(self._get_creds(), SCOPES)
spreadsheet_id = self._create_spreadsheet(sheets_client)
self._write_spreadsheet_id(spreadsheet_id)

Expand Down Expand Up @@ -90,7 +89,7 @@ def _get_creds(self) -> Dict[str, str]:
def _get_tmp_dir():
return "/test_root/gsheet_test"

def _create_spreadsheet(self, sheets_client: discovery.Resource) -> str:
def _create_spreadsheet(self, sheets_client: GoogleSheetsClient) -> str:
"""
:return: spreadsheetId
"""
Expand All @@ -99,7 +98,7 @@ def _create_spreadsheet(self, sheets_client: discovery.Resource) -> str:
"sheets": [{"properties": {"title": "sheet1"}}, {"properties": {"title": "sheet2"}}],
}

spreadsheet = Spreadsheet.parse_obj(GoogleSheetsClient.create(sheets_client, body=request))
spreadsheet = Spreadsheet.parse_obj(sheets_client.create(body=request))
spreadsheet_id = spreadsheet.spreadsheetId

rows = [["header1", "irrelevant", "header3", "", "ignored"]]
Expand All @@ -109,13 +108,11 @@ def _create_spreadsheet(self, sheets_client: discovery.Resource) -> str:
rows.append(["", "", ""])
rows.append(["orphan1", "orphan2", "orphan3"])

GoogleSheetsClient.update_values(
sheets_client,
sheets_client.update_values(
spreadsheetId=spreadsheet_id,
body={"data": {"majorDimension": "ROWS", "values": rows, "range": "sheet1"}, "valueInputOption": "RAW"},
)
GoogleSheetsClient.update_values(
sheets_client,
sheets_client.update_values(
spreadsheetId=spreadsheet_id,
body={"data": {"majorDimension": "ROWS", "values": rows, "range": "sheet2"}, "valueInputOption": "RAW"},
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@
"""

import unittest
from unittest.mock import Mock
from unittest.mock import Mock, patch

from airbyte_protocol import AirbyteRecordMessage, AirbyteStream, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream
from google_sheets_source.client import GoogleSheetsClient
from google_sheets_source.helpers import Helpers
from google_sheets_source.models import CellData, GridData, RowData, Sheet, SheetProperties, Spreadsheet

Expand Down Expand Up @@ -142,8 +143,10 @@ def test_get_first_row(self):

client = Mock()
client.get.return_value.execute.return_value = fake_response

actual = Helpers.get_first_row(client, spreadsheet_id, sheet)
with patch.object(GoogleSheetsClient, "__init__", lambda s, credentials, scopes: None):
sheet_client = GoogleSheetsClient({"fake": "credentials"}, ["auth_scopes"])
sheet_client.client = client
actual = Helpers.get_first_row(sheet_client, spreadsheet_id, sheet)
self.assertEqual(expected_first_row, actual)
client.get.assert_called_with(spreadsheetId=spreadsheet_id, includeGridData=True, ranges=f"{sheet}!1:1")

Expand All @@ -154,8 +157,10 @@ def test_get_sheets_in_spreadsheet(self):
client.get.return_value.execute.return_value = Spreadsheet(
spreadsheetId=spreadsheet_id, sheets=[Sheet(properties=SheetProperties(title=t)) for t in expected_sheets]
)

actual_sheets = Helpers.get_sheets_in_spreadsheet(client, spreadsheet_id)
with patch.object(GoogleSheetsClient, "__init__", lambda s, credentials, scopes: None):
sheet_client = GoogleSheetsClient({"fake": "credentials"}, ["auth_scopes"])
sheet_client.client = client
actual_sheets = Helpers.get_sheets_in_spreadsheet(sheet_client, spreadsheet_id)

self.assertEqual(expected_sheets, actual_sheets)
client.get.assert_called_with(spreadsheetId=spreadsheet_id, includeGridData=False)
Expand Down Expand Up @@ -186,9 +191,11 @@ def mock_client_call(spreadsheetId, includeGridData, ranges=None):

client = Mock()
client.get.side_effect = mock_client_call

with patch.object(GoogleSheetsClient, "__init__", lambda s, credentials, scopes: None):
sheet_client = GoogleSheetsClient({"fake": "credentials"}, ["auth_scopes"])
sheet_client.client = client
actual = Helpers.get_available_sheets_to_column_index_to_name(
client, spreadsheet_id, {sheet1: frozenset(sheet1_first_row), "doesnotexist": frozenset(["1", "2"])}
sheet_client, spreadsheet_id, {sheet1: frozenset(sheet1_first_row), "doesnotexist": frozenset(["1", "2"])}
)
expected = {sheet1: {0: "1", 1: "2", 2: "3", 3: "4"}}

Expand Down

0 comments on commit 7180c59

Please sign in to comment.