From e056294d106b00720e9f1eb5444089efe087d17c Mon Sep 17 00:00:00 2001 From: Daiyi Yang Date: Tue, 19 Sep 2023 15:08:48 -0700 Subject: [PATCH] Update docker entry point and make bigquery credential not required when starting --- Dockerfile | 2 +- .../app/data_source/bigquery/bigquery_source.py | 17 +++++++++++------ backend/run.py | 4 ++++ 3 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 backend/run.py diff --git a/Dockerfile b/Dockerfile index 53f1057..9629e98 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,4 +26,4 @@ WORKDIR /app/backend EXPOSE 5001 # Run Flask application -CMD . /opt/venv/bin/activate && exec flask run -h 0.0.0.0 -p 5001 +CMD . /opt/venv/bin/activate && exec python run.py diff --git a/backend/app/data_source/bigquery/bigquery_source.py b/backend/app/data_source/bigquery/bigquery_source.py index c588c4e..c799433 100644 --- a/backend/app/data_source/bigquery/bigquery_source.py +++ b/backend/app/data_source/bigquery/bigquery_source.py @@ -10,15 +10,20 @@ class BigquerySource: - def __init__(self) -> None: - self.client = bigquery.Client() + client: bigquery.Client = None + + def get_client(self) -> bigquery.Client: + if self.client is None: + self.client = bigquery.Client() + + return self.client @staticmethod def convert_field_type(bq_type: str) -> str: pass def get_schema(self, full_name: str) -> BigquerySchema: - table = self.client.get_table(full_name) + table = self.get_client().get_table(full_name) selections = ','.join( [f'APPROX_COUNT_DISTINCT({field.name}) as {field.name}' for field in table.schema if field.field_type != 'RECORD']) @@ -59,7 +64,7 @@ def get_schema(self, full_name: str) -> BigquerySchema: return schema def list_dataset(self) -> list[Dataset]: - dataset_list_res = self.client.list_datasets() + dataset_list_res = self.get_client().list_datasets() return [Dataset( name=dataset.dataset_id, @@ -68,7 +73,7 @@ def list_dataset(self) -> list[Dataset]: for dataset in dataset_list_res] def list_tables(self, dataset: Dataset = None) -> list[BigquerySchema]: - tables = self.client.list_tables(dataset) + tables = self.get_client().list_tables(dataset) schemas = [] for row in tables: schema = self.get_schema(row.full_table_id) @@ -85,5 +90,5 @@ def run_queries_in_parallel(self, queries) -> list[RowIterator]: def run_query(self, query) -> RowIterator: # Run the query and return the results - query_job = self.client.query(query) + query_job = self.get_client().query(query) return query_job.result() diff --git a/backend/run.py b/backend/run.py new file mode 100644 index 0000000..8c78ddf --- /dev/null +++ b/backend/run.py @@ -0,0 +1,4 @@ +from app import app + +if __name__ == '__main__': + app.run(debug=True, port=5001, host="::", threaded=False, processes=4)