In [1]:
# Display the value of every top-level expression in a cell, not just one of them
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# Widen the notebook container to 80% of the browser window.
# `display` and `HTML` come from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

In [3]:
#! pip freeze
#! pip install gspread oauth2client df2gspread

In [4]:
#Importing the module
import gspread
from df2gspread import df2gspread as d2g
from oauth2client.service_account import ServiceAccountCredentials

In [5]:
from google.cloud import bigquery
import json

In [6]:
# `os` was not imported by any earlier cell, so it is imported here to keep the
# cell runnable on a fresh kernel.
import os

# Path to the service-account JSON key file, read from the environment.
# Raises KeyError if GOOGLE_APPLICATION_CREDENTIALS is not set.
GCP_CREDENTIALS = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
GCP_CREDENTIALS

In [7]:
# OAuth scopes requested for the service account (Sheets feeds and Drive).
# These values are fixed, so nothing needs to change here between runs.
scope = [
   'https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
# Build service-account credentials from the JSON key file and authorize gspread with them
credentials = ServiceAccountCredentials.from_json_keyfile_name(GCP_CREDENTIALS, scope)
gc = gspread.authorize(credentials)

In [23]:
# BigQuery client built from the same service-account key file used for gspread.
# NOTE(review): BQ_CLIENT is not referenced by any other visible cell — confirm it is still needed.
BQ_CLIENT = bigquery.Client.from_service_account_json(GCP_CREDENTIALS)

*****

## GSheets

In [8]:
# Key of the Google Sheets workbook to open and name of the worksheet (tab) to read
spreadsheet_key = '1M7HrZAb1RlMeqKQ0PfiHeCfiqAztMJzLd1ibIIHsing'
worksheet = 'GenerateDatasetsProcess'

In [11]:
# Open the workbook by its spreadsheet key
workbook = gc.open_by_key(spreadsheet_key)
# Select the worksheet (tab) to pull the data from
sheet = workbook.worksheet(worksheet)
# Fetch every cell value of the worksheet (kept as returned by gspread; no data frame is built here)
gsheet_data = sheet.get_all_values()

In [12]:
# Inspect the row at index 1 to check the column layout of the sheet
gsheet_data[1]

['nicolas.olivera@pedidosya.com',
 'peya-argentina',
 '',
 'nicolas',
 'olivera',
 'nicolas.olivera@pedidosya.com',
 'user_nicolas_olivera',
 'peya-argentina']

## Manual 

In [40]:
## Manual dataset generation
# Each entry is [email, dataset, project]
manual_data = [
    ["alvaro.calero@pedidosya.com","user_alvaro_calero","peya-argentina"],
    ["alessio.aveggio@pedidosya.com","user_alessio_aveggio","peya-delivery-and-support"]
]

In [41]:
# Display the manual rows for a visual check
manual_data

[['alvaro.calero@pedidosya.com', 'user_alvaro_calero', 'peya-argentina'],
 ['alessio.aveggio@pedidosya.com',
  'user_alessio_aveggio',
  'peya-delivery-and-support']]

## Set datasource

In [42]:
# Column positions counted from the end of each row, so the same indexes work
# for a 3-element manual row and for a longer sheet row whose last three
# columns are email, dataset and project.
pos_email = -3
pos_dataset = -2
pos_project = -1

In [43]:
# Data source to process: the manual list is active; swap the assignments
# below to process the rows read from Google Sheets instead.
# values = gsheet_data
values = manual_data

In [44]:
# Preview the first row to confirm the column positions pick the right fields
preview_template = "user email: {e} \nuser project: {d} \nuser dataset: {s}\n****"
for user in values[:1]:
    print(
        preview_template.format(
            e=user[pos_email],
            d=user[pos_project],
            s=user[pos_dataset],
        )
    )
    

user email: alvaro.calero@pedidosya.com 
user project: peya-argentina 
user dataset: user_alvaro_calero
****


*********

In [45]:
# Split the source rows into usable [email, dataset, project] triples and
# e-mails that were left out because email or project is too short to be valid.
cleaned_rows, excluded_users = [], []
for row in values:
    email = row[pos_email]
    if len(email) > 5 and len(row[pos_project]) > 5:
        cleaned_rows.append([email, row[pos_dataset], row[pos_project]])
    elif len(email) > 1:
        # Row has an e-mail but no usable project: remember who was skipped
        excluded_users.append(email)

In [46]:
# E-mails that were skipped by the cleaning step
excluded_users

[]

In [47]:
# Show every cleaned row (the [0:] slice covers the whole list)
cleaned_rows[0:]

[['alvaro.calero@pedidosya.com', 'user_alvaro_calero', 'peya-argentina'],
 ['alessio.aveggio@pedidosya.com',
  'user_alessio_aveggio',
  'peya-delivery-and-support']]

In [48]:
# NotFound is the error raised by the BigQuery client when a dataset does not
# exist; it is imported here because this is the only cell that needs it.
from google.cloud.exceptions import NotFound

# Rows whose dataset already existed, and e-mails whose dataset had to be created.
# NOTE(review): has_dataset stores the whole row while has_not_dataset stores
# only the e-mail; kept as is because only their lengths are inspected below.
has_dataset = []
has_not_dataset = []

# E-mails for which updating access, labels or description failed
error_users = []

for user in cleaned_rows:
    # Each cleaned row is [email, dataset, project]
    user_email = user[0].strip()
    user_dataset = user[1].strip('_').strip().lower()
    user_project = user[2].strip('+').strip()

    # One BigQuery client per row, bound to that user's project
    user_bq_client = bigquery.Client.from_service_account_json(GCP_CREDENTIALS, project=user_project)

    dataset_id = "{project}.{dataset}".format(project=user_bq_client.project, dataset=user_dataset)

    try:
        # Check whether the dataset already exists
        dataset = user_bq_client.get_dataset(dataset_id)  # Make an API request.
        has_dataset.append(user)
    except NotFound:
        # Only a missing dataset triggers creation. Any other failure
        # (permissions, network, interrupt) propagates instead of being
        # mistaken for "dataset does not exist".
        has_not_dataset.append(user_email)

        # Construct a full Dataset object to send to the API.
        dataset = bigquery.Dataset(dataset_id)

        # Send the dataset to the API for creation, with an explicit timeout.
        dataset = user_bq_client.create_dataset(dataset, timeout=30)  # Make an API request.
        print("Created dataset {}.{}".format(user_bq_client.project, dataset.dataset_id))

    # ****************************************************************************************************
    # Update access: grant the OWNER role on the dataset to the user

    entry = bigquery.AccessEntry(
        role="OWNER",
        entity_type="userByEmail",
        entity_id=user_email,
    )
    # Add the new role to the dataset access list; skip it when an identical
    # entry is already present so re-running the cell does not send duplicates.
    entries = list(dataset.access_entries)
    if entry not in entries:
        entries.append(entry)
    dataset.access_entries = entries

    try:
        # Update the access entries of the dataset
        dataset = user_bq_client.update_dataset(dataset, ["access_entries"])  # Make an API request.

        # Update the dataset labels
        dataset.labels = {"owner": "user"}
        dataset = user_bq_client.update_dataset(dataset, ["labels"])  # Make an API request.

        # Update the description, keeping any text that is already there
        current_description = dataset.description
        new_description = "Dataset of user {user}, Generated by Data Quality Team".format(user=user_email)

        if not current_description:
            # Missing or empty description: set it without a leading newline
            dataset.description = new_description
            dataset = user_bq_client.update_dataset(dataset, ["description"])  # Make an API request.
        elif new_description not in current_description:
            dataset.description = current_description + "\n" + new_description
            dataset = user_bq_client.update_dataset(dataset, ["description"])  # Make an API request.

        full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
        print(
            "Updated dataset '{dataset}' with modified user permissions. Added OWNER rol to {user}".format(dataset=full_dataset_id, user=user_email)
        )

    except Exception as exc:
        # Best effort per user: record the failure with its cause and continue
        # with the next row. KeyboardInterrupt is no longer swallowed.
        print("**** User error IAM {}: {!r}".format(user_email, exc))
        error_users.append(user_email)

Created dataset peya-argentina.user_alvaro_calero
Updated dataset 'peya-argentina.user_alvaro_calero' with modified user permissions. Added OWNER rol to alvaro.calero@pedidosya.com
Created dataset peya-delivery-and-support.user_alessio_aveggio
Updated dataset 'peya-delivery-and-support.user_alessio_aveggio' with modified user permissions. Added OWNER rol to alessio.aveggio@pedidosya.com


In [None]:
# Number of rows whose dataset already existed
len(has_dataset)

In [None]:
# Number of users whose dataset had to be created
len(has_not_dataset)

In [None]:
# E-mails for which the access/labels/description update failed
error_users