<a href="https://colab.research.google.com/github/cemvardar/pragmatic_google_colab/blob/main/dslab_colab_utility_main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q pymongo
import json
import requests
import pandas as pd
import random
import folium
from pymongo import MongoClient
import urllib.parse
from datetime import datetime
from google.oauth2 import service_account
import mimetypes
import os
from google.cloud import storage
from oauth2client.service_account import ServiceAccountCredentials
import httplib2

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.7/307.7 kB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
def post_to_rest_api(payload, url):
    """POST ``payload`` to ``url`` as a JSON document.

    The payload is serialized with ``json.dumps`` and sent with JSON
    ``Content-type``/``Accept`` headers.

    Returns:
        The ``requests`` response object, unmodified (no status check is done).
    """
    body = json.dumps(payload)
    json_headers = {'Content-type': 'application/json', 'Accept': 'application/json'}
    return requests.post(url, data=body, headers=json_headers)

def get_df_from_sheet(key, sheet_name):
    """Fetch a sheet through the decisionsciencelab REST endpoint as a DataFrame.

    Args:
        key: Sent to the endpoint under the ``'key'`` field.
        sheet_name: Sent to the endpoint under the ``'sheet_name'`` field.

    Returns:
        A ``pandas.DataFrame`` built from the JSON body of the response.
    """
    response = post_to_rest_api(
        {'key': key, 'sheet_name': sheet_name},
        'http://decisionsciencelab.com/api/v1.0/get_sheet_json',
    )
    records = response.json()
    return pd.DataFrame(records)


def get_mongodb_url():
    """Build the ``mongodb+srv://`` connection URI from the module-level ``secrets``.

    Reads ``secrets['mongodb_user']`` and ``secrets['mongodb_password']``.
    Both are percent-escaped before being placed in the URI: reserved
    characters such as ``@``, ``:`` or ``/`` in either part would otherwise
    corrupt the URI (previously only the password was escaped).

    Returns:
        The connection URI string for the ``location_selection`` database.

    Raises:
        KeyError: If either secret is missing.
    """
    userid = urllib.parse.quote_plus(secrets['mongodb_user'])
    password = urllib.parse.quote_plus(secrets['mongodb_password'])
    return (
        f"mongodb+srv://{userid}:{password}"
        "@location-selection.vfmji.gcp.mongodb.net/location_selection"
        "?retryWrites=true&w=majority"
    )


def get_document_list_from_mongodb(db_name, collection_name):
    """Return every document of a MongoDB collection as a list.

    Args:
        db_name: Name of the database to read from.
        collection_name: Name of the collection inside that database.

    Returns:
        A list with all documents returned by an unfiltered ``find()``.
    """
    # The client is used as a context manager so its connections are closed
    # once the cursor has been fully read, instead of leaking one client per call.
    with MongoClient(get_mongodb_url(), retryWrites=False) as client:
        return list(client[db_name][collection_name].find())


def get_df_from_mongodb(db_name, collection_name):
    """Load all documents of a MongoDB collection into a ``pandas.DataFrame``.

    Args:
        db_name: Name of the database to read from.
        collection_name: Name of the collection inside that database.
    """
    return pd.DataFrame(get_document_list_from_mongodb(db_name, collection_name))


def get_collection(db_name, collection_name):
    """Return a handle to ``collection_name`` in database ``db_name``.

    A new ``MongoClient`` is created on every call and is not closed here,
    since the returned collection keeps using it.
    """
    mongo_client = MongoClient(get_mongodb_url(), retryWrites=False)
    return mongo_client[db_name][collection_name]


def insert(db_name, collection_name, json_doc):
    """Insert ``json_doc`` as a single document into the given collection."""
    get_collection(db_name, collection_name).insert_one(json_doc)


def upsert(db_name, collection_name, query, doc_to_upsert):
    """Upsert ``doc_to_upsert`` into the given collection.

    Issues ``update_one`` with a ``$set`` of ``doc_to_upsert`` against the
    filter ``query``, with ``upsert=True``.
    """
    target = get_collection(db_name, collection_name)
    set_operation = {'$set': doc_to_upsert}
    target.update_one(query, set_operation, upsert=True)


def now():
    """Return the current local date and time as a naive ``datetime``."""
    current_moment = datetime.now()
    return current_moment


def get_gcp_bucket_credentials():
    """Build service-account credentials for the GCP bucket account.

    The private key id and private key come from the module-level ``secrets``
    (``'gcp_private_key_id'`` and ``'gcp_private_key'``); the remaining
    service-account fields are fixed values.

    Returns:
        Credentials created by
        ``service_account.Credentials.from_service_account_info``.
    """
    service_account_info = {
        "type": "service_account",
        "project_id": "cem-k8-test",
        "private_key_id": secrets['gcp_private_key_id'],
        # The stored key has literal backslash-n sequences; turn them into real newlines.
        "private_key": secrets['gcp_private_key'].replace('\\n', '\n'),
        "client_email": "dslab-gcp-bucket@cem-k8-test.iam.gserviceaccount.com",
        "client_id": "101834349465593903398",
        "auth_uri": "https://accounts.google.com/o/oauth2/auth",
        "token_uri": "https://oauth2.googleapis.com/token",
        "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
        "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/dslab-gcp-bucket%40cem-k8-test.iam.gserviceaccount.com",
    }
    return service_account.Credentials.from_service_account_info(service_account_info)


def upload_file_to_gcp_generic_mime_type(file_name, sub_folder_path, delete_file_from_local = False,
                                         bucket_name='decision-science-lab-bucket'):
    """Upload a local file to a GCP bucket and return its storage URL.

    The content type is guessed from ``file_name`` and falls back to
    ``application/octet-stream``. Spaces in ``file_name`` are replaced with
    underscores in the uploaded object's name.

    Args:
        file_name: Path of the local file to upload; also used (with spaces
            replaced) as the object name under ``sub_folder_path``.
        sub_folder_path: Prefix placed before the object name in the bucket.
        delete_file_from_local: When true, remove the local file after the upload.
        bucket_name: Target bucket. Defaults to the bucket this function
            always used before; the returned URL is derived from it so the
            upload target and the link cannot drift apart.

    Returns:
        The ``https://storage.googleapis.com/...`` URL of the uploaded object.
    """
    project_id = 'cem-k8-test'
    client = storage.Client(project=project_id, credentials=get_gcp_bucket_credentials())

    mime_type, _ = mimetypes.guess_type(file_name)
    if mime_type is None:
        mime_type = 'application/octet-stream'  # Default/fallback MIME type

    upload_file_name = file_name.replace(' ', '_')
    blob_name = f"{sub_folder_path}/{upload_file_name}"
    blob = client.bucket(bucket_name).blob(blob_name)
    blob.upload_from_filename(file_name, content_type=mime_type)

    # Delete local file
    if delete_file_from_local and os.path.exists(file_name):
        os.remove(file_name)
        print(f"{file_name} successfully uploaded to GCP and deleted from local")

    uploaded_file_gcp_link = f'https://storage.googleapis.com/{bucket_name}/{blob_name}'
    print(uploaded_file_gcp_link)
    return uploaded_file_gcp_link

def get_secrets():
    """Read the ``dev`` worksheet of the secrets spreadsheet into a dict.

    Returns:
        A dict mapping each entry of the sheet's ``key`` column to the
        entry of its ``value`` column in the same row.
    """
    df_keys = read_df_from_sheet_gspread(
        'https://docs.google.com/spreadsheets/d/1mLwdiSnTi0KoB8Zg6kMclTXXm3f_JavMX-5lAUp-Ry0/edit#gid=0',
        worksheet_name='dev',
    )
    names = df_keys['key']
    values = df_keys['value']
    return dict(zip(names, values))


# Module-level key -> value lookup, read by get_mongodb_url() and
# get_gcp_bucket_credentials().
# NOTE(review): get_secrets() calls read_df_from_sheet_gspread(), which is defined
# in a later cell of this file -- confirm that cell has been run before this line.
# NOTE(review): this name would shadow the stdlib `secrets` module if it were imported.
secrets = get_secrets()

In [4]:
from google.colab import auth
# Run the Colab user-authentication flow at cell execution time.
# NOTE(review): presumably this is what lets google.auth.default() in
# read_df_from_sheet_gspread() find credentials -- confirm.
auth.authenticate_user()

import gspread
from google.auth import default
import pandas as pd

def _worksheet_to_df(worksheet):
    """Convert one gspread worksheet to a DataFrame, using its first row as the header."""
    values = worksheet.get_all_values()
    if not values:
        # No rows at all: there is no header row to index, so return an empty frame.
        return pd.DataFrame()
    return pd.DataFrame(values[1:], columns=values[0])


def read_df_from_sheet_gspread(sheet_url, worksheet_name=None):
    """Read a Google Sheet into pandas using the default Google credentials.

    Args:
        sheet_url: Spreadsheet URL containing ``/d/<sheet id>/``.
        worksheet_name: Name of the worksheet to read. When falsy, the whole
            spreadsheet is considered.

    Returns:
        * A DataFrame of the named worksheet when ``worksheet_name`` is given.
        * A DataFrame of the only worksheet when ``worksheet_name`` is ``None``
          and the spreadsheet has exactly one worksheet.
        * Otherwise a dict mapping each worksheet title to its DataFrame.

        In every case the first row of a worksheet is used as the column header.

    Raises:
        IndexError: If ``sheet_url`` does not contain a ``/d/`` segment.
    """
    creds, _ = default()
    gc = gspread.authorize(creds)

    # Extract the sheet ID from the URL
    sheet_id = sheet_url.split('/d/')[1].split('/')[0]
    sheet = gc.open_by_key(sheet_id)

    if worksheet_name:
        return _worksheet_to_df(sheet.worksheet(worksheet_name))

    # Fetch the worksheet list once; each call is a separate request.
    worksheets = sheet.worksheets()
    if worksheet_name is None and len(worksheets) == 1:
        return _worksheet_to_df(worksheets[0])

    return {worksheet.title: _worksheet_to_df(worksheet) for worksheet in worksheets}

In [1]:
def get_file_name_for_export_with_date_time(file_name_header, file_extenstion):
    """Build an export file name of the form ``<header>_<timestamp>.<extension>``.

    The timestamp is ``now()`` formatted as ``%m_%d_%Y_%H_%M_%S``. Any spaces
    in the resulting name are replaced with underscores.
    """
    timestamp = now().strftime("%m_%d_%Y_%H_%M_%S")
    raw_name = "{}_{}.{}".format(file_name_header, timestamp, file_extenstion)
    return raw_name.replace(' ', '_')