In [None]:
# run once
# ! pip install gspread oauth2client df2gspread boto3
from dcicutils import ff_utils
from functions.notebook_functions import *
import pandas as pd

# Access key looked up by name through the notebook helper functions
# (presumably the credentials for the data portal -- confirm in get_key).
my_key = get_key('koray_data')

# All User items; only the ones with a 'lab' are used for the comparison.
all_users = ff_utils.search_metadata('/search/?type=User', key=my_key)
all_users_with_lab = [usr for usr in all_users if usr.get('lab')]

# Labs with these titles are excluded from the lab list (and, further down,
# users of these labs are skipped as well).
skip_title = ['Peter Park, HARVARD', 'DCIC Testing Lab', '4DN Viewing Lab']
all_labs = [
    lab
    for lab in ff_utils.search_metadata('/search/?type=Lab', key=my_key)
    if lab['display_title'] not in skip_title
]

# All Award items; used to resolve award uuids to viewing groups and tags.
all_grants = ff_utils.search_metadata('/search/?type=Award', key=my_key)

In [None]:
# narrow users to 4DN users
# A user counts as a 4DN user when any award of their own lab, or of a lab
# they submit for, has viewing group '4DN' or 'NOFIC'.

# viewing group of every award, keyed by award uuid (built once, not per user)
viewing_group_by_award = {i['uuid']: i.get('viewing_group') for i in all_grants}

fdn_users = []
for a_user in all_users_with_lab:
    # skip users from test labs
    if a_user['lab']['display_title'] in skip_title:
        continue

    # Copy the list: appending to the original would modify the user's own
    # 'submits_for' value, and would keep growing it on every re-run.
    labs = list(a_user.get('submits_for', []))
    labs.append(a_user['lab'])

    # uuids of all awards attached to any of these labs
    awards = {i['uuid'] for a_lab in labs for i in a_lab.get('awards', [])}
    # viewing groups of those awards (None for awards missing from all_grants)
    viewing_groups = {viewing_group_by_award.get(i) for i in awards}
    if '4DN' in viewing_groups or 'NOFIC' in viewing_groups:
        fdn_users.append(a_user)


In [None]:
# GET OH SHEET
import boto3
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import json
from collections import OrderedDict
import copy

# GET KEY FROM S3
# The service-account key file is read from an S3 object and parsed as JSON;
# it is never written to local disk.
s3 = boto3.resource('s3')
obj = s3.Object('elasticbeanstalk-fourfront-webprod-system', 'DCICjupgoogle.json')
cont = obj.get()['Body'].read().decode()
key_dict = json.loads(cont)

# Authorize a gspread client with the spreadsheets scope.
SCOPES = 'https://www.googleapis.com/auth/spreadsheets'
creds = ServiceAccountCredentials.from_json_keyfile_dict(key_dict, SCOPES)
gc = gspread.authorize(creds)

# The key (ID) of the spreadsheet and the name of the worksheet inside it
# that holds the member list.
book_id = '1zPfPjm1-QT8XdYtE2CSRA83KOhHfiRWX6rRl8E1ARSw'
sheet_name = 'AllMembers'

def simple(string):
    """Return *string* lower-cased, with surrounding whitespace removed."""
    lowered = string.lower()
    return lowered.strip()

# Open the workbook and read every cell of the member worksheet.
book = gc.open_by_key(book_id)
worksheet = book.worksheet(sheet_name)
table = worksheet.get_all_values()
# Convert the table into a list of OrderedDicts, one per data row:
# the first row supplies the column names, the remaining rows the values.
# NOTE(review): table[0] raises IndexError if the worksheet is completely empty.
df = pd.DataFrame(table[1:], columns=table[0])
user_list = df.to_dict(orient='records', into=OrderedDict)

def update_record(a_record, user, labs=None, grants=None):
    """Fill the 'DCIC ...' fields of a sheet record from a portal user item.

    Args:
        a_record: dict-like sheet row; it is modified in place.
        user: user item with 'uuid', 'first_name', 'last_name' and 'email';
            'job_title' and 'lab' are optional.
        labs: lab items used to look up the awards of the user's lab.
            Defaults to the module-level ``all_labs``.
        grants: award items used to resolve award uuids.
            Defaults to the module-level ``all_grants``.

    Returns:
        The same ``a_record`` object, after modification.

    'DCIC Grant' is only set when a 4DN/NOFIC award is found for the user's
    lab (or for the DCIC lab special case); otherwise any existing value is
    left untouched.
    """
    labs = all_labs if labs is None else labs
    grants = all_grants if grants is None else grants

    a_record['DCIC UUID'] = user['uuid']
    a_record['DCIC Role'] = user.get('job_title', "")

    # Users found through the email search are not guaranteed to have a lab.
    lab = user.get('lab') or {}
    lab_name = lab.get('display_title', "")
    if lab_name == '4DN DCIC, HMS':
        lab_name = 'Peter Park, HMS'
    a_record['DCIC Lab'] = lab_name
    a_record['DCIC First Name'] = user['first_name']
    a_record['DCIC Last Name'] = user['last_name']
    a_record['DCIC Account Email'] = user['email']

    # The lab can be absent from `labs` (the lab list is filtered by title
    # before it gets here), so a failed lookup must not raise.
    user_lab = next((i for i in labs if i['uuid'] == lab.get('uuid')), None)
    award_uuids = set()
    if user_lab:
        award_uuids = {i['uuid'] for i in user_lab.get('awards', [])}

    # Walk the awards in `grants` order, as before.
    for an_award in grants:
        if an_award['uuid'] not in award_uuids:
            continue
        # find first 4dn grant
        if an_award.get('viewing_group') in ['4DN', 'NOFIC']:
            if an_award['display_title'] == 'Associate Member Award':
                award_tag = 'Associate Member'
            else:
                # tag is the part of the description before the first ':'
                tag = an_award['description'].split(':')[0]
                last = an_award['pi']['last_name']
                award_tag = '{} - {} ({})'.format(tag, last, an_award['name'])
            a_record['DCIC Grant'] = award_tag
            break

    # The DCIC lab always gets this fixed grant tag.
    if lab_name == 'Peter Park, HMS':
        a_record['DCIC Grant'] = 'DCIC - Park (1U01CA200059-01)'

    return a_record

def collect_oh_info(a_record):
    """Build a user-info dict from the 'OH ...' columns of a sheet record.

    Returns None when the record has no 'OH Account Email'. The column
    names are tied to the current sheet layout and change with it.
    """
    if not a_record.get('OH Account Email'):
        return None
    return {
        'email': a_record['OH Account Email'],
        'first_name': a_record['OH First Name'],
        'last_name': a_record['OH Last Name'],
        'job_title': a_record['OH Role'],
    }

# Compare every sheet record with the portal users and collect:
#   delete_from_dcic - uuids of users that OH marked as deleted
#   inform_oh        - uuids of users that have no OH uuid yet
#   add_to_dcic      - user info (from the OH columns) of users missing on the portal
# Records are updated in place, and portal users missing from the sheet are
# appended to user_list at the end.
from urllib.parse import quote

delete_from_dcic = []
add_to_dcic = []
inform_oh = []

# Index the 4DN users by uuid once instead of scanning the list per record
# (the first occurrence wins, as with the previous list scan).
fdn_users_by_uuid = {}
for a_user in fdn_users:
    fdn_users_by_uuid.setdefault(a_user['uuid'], a_user)

# iterate over records and compare
for a_record in user_list:
    # The same stripped uuid is used for every lookup below, so a value with
    # stray whitespace is treated consistently.
    record_uuid = (a_record.get('DCIC UUID') or '').strip()
    # does the item have a dcic uuid
    if record_uuid:
        # does it exist in our system
        user = fdn_users_by_uuid.get(record_uuid)
        if user:
            # is there a delete tag
            if a_record.get('OH DELETED'):
                delete_from_dcic.append(user['uuid'])
                print(user['email'], 'will be deleted')
            # did oh add this user
            elif not a_record.get('OH UUID'):
                inform_oh.append(user['uuid'])
            # otherwise: expected state, nothing to do
        # we deleted the user, and did not label the record with delete yet
        elif not a_record.get('DCIC DELETED'):
            # check if we have a deleted record
            try:
                resp = ff_utils.get_metadata(record_uuid, my_key)
            except Exception:
                # best effort: report the lookup failure and move on
                print('SKIPPING, can not find previously assigned uuid', a_record['DCIC UUID'])
            else:
                if resp.get('status') == 'deleted':
                    a_record['DCIC DELETED'] = 'deleted'
    # if we did not assign a uuid
    else:
        print('item without uuid')
        # do we have an account email from dcic
        dcic_email = (a_record.get('DCIC Account Email') or '').strip().lower()
        result = []
        if dcic_email:
            # does this user exist on data
            # The email is percent-encoded; a raw '+' in a query string would
            # otherwise be decoded as a space.
            result = ff_utils.search_metadata('/search/?type=User&email=' + quote(dcic_email), my_key)
        if result:
            if len(result) != 1:
                raise ValueError('expected exactly one user for {}, found {}'.format(dcic_email, len(result)))
            # update record (update_record modifies a_record in place)
            update_record(a_record, result[0])
        # no email, or we don't have this record: gather info from oh
        else:
            user_temp = collect_oh_info(a_record)
            if user_temp:
                add_to_dcic.append(user_temp)

# any user that is not on the list
all_dcic_uuids = [u for u in ((i.get('DCIC UUID') or '').strip() for i in user_list) if u]
known_dcic_uuids = set(all_dcic_uuids)
remaining_users = [i for i in fdn_users if i['uuid'] not in known_dcic_uuids]
print(len(remaining_users))

if remaining_users:
    # Column names for new records: taken from the first record, or from the
    # sheet header when the sheet has no data rows.
    template_keys = list(user_list[0]) if user_list else list(df.columns)
    for a_user in remaining_users:
        # create empty record object
        temp = OrderedDict((k, "") for k in template_keys)
        temp = update_record(temp, a_user)
        user_list.append(temp)
print(len(user_list))

In [None]:
# Adding all the data to the spreadsheet at once
# Row 1 holds the column names, and every record follows on its own row.

# Column order: every key that occurs in any record, in first-seen order.
# Using one shared header keeps values aligned with their column even when
# a record has extra keys or a different key order than the first record.
header = list(OrderedDict.fromkeys(key for line in user_list for key in line))

gs_write = []  # A list of cell objects to write to the worksheet

# write columns
for col, key in enumerate(header, start=1):
    gs_write.append(gspread.models.Cell(1, col, key))

# write values (a key missing from a record is written as an empty cell)
for row, line in enumerate(user_list, start=2):
    for col, key in enumerate(header, start=1):
        gs_write.append(gspread.models.Cell(row, col, line.get(key, "")))

# Write the cells to the worksheet (nothing to send when there are no records)
if gs_write:
    worksheet.update_cells(gs_write)


In [None]:

# For a newline-separated block of user uuids, print every uuid that occurs
# more than once and the contact email of each user (or 'no contact email'
# when it is missing or identical to the account email).
# NOTE(review): `uuids` is not defined anywhere in the visible cells; it is
# presumably pasted in by hand before running this cell.
from collections import Counter

# Skip blank lines (e.g. from a trailing newline) and surrounding whitespace.
all_uuids = [i.strip() for i in uuids.split('\n') if i.strip()]
uuid_counts = Counter(all_uuids)
users_by_uuid = {i['uuid']: i for i in all_users}

for a_uuid in all_uuids:
    c = uuid_counts[a_uuid]
    if c > 1:
        print(a_uuid)
    user = users_by_uuid.get(a_uuid)
    if user is None:
        # report unknown uuids instead of failing on them
        print(a_uuid, 'not found in all_users')
        continue
    contact_email = user.get('contact_email')
    if contact_email and contact_email != user['email']:
        print(contact_email)
    else:
        print('no contact email')