In [1]:
import boto3
import pandas as pd
import awswrangler as wr

In [2]:
UserPoolId = "ap-southeast-1_UFv3k3Ddb" # user pool id for: prod_inspection_app_user_pool

In [3]:
def get_all_users():
    cognito = boto3.client('cognito-idp')
    
    users = []
    next_page = None
    kwargs = {
        'UserPoolId': UserPoolId
    }
    
    users_remain = True
    while(users_remain):
        if next_page:
            kwargs['PaginationToken'] = next_page
        response = cognito.list_users(**kwargs)
        users.extend(response['Users'])
        next_page = response.get('PaginationToken', None)
        users_remain = next_page is not None

    return users


In [4]:
users = get_all_users()
users


[{'Username': '214cea70-fe7e-4e1c-93e2-cb403a52ea24',
  'Attributes': [{'Name': 'sub',
    'Value': '214cea70-fe7e-4e1c-93e2-cb403a52ea24'},
   {'Name': 'email_verified', 'Value': 'true'},
   {'Name': 'given_name', 'Value': 'asaf'},
   {'Name': 'family_name', 'Value': 'inspector'},
   {'Name': 'email', 'Value': 'asaf@inspector.com'},
   {'Name': 'picture', 'Value': 'http://me.png'}],
  'UserCreateDate': datetime.datetime(2022, 8, 22, 12, 1, 46, 185000, tzinfo=tzlocal()),
  'UserLastModifiedDate': datetime.datetime(2022, 8, 22, 12, 11, 57, 250000, tzinfo=tzlocal()),
  'Enabled': True,
  'UserStatus': 'CONFIRMED'},
 {'Username': '4e7b4344-b955-48dd-a897-f36ca7860e78',
  'Attributes': [{'Name': 'sub',
    'Value': '4e7b4344-b955-48dd-a897-f36ca7860e78'},
   {'Name': 'email_verified', 'Value': 'true'},
   {'Name': 'given_name', 'Value': 'asaf'},
   {'Name': 'family_name', 'Value': 'qamanager'},
   {'Name': 'email', 'Value': 'asaf@qamanager.com'},
   {'Name': 'picture', 'Value': 'http://me.

In [5]:
len(users)

13

In [6]:
uuid =[]
given_name = []
family_name = []

for i in range(len(users)):
    
    # first extraction: uuid
    single_uuid = list(users[i]['Attributes'][0].values())[1]
    uuid.append(single_uuid)
    
    # second extraction: given name
    single_given_name = list(users[i]['Attributes'][3].values())[1]
    given_name.append(single_given_name)
    
    # third extraction family_name
    single_family_name = list(users[i]['Attributes'][4].values())[1]
    family_name.append(single_family_name)


In [7]:
df_users = pd.DataFrame()

# insert the data from cognito into the df_users dataframe
df_users['uuid'] = uuid
df_users['given_name'] = given_name
df_users['family_name'] = family_name
df_users['user_name'] = df_users['given_name'] + ' ' + df_users['family_name']

df_users

Unnamed: 0,uuid,given_name,family_name,user_name
0,214cea70-fe7e-4e1c-93e2-cb403a52ea24,inspector,asaf@inspector.com,inspector asaf@inspector.com
1,4e7b4344-b955-48dd-a897-f36ca7860e78,qamanager,asaf@qamanager.com,qamanager asaf@qamanager.com
2,7174d30b-a9c2-435a-a580-e8e22226c98a,+972504426268,Sasha,+972504426268 Sasha
3,46e3eefa-dd19-4b3d-bcf8-05b0fc236014,Alexander,Miliatin,Alexander Miliatin
4,db26b75c-7a69-47ed-8f02-09e66fe8fe10,Anton,Chasnyk,Anton Chasnyk
5,30e81ab4-ce61-4c37-9379-f8fe97ab7fd8,Chaim,Chaim,Chaim Chaim
6,d9ae58be-1e33-496e-bcb7-61c9e6a8f396,Dmitry,Efimov,Dmitry Efimov
7,e1d63815-4df0-4ee0-a705-2253ce25e910,Ilyas,Kabdyushev,Ilyas Kabdyushev
8,59b8049c-e7ae-4633-b56b-b1b5e0aefe2c,Muli,Cohen,Muli Cohen
9,aafe9f2a-7878-433e-ab30-92da7bc60a32,Tal,Yagil,Tal Yagil


In [8]:
wr.s3.to_parquet(
    df = df_users.loc[:, ('uuid', 'user_name')],
    path='s3://aiola-834657444538-data-sync-ap-southeast-1/thairelease_inspection_app_user_pool/',
    dataset=True,
    database='inspection',  # Athena/Glue database
    table='inspectors_from_cognito',  # Athena/Glue table
    dtype={'uuid': 'string',
           'user_name':'string'},
    mode = 'overwrite'
)


{'paths': ['s3://aiola-834657444538-data-sync-ap-southeast-1/thairelease_inspection_app_user_pool/e2c45c39c45640c59dcc785c351a1eab.snappy.parquet'],
 'partitions_values': {}}

In [15]:
df_users.loc[:, ('uuid', 'user_name')]

Unnamed: 0,uuid,user_name
0,2584f3e5-1e00-4a8d-b67a-fdedeab9f42d,Inspector Five
1,278310af-918f-4ca5-a558-2be1751d5389,Inspector Third
2,28b64e95-afbe-454f-ba64-2637aa9c8b4e,Inspector Second
3,4397d035-339b-40ba-a269-a0a9d26b8d1e,Inspector First
4,468724be-6389-441f-ae0a-5bfd357f6ae0,Inspector Four
5,986ef82f-76a2-44bd-bbdb-8e865be786d3,Inspector Seven
6,c0047dc2-ffe5-4f3d-abd5-37727e08e90c,Inspector Six
7,c94cf324-4c30-42bc-bd37-9e0911f4b9d4,QAManager Eight
8,efeac4c4-082b-43ab-8fb7-402758080747,ANNE MARIE Cunningham
9,f01f0496-8c52-43a6-9ef5-38670996396f,BRENDSLEY OJEDA
