In [1]:
import boto3
import pandas as pd
import awswrangler as wr

In [2]:
# Cognito user pool that get_all_users() lists users from.
UserPoolId = "ap-southeast-2_NNQ7ZpKrB" # user pool id for: prod_inspection_app_user_pool

In [3]:
def get_all_users(user_pool_id=None, client=None):
    """Return every user of a Cognito user pool, following pagination.

    Parameters
    ----------
    user_pool_id : str, optional
        Pool to list. When omitted, the notebook-level ``UserPoolId`` is used.
    client : object, optional
        Anything exposing ``list_users(**kwargs)``. When omitted, a
        ``boto3.client('cognito-idp')`` is created.

    Returns
    -------
    list of dict
        The ``'Users'`` entries of every page, concatenated in page order.
    """
    if user_pool_id is None:
        user_pool_id = UserPoolId
    if client is None:
        client = boto3.client('cognito-idp')

    users = []
    request = {'UserPoolId': user_pool_id}
    while True:
        response = client.list_users(**request)
        users.extend(response.get('Users', []))

        token = response.get('PaginationToken')
        # Stop on any falsy token (missing, None or ''): looping again without a
        # fresh token would just replay the previous request forever.
        if not token:
            break
        request['PaginationToken'] = token

    return users


In [4]:
# Fetch the user list once; the following cells read from `users`.
users = get_all_users()
# NOTE(review): echoing the whole list renders each user's attributes (including
# email addresses) into the saved output — consider clearing it before sharing.
users


[{'Username': '2584f3e5-1e00-4a8d-b67a-fdedeab9f42d',
  'Attributes': [{'Name': 'sub',
    'Value': '2584f3e5-1e00-4a8d-b67a-fdedeab9f42d'},
   {'Name': 'email_verified', 'Value': 'true'},
   {'Name': 'given_name', 'Value': 'Inspector'},
   {'Name': 'family_name', 'Value': 'Five'},
   {'Name': 'email', 'Value': 'inspector5@aiolatyson.com'},
   {'Name': 'custom:groups', 'Value': '[Inspectors]'}],
  'UserCreateDate': datetime.datetime(2022, 3, 7, 21, 49, 43, 225000, tzinfo=tzlocal()),
  'UserLastModifiedDate': datetime.datetime(2022, 3, 7, 21, 58, 44, 20000, tzinfo=tzlocal()),
  'Enabled': True,
  'UserStatus': 'CONFIRMED'},
 {'Username': '278310af-918f-4ca5-a558-2be1751d5389',
  'Attributes': [{'Name': 'sub',
    'Value': '278310af-918f-4ca5-a558-2be1751d5389'},
   {'Name': 'email_verified', 'Value': 'true'},
   {'Name': 'given_name', 'Value': 'Inspector'},
   {'Name': 'family_name', 'Value': 'Third'},
   {'Name': 'email', 'Value': 'inspector3@aiolatyson.com'},
   {'Name': 'custom:group

In [5]:
# Number of user records returned by get_all_users().
len(users)

23

In [6]:
def _attributes_by_name(user):
    """Flatten one user's ``Attributes`` list into a ``{Name: Value}`` dict."""
    return {attr['Name']: attr['Value'] for attr in user.get('Attributes', [])}


# Parallel lists, one entry per user, consumed when the DataFrame is built.
uuid = []
given_name = []
family_name = []

for user in users:
    # Look attributes up by name rather than by list position: a user with a
    # missing or extra attribute would otherwise shift every index and put the
    # wrong value in the wrong column.
    attrs = _attributes_by_name(user)

    # 'sub' is the user's identifier; fail loudly (KeyError) if it is absent.
    uuid.append(attrs['sub'])

    # Name parts fall back to an empty string when the attribute is absent.
    given_name.append(attrs.get('given_name', ''))
    family_name.append(attrs.get('family_name', ''))


In [7]:
# Collect the extracted Cognito fields into a single frame, one row per user.
df_users = pd.DataFrame(
    {
        'uuid': uuid,
        'given_name': given_name,
        'family_name': family_name,
    }
)

# Display name is "<given_name> <family_name>".
full_name = df_users['given_name'] + ' ' + df_users['family_name']
df_users['user_name'] = full_name

df_users

Unnamed: 0,uuid,given_name,family_name,user_name
0,2584f3e5-1e00-4a8d-b67a-fdedeab9f42d,Inspector,Five,Inspector Five
1,278310af-918f-4ca5-a558-2be1751d5389,Inspector,Third,Inspector Third
2,28b64e95-afbe-454f-ba64-2637aa9c8b4e,Inspector,Second,Inspector Second
3,4397d035-339b-40ba-a269-a0a9d26b8d1e,Inspector,First,Inspector First
4,468724be-6389-441f-ae0a-5bfd357f6ae0,Inspector,Four,Inspector Four
5,986ef82f-76a2-44bd-bbdb-8e865be786d3,Inspector,Seven,Inspector Seven
6,c0047dc2-ffe5-4f3d-abd5-37727e08e90c,Inspector,Six,Inspector Six
7,c94cf324-4c30-42bc-bd37-9e0911f4b9d4,QAManager,Eight,QAManager Eight
8,efeac4c4-082b-43ab-8fb7-402758080747,ANNE MARIE,Cunningham,ANNE MARIE Cunningham
9,f01f0496-8c52-43a6-9ef5-38670996396f,BRENDSLEY,OJEDA,BRENDSLEY OJEDA


In [14]:
# Write the uuid -> user_name lookup to S3 as a parquet dataset registered in
# the Glue/Athena catalog; mode='overwrite' replaces the existing data.
wr.s3.to_parquet(
    df=df_users[['uuid', 'user_name']],
    path='s3://aiola-777622251240-ap-southeast-2-data/users_data/',
    dataset=True,
    mode='overwrite',
    database='inspection-app',  # Athena/Glue database
    table='inspectors_from_cognito',  # Athena/Glue table
    dtype={'uuid': 'string', 'user_name': 'string'},
)


{'paths': ['s3://aiola-777622251240-ap-southeast-2-data/users_data/d334106509584b70883091891db61690.snappy.parquet'],
 'partitions_values': {}}

In [15]:
# Show the two columns that were written to the parquet dataset.
df_users[['uuid', 'user_name']]

Unnamed: 0,uuid,user_name
0,2584f3e5-1e00-4a8d-b67a-fdedeab9f42d,Inspector Five
1,278310af-918f-4ca5-a558-2be1751d5389,Inspector Third
2,28b64e95-afbe-454f-ba64-2637aa9c8b4e,Inspector Second
3,4397d035-339b-40ba-a269-a0a9d26b8d1e,Inspector First
4,468724be-6389-441f-ae0a-5bfd357f6ae0,Inspector Four
5,986ef82f-76a2-44bd-bbdb-8e865be786d3,Inspector Seven
6,c0047dc2-ffe5-4f3d-abd5-37727e08e90c,Inspector Six
7,c94cf324-4c30-42bc-bd37-9e0911f4b9d4,QAManager Eight
8,efeac4c4-082b-43ab-8fb7-402758080747,ANNE MARIE Cunningham
9,f01f0496-8c52-43a6-9ef5-38670996396f,BRENDSLEY OJEDA
