# GOAL
- Interact with the TalentCards API
- Get users data
- Get activity data

# PACKAGES

In [1]:
import pandas as pd
import requests
# from datetime import datetime

# from google.oauth2 import service_account
# from oauth2client.service_account import ServiceAccountCredentials

# PARAMETERS

In [2]:
# Read the API token from the first line of the key file. The context manager
# closes the file, and strip() removes the trailing newline so the token can be
# placed verbatim in the Authorization header.
with open("../keys/talentcards.txt", mode="r") as token_file:
    access_token = token_file.readline().strip()

# Take a single timestamp so both date strings always describe the same day.
# noinspection PyArgumentList
run_timestamp = pd.Timestamp.today()
# ISO date, passed to process_user_data as the extraction date.
today = run_timestamp.strftime("%Y-%m-%d")
# Compact date, used as the prefix of the exported file names.
today_files = run_timestamp.strftime("%Y%m%d")

# Group and user identifiers used for the single-user report request.
group = 1818
user = 20129

# FUNCTIONS

## fix_columns_to_upload_to_bq

In [3]:
def fix_columns_to_upload_to_bq(df):
    """Replace every "-" in the column names of ``df`` with "_".

    Judging by the function name, this makes the names acceptable as BigQuery
    column names. The frame is modified in place and the same object is
    returned.

    Args:
      df: pandas DataFrame whose column labels are strings.

    Returns:
      The same DataFrame, with its columns renamed.
    """
    renamed = []
    for name in df.columns.tolist():
        renamed.append(name.replace("-", "_"))
    df.columns = renamed
    return df

## get_users_data

In [4]:
def get_users_data(group_id=1818):
    """Fetch every page of the users listing of a TalentCards group.

    Args:
      group_id: Identifier of the group whose users are requested.

    Returns:
      List with one decoded JSON response per page, in page order.
    """
    base_url = "https://www.talentcards.io/api/v1"
    endpoint = f"{base_url}/company/groups/{group_id}/users"
    request_headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-type": "application/json",
        "Accept": "application/json",
    }

    # The first request carries no page parameter; its "meta" section reports
    # how many pages exist in total.
    first_page = requests.get(endpoint, headers=request_headers).json()
    pages = [first_page]
    last_page = first_page["meta"]["last_page"]
    if not last_page > 1:
        return pages

    for page_number in range(2, last_page + 1):
        page_response = requests.get(
            endpoint,
            headers=request_headers,
            params={"page[number]": page_number},
        )
        pages.append(page_response.json())
    return pages

## process_user_data

In [5]:
def process_user_data(raw_data, date):
    """Flatten paginated user responses into a single DataFrame.

    Args:
      raw_data (list): Decoded JSON API responses. Each response holds its
        user records under the "data" key, and every record has an "id" and
        an "attributes" mapping.
      date: Value stored in the "extraction_timestamp" column of every row.

    Returns:
      pandas.DataFrame with one row per user, sorted by "user_id", whose
      column names have been passed through fix_columns_to_upload_to_bq.
      An empty DataFrame is returned when the responses contain no users.
    """
    records = []
    for response in raw_data:
        for user_record in response["data"]:
            row = {"user_id": user_record["id"]}
            row.update(user_record["attributes"])
            row["extraction_timestamp"] = date
            records.append(row)

    # Without rows there is no "user_id" column to sort by.
    if not records:
        return pd.DataFrame()

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the accumulated frame on every call.
    users_df = pd.DataFrame(records)
    users_df = users_df.sort_values(by="user_id", ignore_index=True)
    return fix_columns_to_upload_to_bq(users_df)

## get_reports_data

In [6]:
def get_reports_data(group_id=1818, user_id=20129):
    """Fetch every page of the reports of one user in a TalentCards group.

    Args:
      group_id: Identifier of the group the user belongs to.
      user_id: Identifier of the user whose reports are requested.

    Returns:
      Dictionary with the keys "group" (group_id), "user" (user_id) and
      "reports" (list with one decoded JSON response per page).
    """
    base_url = "https://www.talentcards.io/api/v1"
    reports_url = f"{base_url}/company/groups/{group_id}/users/{user_id}/reports"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-type": "application/json",
        "Accept": "application/json",
    }
    pages = [requests.get(reports_url, headers=headers).json()]

    try:
        num_pages = int(pages[0]["meta"]["last_page"])
    except (KeyError, TypeError, ValueError):
        # No usable pagination metadata: keep the single response we have
        # instead of failing later on an unbound result.
        num_pages = 1

    for page in range(2, num_pages + 1):
        pages.append(
            requests.get(
                reports_url,
                headers=headers,
                # Same page selector get_users_data uses; without it every
                # request would return the first page again.
                params={"page[number]": page},
            ).json()
        )
    return {"group": group_id, "user": user_id, "reports": pages}

## process_reports_data

In [7]:
# Fields copied from each card-set report, in output column order.
_REPORT_FIELDS = (
    "set-tests",
    "finished-tests",
    "progress",
    "cards",
    "tests",
    "started-at",
    "completed-at",
)
# Output columns; API field names are written with "_" instead of "-".
_REPORT_COLUMNS = ["group_id", "user_id", "sequence_id", "set_id"] + [
    field.replace("-", "_") for field in _REPORT_FIELDS
]


def _report_row(group_id, user_id, sequence_id, set_entry):
    """Flatten one card-set report entry into a row keyed by _REPORT_COLUMNS."""
    stats = set_entry["meta"]["report"]
    row = {
        "group_id": group_id,
        "user_id": user_id,
        "sequence_id": sequence_id,
        "set_id": set_entry["id"],
    }
    for field in _REPORT_FIELDS:
        row[field.replace("-", "_")] = stats[field]
    return row


def process_reports_data(user_report_dict):
    """Flatten a user's paginated reports into one row per card set.

    Args:
      user_report_dict (dict): Mapping with the keys "group", "user" and
        "reports", where "reports" is a list of decoded JSON pages that each
        hold their entries under "data".

    Returns:
      pandas.DataFrame with the columns group_id, user_id, sequence_id,
      set_id, set_tests, finished_tests, progress, cards, tests, started_at
      and completed_at. Entries of type "user-set-reports" produce one row
      with an empty sequence_id; entries of type "user-sequence-reports"
      produce one row per nested card set, with sequence_id set to the entry
      id. Other entry types are ignored. The two timestamp columns are
      timezone-aware (America/Sao_Paulo). When there are no pages or no
      matching entries the frame has the same columns and zero rows.
    """
    group_id = user_report_dict["group"]
    user_id = user_report_dict["user"]

    rows = []
    for page in user_report_dict["reports"]:
        for entry in page["data"]:
            if entry["type"] == "user-set-reports":
                rows.append(_report_row(group_id, user_id, "", entry))
            elif entry["type"] == "user-sequence-reports":
                rows.extend(
                    _report_row(group_id, user_id, entry["id"], card_set)
                    for card_set in entry["meta"]["reports"]
                )

    # Build the frame once, after all pages were read; passing the column
    # list keeps the schema stable even when there are no rows.
    reports_df = pd.DataFrame(rows, columns=_REPORT_COLUMNS)
    for column in ("started_at", "completed_at"):
        # Missing timestamps become NaT. The zone uses the canonical IANA
        # spelling, which case-sensitive time zone lookups require.
        reports_df[column] = pd.to_datetime(
            reports_df[column], utc=True
        ).dt.tz_convert("America/Sao_Paulo")
    return reports_df

# DATA WRANGLING

##  users data

In [8]:
# Download every users page for the default group and flatten the responses
# into one table, stamping each row with today's date.
df_users = process_user_data(get_users_data(), today)

In [10]:
df_users.sample(5)

Unnamed: 0,user_id,access_token,email,first_name,last_name,mobile,created_at,updated_at,last_login,enabled,extraction_timestamp
24,20162,AQDWUTXUFBZM,avile.lourenco@hotmail.com,Teresa Cristina,Avile Lourenco,5511981270139,2021-08-12T11:30:47+00:00,2021-09-14T16:10:25+00:00,2021-09-14T16:16:26+00:00,True,2021-09-19
22,20160,AQUTKQITRYMB,,Sergio,Marthins da Silva,5511966648715,2021-08-12T11:26:57+00:00,2021-08-26T12:36:34+00:00,,False,2021-09-19
39,20293,AQJPCUAXLLDQ,ricardookuno@gmail.com,Ricardo,CRKS,5511947371379,2021-08-25T13:15:49+00:00,2021-09-15T14:46:05+00:00,2021-09-15T14:51:13+00:00,True,2021-09-19
25,20163,AQOXPYUMZSVZ,,Marcelo,Adelino de Sousa,5511959960855,2021-08-12T11:31:21+00:00,2021-08-26T12:35:37+00:00,,False,2021-09-19
36,20216,AQQZIFYJJOSM,paulooshiro7@gmail.com,Paulo,Sergio Massato Oshiro,5511947505151,2021-08-13T13:01:57+00:00,2021-09-16T18:49:01+00:00,2021-09-16T18:49:01+00:00,True,2021-09-19


In [11]:
df_users.dtypes

user_id                  int64
access_token            object
email                   object
first_name              object
last_name               object
mobile                  object
created_at              object
updated_at              object
last_login              object
enabled                   bool
extraction_timestamp    object
dtype: object

In [14]:
# Export the users table to an Excel file whose name starts with the run date;
# the DataFrame index is not written.
df_users.to_excel(
    f"../data/out/{today_files}_TalentCards users extraction.xlsx", index=False
)

In [12]:
df_users.shape

(45, 11)

## user report

In [14]:
# Fetch the raw report pages for the single group/user set in the parameters.
user_report = get_reports_data(group,user)

In [15]:
# Flatten the downloaded report pages into a table; the bare name on the last
# line makes the notebook display it.
df_report = process_reports_data(user_report_dict=user_report)
df_report

Unnamed: 0,group_id,user_id,sequence_id,set_id,set_tests,finished_tests,progress,cards,tests,started_at,completed_at
0,1818,20129,,11474,0,0,100,13,0,2021-08-20 11:33:41-03:00,2021-08-20 11:33:47-03:00
1,1818,20129,237.0,11401,2,2,100,18,2,2021-08-20 12:17:01-03:00,2021-08-20 12:17:59-03:00
2,1818,20129,237.0,11457,1,1,100,3,1,2021-08-20 12:18:04-03:00,2021-08-20 12:18:34-03:00
3,1818,20129,,11401,2,2,100,18,2,2021-08-20 12:17:01-03:00,2021-08-20 12:17:59-03:00
4,1818,20129,,11457,1,1,100,3,1,2021-08-20 12:18:04-03:00,2021-08-20 12:18:34-03:00
5,1818,20129,,11508,1,0,87,22,1,2021-08-23 13:51:39-03:00,NaT
6,1818,20129,,11433,3,3,100,16,3,2021-08-20 14:39:42-03:00,2021-08-20 14:40:48-03:00
7,1818,20129,,12203,2,2,100,18,2,2021-09-16 14:59:44-03:00,2021-09-16 15:00:39-03:00
8,1818,20129,,12202,1,1,100,8,1,2021-09-13 18:04:06-03:00,2021-09-13 18:05:05-03:00
9,1818,20129,,11724,1,1,100,15,1,2021-08-23 12:08:42-03:00,2021-08-23 12:10:09-03:00


## all users reports

In [17]:
users_id = list(df_users["user_id"].unique())

# Download and flatten the reports of every user, then concatenate once.
# DataFrame.append was removed in pandas 2.0 and copied the accumulated frame
# on every call. The comprehension variable also no longer overwrites the
# module-level `user` parameter.
report_frames = [
    process_reports_data(get_reports_data(group, user_id)) for user_id in users_id
]
if report_frames:
    df_reports = pd.concat(report_frames, ignore_index=True)
else:
    # pd.concat rejects an empty list; keep the original empty-frame result.
    df_reports = pd.DataFrame()


In [19]:
# Order rows by completion time, then by user id, both descending.
df_reports = df_reports.sort_values(
    by=["completed_at", "user_id"],
    ascending=False,
    ignore_index=True,
)
# Store both timestamp columns as text before exporting.
df_reports = df_reports.astype({"started_at": "str", "completed_at": "str"})
# Write the reports table to an Excel file prefixed with the run date,
# without the index.
df_reports.to_excel(
    f"../data/out/{today_files}_TalentCards users reports.xlsx",
    index=False,
)

# SCRIPTING