# GOAL
- Interact with TalentCards apis
- Get users data
- Get activity data

# PACKAGES

In [1]:
import json
import os
from datetime import datetime
from typing import Dict, List

import pandas as pd
import requests

# from google.oauth2 import service_account
# from oauth2client.service_account import ServiceAccountCredentials

# PARAMETERS

In [120]:
access_token = open("./keys/talentcards.txt", mode="r").readline()

# FUNCTIONS

## fix_columns_to_upload_to_bq

In [121]:
def fix_columns_to_upload_to_bq(df: pd.DataFrame):
    fixed_columns = [column.replace("-", "_") for column in df.columns.tolist()]
    df.columns = fixed_columns
    return df

## get_users_data

In [122]:
def get_users_data() -> List:
    """Get user details data from Talentlms API.

    Returns:
      Dictionary with userS details data.
    """
    users = []
    base_url = "https://www.talentcards.io/api/v1"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-type": "application/json",
        "Accept": "application/json",
    }
    users.append(
        requests.get(f"{base_url}/company/groups/1818/users", headers=headers).json()
    )

    num_pages = users[0]["meta"]["last_page"]
    if num_pages > 1:
        for page in range(2, num_pages + 1):
            users.append(
                requests.get(
                    f"{base_url}/company/groups/1818/users",
                    headers=headers,
                    params={"page[number]": page},
                ).json()
            )
    return users

## process_user_data

In [5]:
def process_user_data(raw_data: List, date: datetime) -> pd.DataFrame:
    """Process raw data to get only desired data.

    Args:
      date:
      raw_data (List): List of jason api responses.

    Returns:
      Pandas dataframe pandas with structured data.
    """
    users_df = pd.DataFrame()
    for response in raw_data:
        users_list = []
        for user in response["data"]:
            users_dict = {"user_id": user["id"]}
            users_dict.update(user["attributes"])
            update_at = user["attributes"]["updated-at"][:-6]
            update_at_date = datetime.strptime(update_at, "%Y-%m-%dT%H:%M:%S")
            users_dict["updated_at"] = update_at_date
            users_dict["days_since_last_login"] = (datetime.now() - update_at_date).days
            users_dict["date_str"] = date
            del users_dict["updated-at"]
            users_list.append(users_dict)
        response_df = pd.DataFrame(users_list).sort_values(
            by="user_id", ignore_index=True
        )
        users_df = users_df.append(response_df, ignore_index=True)
    users_df = users_df.sort_values(by="user_id", ignore_index=True)
    users_df.insert(0, "user_id_humane", range(1, len(users_df) + 1))
    return users_df

## get_reports_data

In [None]:
def get_reports_data(group, user) -> List:
    """Get user details data from Talentlms API.

    Returns:
      Dictionary with userS details data.
    """
    base_url = "https://www.talentcards.io/api/v1"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-type": "application/json",
        "Accept": "application/json",
    }
    response = []
    response.append(
        requests.get(
            f"{base_url}/company/groups/{group}/users/{user}/reports",
            headers=headers,
        ).json()
    )

    num_pages = response[0]["meta"]["last_page"]
    if num_pages > 1:
        for page in range(2, num_pages + 1):
            response.append(
                requests.get(
                    f"{base_url}/company/groups/{group}/users/{user}/reports",
                    headers=headers,
                ).json()
            )

    user_report = {"group": group, "user": user, "reports": response}

    return user_report

## process_reports_data

In [110]:
def process_reports_data(user_report_dict):
    """Process json user_report to get only desired data.

    Args:
      date:
      reports_data (Dict): Semi-structured json data.

    Returns:
      Pandas dataframe pandas with structured data.
    """

    report = []
    for page in user_report_dict["reports"]:
        for entry in page["data"]:
            if entry["type"] == "user-set-reports":
                report_dict = {
                    "group_id": user_report_dict["group"],
                    "user_id": user_report_dict["user"],
                    "sequence_id": "",
                    "set_id": entry["id"],
                    "set-tests": entry["meta"]["report"]["set-tests"],
                    "finished-tests": entry["meta"]["report"]["finished-tests"],
                    "progress": entry["meta"]["report"]["progress"],
                    "cards": entry["meta"]["report"]["cards"],
                    "tests": entry["meta"]["report"]["tests"],
                    "started-at": entry["meta"]["report"]["started-at"],
                    "completed-at": entry["meta"]["report"]["completed-at"],
                }
                report.append(report_dict)
            elif entry["type"] == "user-sequence-reports":
                for card_set in entry["meta"]["reports"]:
                    report_dict = {
                        "group_id": user_report_dict["group"],
                        "user_id": user_report_dict["user"],
                        "sequence_id": entry["id"],
                        "set_id": card_set["id"],
                        "set-tests": card_set["meta"]["report"]["set-tests"],
                        "finished-tests": card_set["meta"]["report"]["finished-tests"],
                        "progress": card_set["meta"]["report"]["progress"],
                        "cards": card_set["meta"]["report"]["cards"],
                        "tests": card_set["meta"]["report"]["tests"],
                        "started-at": card_set["meta"]["report"]["started-at"],
                        "completed-at": card_set["meta"]["report"]["completed-at"],
                    }
                    report.append(report_dict)
        reports_df = pd.DataFrame(report)
        reports_df["started-at"] = pd.to_datetime(reports_df["started-at"])
        reports_df["completed-at"] = pd.to_datetime(reports_df["completed-at"])

    return fix_columns_to_upload_to_bq(reports_df)

# DATA WRANGLING

##  users data

In [123]:
date_str = datetime.today().strftime("%Y-%m-%d")
df_users = process_user_data(get_users_data(), date_str)

In [124]:
df_users.to_excel("./data/out/users.xlsx")

In [125]:
df_users

Unnamed: 0,user_id_humane,user_id,access-token,email,first-name,last-name,mobile,created-at,last-login,updated_at,days_since_last_login,date_str
0,1,20029,AQZSDRJJNPOF,jmbenedetto@byhumane.com,JM,Benedetto,+5511957828747,2021-07-25T00:11:04+00:00,,2021-08-15 12:56:03,-1,2021-08-15
1,2,20030,AQBJIIYALBBE,ajacomy@byhumane.com,Aurelien,Jacomy,,2021-07-25T02:42:03+00:00,,2021-08-14 17:58:14,0,2021-08-15
2,3,20032,AQAISHKLLSMM,mciriani@byhumane.com,Martin,Ciriani,+5511999813322,2021-07-25T21:00:47+00:00,,2021-08-13 20:59:10,1,2021-08-15
3,4,20082,AQQGGRDGFKON,humane1@vintomaper.com,H,Aluno,,2021-07-31T23:54:33+00:00,,2021-08-08 18:44:09,6,2021-08-15
4,5,20100,AQGJEGFMZPFF,lamarante@byhumane.com,Luana,Amarante,+5585997228378,2021-08-03T11:05:51+00:00,,2021-08-14 15:59:01,0,2021-08-15
5,6,20108,AQQREHLKASWG,viniciusgalvaoia@gmail.com,Vinicius,Galvao,,2021-08-04T22:15:47+00:00,,2021-08-08 13:58:50,6,2021-08-15
6,7,20118,AQCAGKPUKWUI,contato@hyssopmedia.com,Martin,Ciriani,,2021-08-06T12:29:27+00:00,,2021-08-11 22:25:24,3,2021-08-15
7,8,20129,AQHQBUDACZMX,jacomya@gmail.com,Aurelien,Jacomy,,2021-08-08T20:10:50+00:00,,2021-08-12 21:04:54,2,2021-08-15
8,9,20130,AQBSAGDGUFDJ,frsantanna@gmail.com,Teste,Luana,+5551996868731,2021-08-08T22:11:25+00:00,,2021-08-13 11:56:04,1,2021-08-15
9,10,20134,AQNKMXSVXSHI,,Fernando,Cesar,+5511947791196,2021-08-09T16:39:58+00:00,,2021-08-13 13:20:30,1,2021-08-15


## user report

In [None]:
users_id = list(df_users["user_id"].unique())

In [111]:
group = 1818
user = 20029
user_report = get_reports_data(group=group, user=user)

In [112]:
df_actions = process_reports_data(user_report_dict=user_report)
df_actions

Unnamed: 0,group_id,user_id,sequence_id,set_id,set_tests,finished_tests,progress,cards,tests,started_at,completed_at
0,1818,20029,,11474,0,0,100,13,0,2021-08-15 10:14:46+00:00,2021-08-15 10:14:52+00:00
1,1818,20029,237.0,11401,2,2,100,18,2,2021-08-15 10:18:23+00:00,2021-08-15 12:30:36+00:00
2,1818,20029,237.0,11457,1,0,0,3,1,NaT,NaT
3,1818,20029,,11401,2,2,100,18,2,2021-08-15 10:18:23+00:00,2021-08-15 12:30:36+00:00
4,1818,20029,,11457,1,0,0,3,1,NaT,NaT
5,1818,20029,,11433,3,3,100,16,3,2021-08-13 21:49:07+00:00,2021-08-15 10:17:20+00:00


In [113]:
df_actions[df_actions["completed_at"] > datetime.today().strftime("%Y-%m-%d")]

Unnamed: 0,group_id,user_id,sequence_id,set_id,set_tests,finished_tests,progress,cards,tests,started_at,completed_at
0,1818,20029,,11474,0,0,100,13,0,2021-08-15 10:14:46+00:00,2021-08-15 10:14:52+00:00
1,1818,20029,237.0,11401,2,2,100,18,2,2021-08-15 10:18:23+00:00,2021-08-15 12:30:36+00:00
3,1818,20029,,11401,2,2,100,18,2,2021-08-15 10:18:23+00:00,2021-08-15 12:30:36+00:00
5,1818,20029,,11433,3,3,100,16,3,2021-08-13 21:49:07+00:00,2021-08-15 10:17:20+00:00


In [114]:
df_actions.sort_values(by="completed_at", ascending=False)

Unnamed: 0,group_id,user_id,sequence_id,set_id,set_tests,finished_tests,progress,cards,tests,started_at,completed_at
1,1818,20029,237.0,11401,2,2,100,18,2,2021-08-15 10:18:23+00:00,2021-08-15 12:30:36+00:00
3,1818,20029,,11401,2,2,100,18,2,2021-08-15 10:18:23+00:00,2021-08-15 12:30:36+00:00
5,1818,20029,,11433,3,3,100,16,3,2021-08-13 21:49:07+00:00,2021-08-15 10:17:20+00:00
0,1818,20029,,11474,0,0,100,13,0,2021-08-15 10:14:46+00:00,2021-08-15 10:14:52+00:00
2,1818,20029,237.0,11457,1,0,0,3,1,NaT,NaT
4,1818,20029,,11457,1,0,0,3,1,NaT,NaT


# SCRIPTING