# GOAL
- Write the notebook objectives here.

# PACKAGES

In [151]:
import pandas as pd
from google.oauth2 import service_account
import pandas_gbq
import logging
import gcsfs

# PARAMETERS

In [152]:
# GCP project used for both the GCS filesystem client and the BigQuery load.
project_id = "analytics-dev-308300"
# Service-account key file; used as the gcsfs token and for BigQuery credentials.
cred_file="../keys/gcp_key.json"

# GCS path of the Excel workbook holding the manually exported user list.
users_filename='gs://humane-landing-zone/manual/talentcards_user_list.xlsx'

# Destination BigQuery table for the prepared users data.
users_table='talentcards.users_manual'

In [153]:
# Notebook display settings: show up to 200 rows and lift the limits on
# column count, total width and per-cell width.
_display_options = (
    ("display.max_rows", 200),
    ("display.max_columns", None),
    ("display.width", None),
    ("display.max_colwidth", None),
)
for _option_name, _option_value in _display_options:
    pd.set_option(_option_name, _option_value)

# FUNCTIONS

## Get excel file from GCS with users_data

In [154]:
def get_users_xlsx(project_id, users_filename, cred_file):
    """
    (str,str,str)-->df

    Load the users Excel workbook from GCS into a DataFrame.

    project_id: GCP project handed to the gcsfs client.
    users_filename: path of the Excel file, opened through gcsfs.
    cred_file: credentials token handed to the gcsfs client.

    Returns the DataFrame read from the workbook, with every '-' removed
    from the string values of its 'Identifier' column.
    """
    gcs = gcsfs.GCSFileSystem(
        project=project_id,
        access='read_write',
        token=cred_file,
    )

    with gcs.open(users_filename) as workbook:
        frame = pd.read_excel(workbook)

    # regex=True makes this a substring replacement, so hyphens inside an
    # identifier are stripped rather than only whole-cell '-' values.
    hyphen_free_ids = frame['Identifier'].replace('-', '', regex=True)
    frame['Identifier'] = hyphen_free_ids
    return frame

## Calculate days since last login

In [155]:
def transform2timedelta(string_like_timedelta):
    """
    (str)-->int or float('nan')

    Convert a relative "last used" label such as '2 weeks ago' into a whole
    number of days.

    string_like_timedelta: label of the form '<count> <unit> ago', or the
        literal 'Never'. Missing cells (None / NaN) are accepted as well.

    Returns the number of whole days as an int. 'Never' and missing values
    give float('nan'). A month is approximated as 30 days.

    Raises ValueError when the string is neither 'Never' nor an '... ago'
    label, and TypeError for non-missing, non-string input.

    Examples:
    >>transform2timedelta('2 days ago')
    2

    >>transform2timedelta('2 weeks ago')
    14

    >>transform2timedelta('1 week ago')
    7

    >>transform2timedelta('1 month ago')
    30

    """
    if not isinstance(string_like_timedelta, str):
        # Empty spreadsheet cells arrive as NaN/None; treat them like 'Never'
        # instead of failing on the substring test below.
        if pd.api.types.is_scalar(string_like_timedelta) and pd.isna(string_like_timedelta):
            return float('nan')
        raise TypeError(
            "expected a string, got {!r}".format(type(string_like_timedelta).__name__)
        )

    label = string_like_timedelta.strip()
    if label == 'Never':
        return float('nan')
    if 'ago' not in label:
        # Fail with an explicit message rather than an UnboundLocalError.
        raise ValueError(
            "unrecognised 'last used' value: {!r}".format(string_like_timedelta)
        )

    span = label.replace(' ago', '')
    if 'week' in span:
        # pandas understands the 'W' unit directly.
        span = span.replace('weeks', 'W').replace('week', 'W')
    elif 'month' in span:
        # pandas has no fixed-length month unit, so approximate it as 30 days.
        month_count = int(span.split()[0])
        return pd.Timedelta(days=month_count * 30).days
    return pd.Timedelta(span).days


# transform2timedelta returns a number of days (NaN for 'Never'), not a Timedelta.

# assert pd.isna(transform2timedelta('Never'))

# assert transform2timedelta('2 days ago')==2

# assert transform2timedelta('2 weeks ago')==14

# assert transform2timedelta('1 week ago')==7

# assert transform2timedelta('1 month ago')==30

## prepare_users_df

In [162]:
def prepare_users_df(users_df,group_id=1818):
    """
    (df,int)-->df

    Build the load-ready users table from the raw Excel DataFrame.

    users_df: raw DataFrame; must contain 'Last used' and 'Joined' columns.
        It is copied, so the caller's frame is left untouched.
    group_id: value written to every row of the added 'group_id' column.

    Returns a new DataFrame in which:
    - 'Last used' is converted with transform2timedelta and renamed to
      'days_since_last_login';
    - 'Joined' is mapped from 'Yes'/'No' to True/False and renamed to
      'joined_group';
    - 'Status' and 'Name' are renamed to 'group_activation' and 'user_name';
    - 'group_id' and 'extraction_timestamp' (current UTC time formatted as
      'YYYY-MM-DD HH:MM:SS') are added;
    - all column names are lower-cased.
    """
    joined_flags = {'No': False, 'Yes': True}
    new_names = {
        'Last used': 'days_since_last_login',
        'Joined': 'joined_group',
        'Status': 'group_activation',
        'Name': 'user_name',
    }

    prepared = users_df.copy()
    prepared['Last used'] = prepared['Last used'].apply(transform2timedelta)
    prepared['Joined'] = prepared['Joined'].map(joined_flags)
    prepared = prepared.rename(columns=new_names)

    prepared['group_id'] = group_id
    extracted_at = pd.Timestamp.today(tz='utc')
    prepared['extraction_timestamp'] = extracted_at.strftime('%Y-%m-%d %H:%M:%S')

    # Lower-case last so columns that were not renamed above are normalised too.
    prepared.columns = [name.lower() for name in prepared.columns]
    return prepared

## create gbq table with users_df

In [164]:
def create_users_table(project_id=project_id, users_filename=users_filename, cred_file=cred_file, table_name=users_table):
    """
    (str,str,str,str)-->gbq table

    Read the users Excel file from GCS, prepare it and load it into BigQuery.

    project_id: GCP project used for the GCS client and the BigQuery load.
    users_filename: path of the Excel file, passed to get_users_xlsx.
    cred_file: service-account key file, used for both GCS and BigQuery.
    table_name: destination BigQuery table; replaced if it already exists.

    Returns None; the result is the table written to BigQuery.
    """
    credentials = service_account.Credentials.from_service_account_file(cred_file)

    raw_users = get_users_xlsx(
        project_id=project_id,
        users_filename=users_filename,
        cred_file=cred_file,
    )
    users = prepare_users_df(raw_users)

    # DataFrame.to_gbq is deprecated in pandas 2.2; call pandas_gbq directly,
    # which is what the DataFrame method delegated to.
    pandas_gbq.to_gbq(
        users,
        table_name,
        project_id=project_id,
        credentials=credentials,
        if_exists='replace',
    )

# DATA WRANGLING

In [165]:
# Run the whole pipeline with the notebook's default parameters: read the Excel
# file from GCS, prepare it and replace the BigQuery users table.
create_users_table()

45 out of 45 rows loaded.
45 out of 45 rows loaded.
45 out of 45 rows loaded.
1it [00:05,  5.83s/it]


# SCRIPTING