## Imports

In [1]:
import sys
import pandas as pd
from datetime import date

# Tableau API connection
import tableauserverclient as TSC

# SQL server connection
from sqlalchemy import create_engine

from helpers import helpers_tableauserver as ts
from helpers import helpers_general as hg

*Some additional settings*

In [None]:
# Report each distinct warning only the first time it is raised.
import warnings
warnings.filterwarnings('once')

# When no -W options were given on the command line, silence warnings entirely;
# simplefilter() puts this rule in front of the 'once' rule registered above.
if not sys.warnoptions:
    warnings.simplefilter(action='ignore')

# Display the value of every expression statement in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

## Global parameters

***NB! Change Global parameters below with your credentials!***

### Tableau Development environment parameters

In [None]:
# Connection settings for the DEVELOPMENT Tableau server. All four values are
# placeholders and are passed to ts.connect_to_server_site further down.
server_url_dev = 'https://your_dev__server.tableau.example.com'    
# Token name / token value pair used to authenticate against the dev server.
token_name_dev = 'token_tableau_all_sites_dev'
token_value_dev = 'paste_token_value_dev'
site_name_dev = 'Default'  # any Site could be selected to start

### Tableau Production environment parameters

In [None]:
# Connection settings for the PRODUCTION Tableau server. All four values are
# placeholders and are passed to ts.connect_to_server_site further down.
server_url_prod = 'https://your_prod__server.tableau.example.com'    
# Token name / token value pair used to authenticate against the prod server.
token_name_prod = 'token_tableau_all_sites_prod'
token_value_prod = 'paste_token_value_prod'
site_name_prod = 'Default'  # any Site could be selected to start

### Parameters to store data from Tableau servers

*The data will be stored in a directory called `tableau_server_data` in the current working directory in Excel format*

In [None]:
# Excel target (file path + sheet name) for the combined users-and-groups frame.
file_path_excel_all_users = './tableau_server_data/tableau_server_users_groups.xlsx'
sheet_name_all_users = 'all_tableau_users_and_groups'

# Excel target (file path + sheet name) for the dev-vs-prod licensed-user comparison.
file_path_excel_users_compared = './tableau_server_data/comparison_licensed_users_on_servers.xlsx'
sheet_name_compared_users = 'comparison_users_on_servers'

*The data will also be collected daily in a CSV file stored in the same directory.*

In [None]:
# CSV file that hg.append_df_to_csv appends the combined users-and-groups frame to.
file_path_local = './tableau_server_data/all_tableau_users_group.csv'

*Additionally, the data will be stored in a Microsoft SQL Server database.*

In [None]:
# Microsoft SQL Server connection settings (placeholders). server_sql, database,
# username and password are interpolated into the SQLAlchemy connection URL;
# schema_ref and table_ref name the target table used by the write and the checks.
# NOTE(review): credentials are kept in plain text here; consider loading them
# from the environment or a secrets store before sharing the notebook.
server_sql = 'server_sql'
database = 'TEMP' 
schema_ref = 'dbo'
username = 'database_username'  
password = 'database_password'
table_ref = 'all_tableau_users_group'

## Get users and their groups from Tableau Development server

### Open development server connection

In [None]:
# Open the connection to the development server with the dev credentials; the
# returned `server` object is handed to every ts.* call until the sign-out cell.
server = ts.connect_to_server_site(server_url_dev, token_name_dev, token_value_dev, site_name_dev)

### Get users from all the server sites

In [None]:
# Collect the development server's users into a DataFrame (explored and merged below).
df_users_dev = ts.get_all_server_users(server)

*Do some exploratory analysis of our Tableau server users*

In [None]:
# Quick look at the dev users frame: schema, first rows and a random sample.
df_users_dev.info()
df_users_dev.head()
# DataFrame.sample raises ValueError when asked for more rows than the frame
# holds, so cap the sample size at the number of available rows.
df_users_dev.sample(min(10, len(df_users_dev)))

### Get tableau server groups

In [None]:
# Collect the development server's groups into a DataFrame; it is later joined to
# the users frame on user_name_ad / user_tableau_id / site_name.
df_groups_dev = ts.get_all_server_groups(server)

*Do some exploratory analysis of our Tableau server groups and the users belonging to those groups*

In [None]:
# Quick look at the dev groups frame: schema, first rows and a random sample.
df_groups_dev.info()
df_groups_dev.head()
# DataFrame.sample raises ValueError when asked for more rows than the frame
# holds, so cap the sample size at the number of available rows.
df_groups_dev.sample(min(10, len(df_groups_dev)))

### All users & their groups on Tableau DEVELOPMENT server

In [None]:
# Columns the groups and users frames are joined on.
join_keys = ['user_name_ad', 'user_tableau_id', 'site_name']

# Take from the users frame only the columns the groups frame does not already
# carry (compare against its column labels, not the DataFrame object itself),
# then add the join keys back so the right-hand side can still be merged on them.
col_to_use = df_users_dev.columns.difference(df_groups_dev.columns).union(join_keys)

# Left join: keep every group-membership row and attach the user attributes to it.
df_all_users_groups_dev = pd.merge(
    df_groups_dev,
    df_users_dev[col_to_use],
    how='left',
    on=join_keys,
)

*Additional useful information should be added to the DataFrame*

In [None]:
# Label every development row with its environment and the server it came from,
# so the rows stay distinguishable once dev and prod are combined.
df_all_users_groups_dev = df_all_users_groups_dev.assign(
    tableau_server_type='dev',
    tableau_server_url=server_url_dev,
)

### Close Tableau DEVELOPMENT server connection

In [None]:
# Close the development server session; `server` is re-bound to the production
# connection in the next section.
ts.sign_out_from_server(server)

## Get users and their groups from Tableau Production server

In [None]:
# Open the connection to the production server with the prod credentials; this
# re-binds `server`, which the following ts.* calls use.
server = ts.connect_to_server_site(server_url_prod, token_name_prod, token_value_prod, site_name_prod)

### Get users from all the production server sites

In [None]:
# Collect the production server's users into a DataFrame (merged with the groups below).
df_users_prod = ts.get_all_server_users(server)

### Get server groups

In [None]:
# Collect the production server's groups into a DataFrame; it is later joined to
# the users frame on user_name_ad / user_tableau_id / site_name.
df_groups_prod = ts.get_all_server_groups(server)

### All users & their groups on Tableau PRODUCTION server

In [None]:
# Columns the groups and users frames are joined on.
join_keys = ['user_name_ad', 'user_tableau_id', 'site_name']

# Take from the users frame only the columns the groups frame does not already
# carry (compare against its column labels, not the DataFrame object itself),
# then add the join keys back so the right-hand side can still be merged on them.
col_to_use = df_users_prod.columns.difference(df_groups_prod.columns).union(join_keys)

# Left join: keep every group-membership row and attach the user attributes to it.
df_all_users_groups_prod = pd.merge(
    df_groups_prod,
    df_users_prod[col_to_use],
    how='left',
    on=join_keys,
)

*Additional useful information should be added to the DataFrame of users and their groups on the production Tableau server*

In [None]:
# Label every production row with its environment and the server it came from,
# so the rows stay distinguishable once dev and prod are combined.
df_all_users_groups_prod = df_all_users_groups_prod.assign(
    tableau_server_type='prod',
    tableau_server_url=server_url_prod,
)

### Close Tableau PRODUCTION server connection

In [None]:
# Close the production server session; no further Tableau calls are made after this.
ts.sign_out_from_server(server)

### Appending DataFrames

In [None]:
# Stack the dev and prod frames into one, renumbering the rows from 0.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df_all_users_groups = pd.concat(
    [df_all_users_groups_dev, df_all_users_groups_prod],
    ignore_index=True,
)
# Random sample for a visual check; capped because DataFrame.sample raises
# ValueError when asked for more rows than the frame holds.
df_all_users_groups.sample(min(15, len(df_all_users_groups)))

### Do quick EDA and Data preprocessing

In [None]:
# Size and schema of the combined dev + prod frame.
df_all_users_groups.shape
df_all_users_groups.info()
# Fraction of missing values per column.
df_all_users_groups.isna().mean()
# Number of fully duplicated rows.
df_all_users_groups.duplicated().sum()

*Filling some NaNs*

In [None]:
# Assign the filled column back rather than calling fillna(inplace=True) on a
# column selection: the in-place form is chained assignment, which warns in
# recent pandas and leaves the frame unchanged under Copy-on-Write.
df_all_users_groups['group_min_site_role'] = (
    df_all_users_groups['group_min_site_role'].fillna('Undefined')
)

# Fill in missing last-login values with a sentinel date that cannot be a real login
df_all_users_groups['last_login'] = df_all_users_groups['last_login'].fillna('1900-01-01')

# validate the results
df_all_users_groups.isna().mean()

*Changing data types*

In [None]:
# Cast last_login to datetime64[ns]; missing values were replaced with the
# '1900-01-01' placeholder earlier in the notebook.
df_all_users_groups['last_login'] = df_all_users_groups['last_login'].astype('datetime64[ns]')

*Adding current date*

In [None]:
# Stamp every row with the date of this run (local calendar date, a datetime.date).
df_all_users_groups['update_date'] = date.today()

### Compare users on 2 servers

*In my case, there are users who are represented only on one of the servers and are absent on the other.*

*To manage licensing, we need to know who these users are. For this purpose a new DataFrame will be created.*

In [None]:
# Licensed (i.e. not 'Unlicensed') users on the development server, reduced to
# the columns needed for the dev-vs-prod comparison.
# The row mask is built from the dev frame itself; building it from the combined
# frame only worked through implicit index alignment.
df_licensed_users_dev = df_all_users_groups_dev.loc[
    df_all_users_groups_dev['site_role'] != 'Unlicensed',
    ['user_name_ad', 'full_name', 'tableau_server_type'],
]

In [None]:
# Licensed (i.e. not 'Unlicensed') users on the production server, reduced to
# the columns needed for the dev-vs-prod comparison.
df_licensed_users_prod = df_all_users_groups_prod.loc[
    df_all_users_groups_prod['site_role'] != 'Unlicensed',
    ['user_name_ad', 'full_name', 'tableau_server_type'],
]

In [None]:
# Full outer join of the licensed users of both servers: a user present on only
# one server gets NaN in the other server's tableau_server_type column.
# Repeated rows (one per group membership) are collapsed afterwards.
# NOTE(review): reset_index() without drop=True keeps the pre-deduplication row
# labels as an extra 'index' column in the result - confirm that is wanted.
df_compared_users_on_servers = (
    pd.merge(
        df_licensed_users_dev,
        df_licensed_users_prod,
        how='outer',
        on=['user_name_ad', 'full_name'],
    )
    .drop_duplicates()
    .reset_index()
)

In [None]:
# merge() suffixed the overlapping tableau_server_type column with _x (left = dev)
# and _y (right = prod); give both columns descriptive names.
df_compared_users_on_servers = df_compared_users_on_servers.rename(
    columns={
        'tableau_server_type_x': 'is_exists_on_dev',
        'tableau_server_type_y': 'is_exists_on_prod',
    }
)

In [None]:
# A NaN left by the outer join means the user is absent from that server: mark it 'No'.
# Assign the result back rather than calling fillna(inplace=True) on a column
# selection: the in-place form is chained assignment, which warns in recent
# pandas and leaves the frame unchanged under Copy-on-Write.
df_compared_users_on_servers['is_exists_on_dev'] = (
    df_compared_users_on_servers['is_exists_on_dev'].fillna('No')
)
df_compared_users_on_servers['is_exists_on_prod'] = (
    df_compared_users_on_servers['is_exists_on_prod'].fillna('No')
)

In [None]:
# Stamp every comparison row with the date of this run (local calendar date).
df_compared_users_on_servers['update_date'] = date.today()

## Store data to Excel, CSV file, and to SQL database

*Write to Excel file all Tableau server users and their groups*

In [None]:
# Export the combined users-and-groups frame to its configured Excel file and sheet.
hg.write_to_excel(df_all_users_groups, file_path_excel_all_users, sheet_name_all_users)

*Write the DataFrame comparing existing users on the 2 servers to an Excel file*

In [None]:

# Export the dev-vs-prod licensed-user comparison to its configured Excel file and sheet.
hg.write_to_excel(df_compared_users_on_servers, file_path_excel_users_compared, sheet_name_compared_users)

*Append data to CSV file*

In [None]:
# Append today's snapshot of the combined frame to the CSV file at file_path_local.
hg.append_df_to_csv(df_all_users_groups, file_path_local)

In [None]:
# Check that the rows were appended: read back the same file the append cell
# wrote to (file_path_local) instead of repeating its path as a literal.
pd.read_csv(file_path_local).shape

*Write DataFrame to Microsoft SQL Server table*

*- DB connection*

In [None]:
from urllib.parse import quote_plus

# Build the SQLAlchemy URL for SQL Server via pyodbc. The user name and password
# are URL-escaped so that characters such as '@', ':' or '/' in the credentials
# cannot be misread as URL delimiters.
conn_str = (
    f'mssql+pyodbc://{quote_plus(username)}:{quote_plus(password)}'
    f'@{server_sql}/{database}?driver=ODBC+Driver+17+for+SQL+Server'
)
engine = create_engine(conn_str)

In [None]:
# Write the combined users-and-groups frame to the table named by table_ref
# through the engine created above.
hg.write_df_to_sql(engine, df_all_users_groups, table_ref)

*- Confirm that the data is in the database*

In [None]:
# (rows, columns) of the frame that was written, for comparison with the table row count below.
df_all_users_groups.shape

In [None]:
%%time
query = f'SELECT COUNT (1) FROM {schema_ref}.{table_ref}'
_tmp = pd.read_sql(query, engine)
_tmp

In [None]:
# Fetch five rows from the target table as a spot check of what was stored.
query = f'SELECT TOP (5) * FROM {schema_ref}.{table_ref}'
_tmp = pd.read_sql(sql=query, con=engine)
_tmp