# Load libraries

In [1]:
### Load libraries and configure the notebook session
import os
import numpy as np
import pandas as pd

from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype

# NOTE(review): later cells call `widgets.Label` / `widgets.Text` without an explicit
# import of `widgets`, so that name presumably comes from this star import — confirm
# before narrowing it to explicit names.
from ipywidgets import *
from IPython.display import clear_output, display
# `display` is imported a second time here; this later binding is the one in effect.
from IPython.core.display import display, HTML
from IPython.core.interactiveshell import InteractiveShell
# Show the value of every expression statement in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

import warnings
# Blanket filter: hides every warning category for the rest of the session.
warnings.filterwarnings("ignore")
# Also ignores FutureWarning specifically (already covered by the blanket filter above).
warnings.simplefilter(action='ignore', category=FutureWarning)

import qgrid
from traitlets import All

# Project helper module; the cells below call it for loading, calculating and filtering.
import helpers.research_helpers as app

# Presumably the field separator for CSV files, with the tab alternative kept in the
# trailing comment. Not referenced by any cell visible in this part of the notebook.
CSV_SEPARATOR = ','     # '\t'

print('Notebook initialized.')

Notebook initialized.


# Load data

In [2]:
### Read the membership roster ('All Active Roster.csv') from the sampling folder

# Build the path once so the loader and the status message cannot drift apart.
roster_csv_path = app.FLDR_SAMPLING + '/All Active Roster.csv'
df_complete_list = app.load_csv(roster_csv_path)
print('{} records loaded from {}'.format(len(df_complete_list), roster_csv_path))

302352 records loaded from ../sampling/All Active Roster.csv


# Calculate... manager/worker

In [3]:
### Calculate Title based off of JOB_TITLE, BOSS_TITLE, and NUM_OVERSEEN and store in TITLE_CALCULATED
# NOTE(review): the column names above come from the original cell comment, not from code
# visible here — confirm against helpers.research_helpers.

# Rebinds df_complete_list to whatever app.assign_title returns, so re-running this cell
# feeds the already-processed frame back into the helper.
df_complete_list = app.assign_title(df_complete_list)

# Optional inspection grid (disabled).
# NOTE(review): `df_filtered_list` is not defined in any visible cell; `df_complete_list`
# is presumably what was intended.
#print('The grid below is meant as informational purposes only.')
#df_test = df_filtered_list.filter([app.COL_JOB_TITLE, app.COL_BOSS_TITLE, app.COL_NUM_OVERSEEN, app.COL_TITLE_CALCULATED])
#qgrid_widget = qgrid.show_grid(df_test, show_toolbar=False)
#qgrid_widget

Assigned Manager/Worker Title.


# Calculate... pub/private sector

In [4]:
### Calculate Pub/Private based off of ORG_TYPE, INDUSTRY, and EMAIL_DOMAIN and store in PUB_PRIVATE_CALCULATED
# NOTE(review): the column names above come from the original cell comment, not from code
# visible here — confirm against helpers.research_helpers.

# Rebinds df_complete_list to whatever app.assign_pub_private_sector returns, so re-running
# this cell feeds the already-processed frame back into the helper.
df_complete_list = app.assign_pub_private_sector(df_complete_list)

# Optional inspection grid (disabled).
# NOTE(review): `df_filtered_list` is not defined in any visible cell; `df_complete_list`
# is presumably what was intended.
#print('The grid below is meant as informational purposes only.')
#df_test = df_filtered_list.filter([app.COL_ORG_TYPE, app.COL_INDUSTRY, app.COL_EMAIL_DOMAIN, app.COL_PUB_PRIVATE_CALCULATED])
#qgrid_widget = qgrid.show_grid(df_test, show_toolbar=False)
#qgrid_widget

Assigned Pub/Private Sector.


# Calculate... expiration date range

In [5]:
### Calculate Expiration Range based off of EXPIRY_DATE and store in EXPIRY_RANGE_CALCULATED
# NOTE(review): the column names above come from the original cell comment, not from code
# visible here — confirm against helpers.research_helpers.

# Rebinds df_complete_list to whatever app.assign_expiration_range returns, so re-running
# this cell feeds the already-processed frame back into the helper.
df_complete_list = app.assign_expiration_range(df_complete_list)

# Optional inspection grid (disabled).
# NOTE(review): this snippet still selects the pub/private columns, so it looks copied from
# the pub/private cell; the expiration columns (e.g. app.COL_EXPIRY_RANGE_CALCULATED) are
# presumably what should be shown. `df_filtered_list` is also not defined in any visible cell.
#print('The grid below is meant as informational purposes only.')
#df_test = df_filtered_list.filter([app.COL_ORG_TYPE, app.COL_INDUSTRY, app.COL_EMAIL_DOMAIN, app.COL_PUB_PRIVATE_CALCULATED])
#qgrid_widget = qgrid.show_grid(df_test, show_toolbar=False)
#qgrid_widget

Assigned expiration range.


# Filter by... membership_items

In [6]:
### Please select those membership_items you would like to keep.
# The default list (passed below as app.KEEP_LIST_MEMBERSHIP_ITEMS) is already selected.
# Judging by the recorded output, the helper displays the selection widget itself; the
# returned object is kept so the "Apply filtering" cell can pass it back to the helper.

qgrid_membership_item = app.keep_grid_show_filter(df_complete_list, app.COL_MEMBERSHIP_ITEM, app.COL_MEMBER_ID, app.KEEP_LIST_MEMBERSHIP_ITEMS)

VBox(children=(HBox(children=(HTML(value='<h3>Your possible record count is <b><u>275490</u></b> out of <u>302…

## Apply changes

In [None]:
### Apply filtering
#
# Passes the selection grid, the column constant and df_complete_list to
# app.keep_grid_apply_filter and unpacks its two results. The second result is written back
# to app.KEEP_LIST_MEMBERSHIP_ITEMS, i.e. this cell mutates an attribute of the `app` module
# that the selection cell reads as its default list.
# NOTE(review): the meaning of the trailing `True` argument is not visible here — check the
# helper's signature.
#
# To just get the dataframe behind the selection grid, run the next 2 lines.
# NOTE(review): `qgrid_sheet_to_keep` is not defined in any visible cell; presumably
# `qgrid_membership_item` is meant.
#df_filter_by = qgrid_sheet_to_keep.get_changed_df()
#df_filter_by.head(40)

df_filtered_membership_item, app.KEEP_LIST_MEMBERSHIP_ITEMS = app.keep_grid_apply_filter(qgrid_membership_item, app.COL_MEMBERSHIP_ITEM, df_complete_list, True)

# Filter by... membership expiration date range

In [None]:
### Please select those expiration groups you would like to keep.
## The default list (passed below as app.KEEP_LIST_EXPIRES_IN) is already selected.

# Allow users to select which expiration groups, if any, they want to exclude.
# The input frame is df_filtered_membership_item, so the membership-item "Apply filtering"
# cell must have been run first.
qgrid_exp_date_range = app.keep_grid_show_filter(df_filtered_membership_item, app.COL_EXPIRY_RANGE_CALCULATED, app.COL_MEMBER_ID, app.KEEP_LIST_EXPIRES_IN)
# NOTE(review): `qgrid_sheet_to_keep` is not defined in any visible cell; presumably
# `qgrid_exp_date_range` is meant.
#qgrid_sheet_to_keep

## Apply changes

In [None]:
### Apply filtering
#
# Passes the selection grid, the column constant and df_filtered_membership_item to
# app.keep_grid_apply_filter and unpacks its two results. The second result is written back
# to app.KEEP_LIST_EXPIRES_IN, i.e. this cell mutates an attribute of the `app` module
# that the selection cell reads as its default list.
# NOTE(review): the meaning of the trailing `True` argument is not visible here — check the
# helper's signature.
#
# To just get the dataframe behind the selection grid, run the next 2 lines.
# NOTE(review): `qgrid_sheet_to_keep` is not defined in any visible cell; presumably
# `qgrid_exp_date_range` is meant.
#df_filter_by = qgrid_sheet_to_keep.get_changed_df()
#df_filter_by.head(40)

df_filtered_exp_date_range, app.KEEP_LIST_EXPIRES_IN = app.keep_grid_apply_filter(qgrid_exp_date_range, app.COL_EXPIRY_RANGE_CALCULATED, df_filtered_membership_item, True)

# Filter by... job_title

In [None]:
### Please select those job titles you would like to keep.
# app.KEEP_LIST_JOB_TITLES is passed in the same position as the default keep-lists of the
# other selection cells. The input frame is df_filtered_exp_date_range, so the expiration
# range "Apply filtering" cell must have been run first.

qgrid_job_title = app.keep_grid_show_filter(df_filtered_exp_date_range, app.COL_JOB_TITLE, app.COL_MEMBER_ID, app.KEEP_LIST_JOB_TITLES)

## Apply changes

In [None]:
### Apply filtering
#
# Passes the selection grid, the column constant and df_filtered_exp_date_range to
# app.keep_grid_apply_filter and unpacks its two results. The second result is written back
# to app.KEEP_LIST_JOB_TITLES, i.e. this cell mutates an attribute of the `app` module
# that the selection cell reads as its default list.
# NOTE(review): the meaning of the trailing `True` argument is not visible here — check the
# helper's signature.

# To just get the dataframe behind the selection grid, run the next 2 lines.
# NOTE(review): `qgrid_sheet_to_keep` is not defined in any visible cell; presumably
# `qgrid_job_title` is meant.
#df_filter_by = qgrid_sheet_to_keep.get_changed_df()
#df_filter_by.head(40)

df_filtered_job_title, app.KEEP_LIST_JOB_TITLES = app.keep_grid_apply_filter(qgrid_job_title, app.COL_JOB_TITLE, df_filtered_exp_date_range, True)

# Filter by... region 

## Apply changes 

# Filter by... survey recency

## Apply changes

# Save output

## Get project/directory name

In [15]:
### Ask for the project name; later cells read project_name_txt.value to build the output folder

project_name_lbl = widgets.Label(value='Project Name: ')
project_name_txt = widgets.Text()
# One display call renders both widgets, label first.
display(project_name_lbl, project_name_txt)

Label(value='Project Name: ')

Text(value='')

In [None]:
### Show where the data will be saved
# The text is built from the project name as it is when this cell runs; rerun the cell
# after editing the name to refresh it.
project_name_results_lbl = widgets.Label(
    value='Your data will be saved to ' + app.FLDR_SAMPLING + '/' + project_name_txt.value + '/'
)
display(project_name_results_lbl)

In [None]:
### Create the project folder at <current working dir>/<app.FLDR_SAMPLING>/<project_name>

# Absolute path of the project folder, built from the name typed into project_name_txt.
new_abs_path = os.path.join(os.getcwd(), app.FLDR_SAMPLING, project_name_txt.value)

if not project_name_txt.value.strip():
    # With an empty name os.path.join yields the sampling folder itself, which already
    # exists once the roster has been loaded from it, so the "already exists" branch
    # below would fire with a misleading message and no project folder would be made.
    print('No project name was entered.')
    print('Enter one by rerunning the "Get project/directory name" cells, then rerun this cell.')
elif os.path.exists(new_abs_path):
    print('The project name already exists.')
    print('Either create a new project name by rerunning the "Get project/directory name" cells, or...')
    print('Use the project name as is.  \n\nWarning - this may cause issues later!')
else:
    # Create first, report second: if mkdir raises, no false "Created" message is shown.
    os.mkdir(new_abs_path)
    print('Created project folder in: \n{}'.format(new_abs_path))

## Save project files