<a href="https://colab.research.google.com/github/aai510-group1/project/blob/main/project_jon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AAI-510 Group 1 - Project Implementation

In [12]:
# SYSTEM PACKAGES
# Installs the Graphviz binaries and development headers with apt.
# NOTE(review): presumably required to build `pygraphviz` (installed below) - confirm.
!apt-get install -qqq --no-install-recommends graphviz libgraphviz-dev

# USE uv FOR FASTER PIP INSTALLS
# `%pip` (not `!pip`) is used here so uv lands in the kernel's own environment.
%pip install --quiet --progress-bar=off uv

# INSTALL DEPENDENCIES
# The order of the following commands matters: bokeh and mkl are removed first,
# bokeh is then reinstalled (upgraded) together with the tooling packages, and
# autogluon/shap are installed last alongside the only pinned version (numpy 1.24.4).
# NOTE(review): every package except numpy is unpinned, so reruns may resolve
# different versions - consider pinning for reproducibility.
!uv pip uninstall --system --quiet bokeh mkl
!uv pip install --system --quiet --upgrade pip
!uv pip install --system --quiet --upgrade black[jupyter] bokeh datasets isort pygraphviz setuptools wheel
!uv pip install --system --quiet autogluon numpy==1.24.4 shap

[0m

In [5]:
# @title Create a Folder in Google Drive and Enable Top-Level Access in the File Browser { display-mode: "form" }
def setup_drive_folder(google_drive_folder):
    """Mount Google Drive and expose a project folder as ``/content/<name>``.

    The project folder is created (if missing) at
    ``/content/drive/MyDrive/Colab Notebooks/<name>`` and a symlink named
    ``/content/<name>`` is pointed at it, so the folder shows up at the top
    level of the Colab file browser.

    Args:
        google_drive_folder: Folder name (relative to "Colab Notebooks").
            A falsy value (``""``, ``None``) falls back to ``"temp"``.

    Returns:
        The shortcut path ``/content/<name>`` as a string.
    """
    import contextlib, google.colab, os, pathlib

    if not google_drive_folder:
        google_drive_folder = "temp"

    # Hide the mount banner. The devnull handle is managed by the same `with`
    # statement so it is closed again instead of being leaked.
    with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
        google.colab.drive.mount("/content/drive", force_remount=True)

    drive_path = pathlib.Path("/content/drive/MyDrive")
    colab_notebooks_path = drive_path / "Colab Notebooks"
    project_path = colab_notebooks_path / google_drive_folder
    project_path.mkdir(parents=True, exist_ok=True)

    shortcut = pathlib.Path(f"/content/{google_drive_folder}")
    shortcut.parent.mkdir(parents=True, exist_ok=True)

    # Path.exists() follows symlinks, so a dangling link reports False and a
    # subsequent symlink_to() would raise FileExistsError. Drop such a stale
    # link first; anything that resolves (link or real directory) is left alone.
    if shortcut.is_symlink() and not shortcut.exists():
        shortcut.unlink()
    if not shortcut.exists():
        shortcut.symlink_to(project_path)

    print(f"SHORTCUT: {shortcut} --> {project_path}")
    return str(shortcut)

# Folder name passed to setup_drive_folder(); the trailing `# @param` annotation
# must stay on the assignment line because it declares the Colab form field.
google_drive_folder = "aai510-group1"  # @param { type: "string" }
# Mount Drive and keep the returned shortcut path (a string) for later cells.
SHORTCUT = setup_drive_folder(google_drive_folder)

SHORTCUT: /content/aai510-group1 --> /content/drive/MyDrive/Colab Notebooks/aai510-group1


In [13]:
# LOAD MERGED TELCO CUSTOMER DATASET
from datasets import load_dataset
# Download the dataset from the Hub; the object is indexed by split name below.
# NOTE(review): token=False presumably forces unauthenticated access - confirm.
dataset = load_dataset('aai510-group1/telco-customer-churn', token=False)

# Turn the three splits into pandas DataFrames in one ordered unpacking step
# (train, test, validation - the same order in which they were converted before).
df_train, df_test, df_val = (
    dataset[split_name].to_pandas()
    for split_name in ('train', 'test', 'validation')
)

In [10]:
import os
from autogluon.tabular import TabularPredictor

# --- Experiment configuration ------------------------------------------------
# Column to predict and the metric handed to TabularPredictor.
TARGET_LABEL = "Churn"
EVAL_METRIC = "roc_auc"

# Resolution constant; it is not referenced by the training call in this cell.
DPI = 300

# Presets forwarded to fit(), in this order.
PRESETS = ["best_quality", "optimize_for_deployment"]

# Columns passed as `ignored_columns`, i.e. withheld from the learner.
# NOTE(review): the name suggests they reveal the target - confirm against the data dictionary.
LEAKED_FEATURES = ["Churn Category", "Churn Reason", "Churn Score", "Customer Status"]

# --- Training ------------------------------------------------------------------
# Build the predictor and fit it in a single expression so that `predictor`
# is only bound once fit() has returned.
predictor = TabularPredictor(
    label=TARGET_LABEL,
    eval_metric=EVAL_METRIC,
    verbosity=3,
    learner_kwargs={"ignored_columns": LEAKED_FEATURES},
).fit(
    df_train,
    tuning_data=df_val,  # the validation split is supplied as tuning data ...
    use_bag_holdout=True,  # ... together with use_bag_holdout
    presets=PRESETS,
    time_limit=3600,  # seconds
    dynamic_stacking=False,
)

No path specified. Models will be saved in: "AutogluonModels/ag-20240601_164949"
Presets specified: ['best_quality', 'optimize_for_deployment']
User Specified kwargs:
{'auto_stack': True,
 'keep_only_best': True,
 'num_bag_sets': 1,
 'save_space': True,
 'use_bag_holdout': True}
Full kwargs:
{'_feature_generator_kwargs': None,
 '_save_bag_folds': None,
 'ag_args': None,
 'ag_args_ensemble': None,
 'ag_args_fit': None,
 'auto_stack': True,
 'calibrate': 'auto',
 'ds_args': {'clean_up_fits': True,
             'detection_time_frac': 0.25,
             'holdout_data': None,
             'holdout_frac': 0.1111111111111111,
             'memory_safe_fits': True,
             'n_folds': 2,
             'n_repeats': 1,
             'validation_procedure': 'holdout'},
 'excluded_model_types': None,
 'feature_generator': 'auto',
 'feature_prune_kwargs': None,
 'holdout_frac': None,
 'hyperparameter_tune_kwargs': None,
 'included_model_types': None,
 'keep_only_best': True,
 'name_suffix': None,

In [8]:
import os
import matplotlib.pyplot as plt

# SAVE TRAINING SUMMARY
# fit_summary() is called with verbosity=3; whatever it returns is kept in
# `results` and its str() form is written to fit_summary.txt under predictor.path.
results = predictor.fit_summary(verbosity=3)
with open(os.path.join(predictor.path, "fit_summary.txt"), mode="w") as summary_file:
    # print(..., end="") emits exactly str(results) with no trailing newline.
    print(results, end="", file=summary_file)

# SAVE ENSEMBLE ARCHITECTURE
# Kept as the cell's final bare expression so its return value is displayed
# as the cell output.
predictor.plot_ensemble_model()

Loading: AutogluonModels/ag-20240601_120330/models/KNeighborsUnif_BAG_L1/model.pkl
Loading: AutogluonModels/ag-20240601_120330/models/KNeighborsDist_BAG_L1/model.pkl
Loading: AutogluonModels/ag-20240601_120330/models/LightGBMXT_BAG_L1/model.pkl
Loading: AutogluonModels/ag-20240601_120330/models/LightGBM_BAG_L1/model.pkl
Loading: AutogluonModels/ag-20240601_120330/models/RandomForestGini_BAG_L1/model.pkl
Loading: AutogluonModels/ag-20240601_120330/models/RandomForestEntr_BAG_L1/model.pkl
Loading: AutogluonModels/ag-20240601_120330/models/CatBoost_BAG_L1/model.pkl
Loading: AutogluonModels/ag-20240601_120330/models/ExtraTreesGini_BAG_L1/model.pkl
Loading: AutogluonModels/ag-20240601_120330/models/ExtraTreesEntr_BAG_L1/model.pkl
Loading: AutogluonModels/ag-20240601_120330/models/NeuralNetFastAI_BAG_L1/model.pkl
Loading: AutogluonModels/ag-20240601_120330/models/XGBoost_BAG_L1/model.pkl
Loading: AutogluonModels/ag-20240601_120330/models/NeuralNetTorch_BAG_L1/model.pkl
Loading: AutogluonMode

*** Summary of fit() ***
Estimated performance of each model:
                           model  score_val eval_metric  pred_time_val     fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0            WeightedEnsemble_L3   0.994849     roc_auc      21.703783  1039.909187                0.000708           0.255961            3       True        117
1           CatBoost_r163_BAG_L2   0.994650     roc_auc      20.579698  1011.233671                0.231538           5.467069            2       True        116
2           LightGBM_r196_BAG_L2   0.994583     roc_auc      20.493186  1020.408293                0.145026          14.641691            2       True        114
3        RandomForestGini_BAG_L2   0.994549     roc_auc      20.615235  1006.808271                0.267074           1.041669            2       True        111
4        RandomForestEntr_BAG_L2   0.994457     roc_auc      20.593266  1006.821486                0.245105           1.054884  

'AutogluonModels/ag-20240601_120330/ensemble_model.png'

In [11]:
!cp -r /content/AutogluonModels {SHORTCUT}

In [14]:
def colab2pdf():
    # Colab2PDF by Drengskapur (https://github.com/drengskapur/colab2pdf)
    # @title Convert Colab Notebook to PDF {display-mode:'form'}
    # VERSION 1.4
    # LICENSE: GPL-3.0-or-later
    #
    # Purpose: shows a "Download PDF" button; clicking it renders the current
    # notebook to PDF with Quarto and triggers a browser download.
    # Vendored third-party code - kept verbatim, comments only.

    # Install librsvg2-bin via apt; stdout is discarded.
    !apt-get install -yqq --no-install-recommends librsvg2-bin>/dev/null
    # All imports are function-local; the locale is then set to en_US.UTF-8.
    import contextlib,datetime,google,io,IPython,ipywidgets,json,locale,nbformat,os,pathlib,requests,urllib,warnings,werkzeug,yaml;locale.setlocale(locale.LC_ALL,'en_US.UTF-8')
    def convert(b):
        # Button callback. `b` is the clicked button; `s` is the status label
        # created further down in the enclosing function.
        try:
            # Show progress, disable the button, and register a post_execute hook
            # that removes every "#output-footer" element from the page.
            s.value='⚙️ Converting...';b.disabled=True;get_ipython().events.register('post_execute',lambda:IPython.display.display(IPython.display.Javascript('document.querySelectorAll("#output-footer").forEach(footer=>footer.remove());')))
            # n: notebook file name, taken from the first entry of the local
            # Jupyter sessions API, URL-unquoted and passed through secure_filename.
            n=pathlib.Path(werkzeug.utils.secure_filename(urllib.parse.unquote(requests.get(f'http://{os.environ["COLAB_JUPYTER_IP"]}:{os.environ["KMP_TARGET_PORT"]}/api/sessions').json()[0]['name'])))
            # p: per-run working directory /content/pdfs/<timestamp>_<notebook stem>.
            # nbformat's MissingIDFieldWarning is silenced from here on.
            p=pathlib.Path('/content/pdfs')/f'{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}_{n.stem}';p.mkdir(parents=True,exist_ok=True);warnings.filterwarnings('ignore',category=nbformat.validator.MissingIDFieldWarning)
            # nb: cells of the live notebook (requested from the Colab frontend,
            # 600 s timeout), minus any cell whose source contains '--Colab2PDF'.
            # This cell itself matches, because that marker literal appears in this line.
            nb=[cell for cell in nbformat.reads(json.dumps(google.colab._message.blocking_request('get_ipynb',timeout_sec=600)['ipynb']),as_version=4).cells if '--Colab2PDF' not in cell.source]
            # Write the filtered notebook; if no cells remain, a single '#' code cell is used.
            with (p/f'{n.stem}.ipynb').open('w',encoding='utf-8') as cp:nbformat.write(nbformat.v4.new_notebook(cells=nb or [nbformat.v4.new_code_cell('#')]),cp)
            # Write the Quarto metadata file: LaTeX snippets that redefine the
            # Highlighting/verbatim environments with line breaking enabled.
            with (p/'config.yml').open('w',encoding='utf-8') as f:yaml.dump({'include-in-header':[{'text':r'\usepackage{fvextra}\DefineVerbatimEnvironment{Highlighting}{Verbatim}{breaksymbolleft={},showspaces=false,showtabs=false,breaklines,breakanywhere,commandchars=\\\{\}}'}],'include-before-body':[{'text':r'\DefineVerbatimEnvironment{verbatim}{Verbatim}{breaksymbolleft={},showspaces=false,showtabs=false,breaklines}'}]},f)
            # Render the copied notebook to PDF with 1in margins on all sides.
            !quarto render {p}/{n.stem}.ipynb --metadata-file={p}/config.yml --to pdf -M latex-auto-install -M margin-top=1in -M margin-bottom=1in -M margin-left=1in -M margin-right=1in --quiet
            # Hand the PDF to the browser and report success in the label.
            google.colab.files.download(str(p/f'{n.stem}.pdf'));s.value=f'🎉 Downloaded {n.stem}.pdf'
        except Exception as e:
            # Any failure is surfaced in the status label instead of being raised.
            s.value=f'⚠️ ERROR {str(e)}'
        finally:
            # Re-enable the button whether or not the conversion succeeded.
            b.disabled=False
    # One-time setup: if /usr/local/bin/quarto is absent, download and install the
    # latest Quarto .deb, install TinyTeX through Quarto, then delete the .deb.
    if not pathlib.Path('/usr/local/bin/quarto').exists():
        !wget -q 'https://quarto.org/download/latest/quarto-linux-amd64.deb' && dpkg -i quarto-linux-amd64.deb>/dev/null && quarto install tinytex --update-path --quiet && rm quarto-linux-amd64.deb
    # Build the UI: button `b` wired to convert(), status label `s`, shown side by side.
    b=ipywidgets.widgets.Button(description='⬇️ Download PDF');s=ipywidgets.widgets.Label();b.on_click(lambda b:convert(b));IPython.display.display(ipywidgets.widgets.HBox([b,s]))
    # Remove the "#output-footer > input" element from this cell's output area.
    IPython.display.display(IPython.display.Javascript('document.currentScript.parentElement.closest(".output_subarea").querySelector("#output-footer > input").remove();'))
# Calling the function runs the setup above and displays the button.
colab2pdf()

HBox(children=(Button(description='⬇️ Download PDF', style=ButtonStyle()), Label(value='')))

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>