# Fetching Telco Customer Churn Dataset from Kaggle

In [None]:
import warnings
import joblib
from datetime import datetime
from pathlib import Path
import subprocess
import os
import zipfile

import pandas as pd

%load_ext autoreload
%autoreload 2

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Notebook-wide settings: silence every warning and tune how pandas renders
# DataFrames (row/column limits, cell width, float precision).
warnings.filterwarnings("ignore")
for option_name, option_value in {
    "display.max_rows": 120,
    "display.max_colwidth": 40,
    "display.precision": 4,
    "display.max_columns": None,  # None lifts the column limit
}.items():
    pd.set_option(option_name, option_value)

## Experimenting with Python Tools

In [None]:
# Let's define useful paths

In [None]:
from churn_detection.paths import EXTERNAL_DATA_DIR

In [None]:
# Absolute path of the directory the notebook kernel is running in.
CURRENT_DIR = Path.cwd()

In [None]:
# Relative paths of the downloaded archive and of the CSV read after extraction.
zip_file, csv_file = (
    Path("telco-customer-churn.zip"),
    Path("WA_Fn-UseC_-Telco-Customer-Churn.csv"),
)

In [None]:
# Let's download and unzip the dataset using Kaggle package

In [None]:
# Download the dataset archive through the Kaggle command-line tool.
# check=True makes a failed download raise CalledProcessError in this cell
# instead of surfacing later as a confusing "zip file not found" error.
subprocess.run(
    ["kaggle", "datasets", "download", "-d", "blastchar/telco-customer-churn"],
    check=True,
)

In [None]:
# Unpack every member of the downloaded archive into the working directory.
with zipfile.ZipFile(zip_file) as archive:
    archive.extractall(path=CURRENT_DIR)

In [None]:
# Load the extracted CSV into a DataFrame and preview its first five rows.
data = pd.read_csv(csv_file)
data.head(n=5)

In [None]:
# Store the table in Feather format inside the project's external-data directory.
feather_path = EXTERNAL_DATA_DIR / "customer_churn.feather"
data.to_feather(feather_path)

In [None]:
# Remove the downloaded archive now that its contents have been exported.
# Path.unlink works on every OS (the previous `cmd /c del` call only ran on
# Windows); missing_ok=True keeps the cell harmless when re-run after the file
# is already gone.
zip_file.unlink(missing_ok=True)

In [None]:
# Remove the extracted CSV as well. Path.unlink is portable across operating
# systems (the previous `cmd /c del` call only ran on Windows); missing_ok=True
# keeps the cell harmless when re-run after the file is already gone.
csv_file.unlink(missing_ok=True)

## Refactoring as Modular Code

In [None]:
from churn_detection import data

In [None]:
# Kaggle dataset identifier; the same one passed to the manual
# `kaggle datasets download` call earlier in this notebook.
target_dataset = "blastchar/telco-customer-churn"

In [None]:
# `data` is now the churn_detection.data module (the import above rebinds the
# name that previously held the raw DataFrame).
# NOTE(review): fetch_batch_data presumably wraps the manual download/extract/
# read steps and returns a pandas DataFrame (`.head()` is called on the result
# in the next cell) — confirm against churn_detection.data.
churn_data = data.fetch_batch_data(target_dataset)

In [None]:
# Preview the first rows of the fetched data.
churn_data.head()

In [None]:
# NOTE(review): presumably persists the fetched table (the modular counterpart
# of the manual Feather export earlier); destination and file format are
# decided inside churn_detection.data — confirm there.
data.save_batch_data(churn_data)