# Acquire Notebook

##### This is where I am writing and testing the acquire functions for my telco churn classification project

In [2]:
# general imports (from big libraries)
import pandas as pd
import numpy as np
import os

In [3]:
# my specific imports
# this brings in my login credentials to the Codeup database
from env import host, user, password

In [4]:
# builds the connection url for the Codeup db using my login info
def get_connection(db, user=user, host=host, password=password):
    '''
    This function uses my env file to create a connection url to access
    the Codeup database.

    The user name and password are percent-encoded before being placed in
    the url, so credentials containing characters such as '@', ':' or '/'
    do not corrupt the url.

    Parameters:
        db: name of the database to connect to
        user, host, password: login details; default to the values
            imported from the env file

    Returns:
        str: a 'mysql+pymysql://' connection url
    '''
    # Imported locally so this cell works without touching the import cell.
    from urllib.parse import quote_plus

    # str() keeps the old behavior of accepting any value an f-string would.
    safe_user = quote_plus(str(user))
    safe_password = quote_plus(str(password))
    return f'mysql+pymysql://{safe_user}:{safe_password}@{host}/{db}'

In [5]:
# This function will connect with the Codeup database to join all tables in the telco db
# to return a pandas DataFrame
def new_telco_data():
    '''
    This function queries the telco_churn db and returns the result as a
    pandas DataFrame. The query joins the 'customers' table to the
    'contract_types', 'internet_service_types', and 'payment_types' tables
    on their shared id columns and selects every column.
    '''
    connection_url = get_connection('telco_churn')
    query = '''SELECT * 
                    FROM customers
                    JOIN contract_types USING(contract_type_id)
                    JOIN internet_service_types USING(internet_service_type_id)
                    JOIN payment_types USING(payment_type_id)'''
    telco = pd.read_sql(query, connection_url)
    return telco

In [6]:
# This function builds on top of new_telco_data by first looking to see if there is a .csv of the telco DataFrame, and
# creating one if there is not. This optimizes performance/runtime by only needing to connect to the server one time
# and then using a local .csv thereafter
def get_telco_data():
    '''
    This function returns the Telco data as a df. If 'telco_df.csv' exists
    in the working directory it is read from there; otherwise the data is
    pulled with new_telco_data and written to that csv before returning.
    '''
    cache_file = 'telco_df.csv'

    # No local copy yet: pull fresh data and cache it for next time.
    if not os.path.isfile(cache_file):
        fresh = new_telco_data()
        fresh.to_csv(cache_file)
        return fresh

    # Local copy found: the first csv column is the saved index.
    return pd.read_csv(cache_file, index_col=0)

In [7]:
def overview(df):
    '''
    This function prints the shape and info of the df. It also prints a breakdown of the number of unique values
    in each column to help determine which are categorical/discrete, and which are numerical/continuous. Finally,
    it prints the summary statistics from df.describe().

    Parameters:
        df: the pandas DataFrame to summarize

    Returns:
        None (everything is printed)
    '''
    print(f'This dataframe has {df.shape[0]} rows and {df.shape[1]} columns.')
    print('----------------------------------')
    print('')
    # df.info() prints its own report and returns None, so wrapping it in
    # print() would add a stray "None" line to the output.
    df.info()
    print('----------------------------------')
    print('')
    print('Unique value counts of each column')
    print('')
    print(df.nunique())
    print('----------------------------------')
    print('')
    print('Stats on Numeric Columns')
    print('')
    print(df.describe())

### Key Findings and Takeaways:
- Always remember to push up the .gitignore first, to keep login credentials private/secure
- The get_connection function will work for any database in the Codeup database (IF you have login credentials.)
    - If you do not, this function can be adjusted to work with whatever database you do have credentials for
- The new_telco_data function is great for this project, and can be tweaked to work for others...
    - Always test the sql_query in Sequel Ace first!!!!!
    - This query can be as simple or complex as you need it to be
- The get_telco_data function is a wonderful time saver / efficiency optimizer
    - The function will first look to see if there is a .csv of the db stored locally, and open it if available
    - If not, it will run the new_telco_data function to acquire the telco_churn data, and store it in a local .csv
- The overview function is a great way to get a bird's eye view of the dataset
- This dataset is 7043 rows, by 24 columns.
- At first glance, there do not appear to be any missing values.