In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pydataset import data
import statistics
import seaborn as sns
import env
import os

### 7. Make a function named `get_titanic_data` that returns the titanic data from the codeup data science database as a pandas data frame. Obtain your data from the _Codeup Data Science Database_. 

In [2]:
def get_connection(db, user=env.user, host=env.host, password=env.password):
    return f'mysql+pymysql://{user}:{password}@{host}/{db}'

In [3]:
def new_titanic_data():
    return pd.read_sql('SELECT * FROM passengers', get_connection('titanic_db'))

In [4]:
def get_titanic_data():
    filename = "titanic.csv"
    if os.path.isfile(filename):
        return pd.read_csv(filename)
    else:
        # read the SQL query into a dataframe
        df = new_titanic_data()
        
        # Write that dataframe to disk for later. Called "caching" the data for later.
        df.to_csv(filename)

        # Return the dataframe to the calling code
        return df  

In [5]:
titanic_df = get_titanic_data()
titanic_df.head()

Unnamed: 0.1,Unnamed: 0,passenger_id,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,deck,embark_town,alone
0,0,0,0,3,male,22.0,1,0,7.25,S,Third,,Southampton,0
1,1,1,1,1,female,38.0,1,0,71.2833,C,First,C,Cherbourg,0
2,2,2,1,3,female,26.0,0,0,7.925,S,Third,,Southampton,1
3,3,3,1,1,female,35.0,1,0,53.1,S,First,C,Southampton,0
4,4,4,0,3,male,35.0,0,0,8.05,S,Third,,Southampton,1


### 8. Make a function named `get_iris_data` that returns the data from the `iris_db` on the codeup data science database as a pandas data frame. The returned data frame should include the actual name of the species in addition to the `species_id`s. Obtain your data from the _Codeup Data Science Database_. 

In [6]:
sql = """
            SELECT 
                species_id,
                species_name,
                sepal_length,
                sepal_width,
                petal_length,
                petal_width
            FROM measurements
            JOIN species USING(species_id)
            
            """

In [7]:
def new_iris_data():
    return pd.read_sql(sql, get_connection('iris_db'))

In [8]:
def get_iris_data():
    filename = "iris.csv"
    if os.path.isfile(filename):
        return pd.read_csv(filename)
    else:
        # read the SQL query into a dataframe
        df = new_iris_data()
        
        # Write that dataframe to disk for later. Called "caching" the data for later.
        df.to_csv(filename)

        # Return the dataframe to the calling code
        return df  

In [9]:
iris_df = get_iris_data()
iris_df

Unnamed: 0,species_id,species_name,sepal_length,sepal_width,petal_length,petal_width
0,1,setosa,5.1,3.5,1.4,0.2
1,1,setosa,4.9,3.0,1.4,0.2
2,1,setosa,4.7,3.2,1.3,0.2
3,1,setosa,4.6,3.1,1.5,0.2
4,1,setosa,5.0,3.6,1.4,0.2
5,1,setosa,5.4,3.9,1.7,0.4
6,1,setosa,4.6,3.4,1.4,0.3
7,1,setosa,5.0,3.4,1.5,0.2
8,1,setosa,4.4,2.9,1.4,0.2
9,1,setosa,4.9,3.1,1.5,0.1


### 9. Make a function named `get_telco_data` that returns the data from the `telco_churn` database in SQL. In your SQL, be sure to join all 4 tables together, so that the resulting dataframe contains all the contract, payment, and internet service options. Obtain your data from the _Codeup Data Science Database_. 

In [10]:
sql = """

SELECT * FROM customers
JOIN contract_types USING (contract_type_id)
JOIN internet_service_types USING (internet_service_type_id)
JOIN payment_types USING (payment_type_id)
;

"""

In [11]:
def new_telco_data():
    return pd.read_sql(sql, get_connection('telco_churn'))

In [12]:
def get_telco_data():
    filename = "telco.csv"
    if os.path.isfile(filename):
        return pd.read_csv(filename)
    else:
        # read the SQL query into a dataframe
        df = new_telco_data()
        
        # Write that dataframe to disk for later. Called "caching" the data for later.
        df.to_csv(filename)

        # Return the dataframe to the calling code
        return df  

In [13]:
telco_df = get_telco_data()
telco_df

Unnamed: 0.1,Unnamed: 0,payment_type_id,internet_service_type_id,contract_type_id,customer_id,gender,senior_citizen,partner,dependents,tenure,...,tech_support,streaming_tv,streaming_movies,paperless_billing,monthly_charges,total_charges,churn,contract_type,internet_service_type,payment_type
0,0,2,1,3,0016-QLJIS,Female,0,Yes,Yes,65,...,Yes,Yes,Yes,Yes,90.45,5957.9,No,Two year,DSL,Mailed check
1,1,4,1,3,0017-DINOC,Male,0,No,No,54,...,Yes,Yes,No,No,45.20,2460.55,No,Two year,DSL,Credit card (automatic)
2,2,3,1,3,0019-GFNTW,Female,0,No,No,56,...,Yes,No,No,No,45.05,2560.1,No,Two year,DSL,Bank transfer (automatic)
3,3,4,1,3,0056-EPFBG,Male,0,Yes,Yes,20,...,Yes,No,No,Yes,39.40,825.4,No,Two year,DSL,Credit card (automatic)
4,4,3,1,3,0078-XZMHT,Male,0,Yes,No,72,...,Yes,Yes,Yes,Yes,85.15,6316.2,No,Two year,DSL,Bank transfer (automatic)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,7038,2,3,1,9962-BFPDU,Female,0,Yes,Yes,1,...,No internet service,No internet service,No internet service,No,20.05,20.05,No,Month-to-month,,Mailed check
7039,7039,2,3,1,9967-ATRFS,Female,0,No,No,19,...,No internet service,No internet service,No internet service,No,19.90,367.55,No,Month-to-month,,Mailed check
7040,7040,4,3,1,9970-QBCDA,Female,0,No,No,6,...,No internet service,No internet service,No internet service,No,19.70,129.55,No,Month-to-month,,Credit card (automatic)
7041,7041,2,3,1,9975-SKRNR,Male,0,No,No,1,...,No internet service,No internet service,No internet service,No,18.90,18.9,No,Month-to-month,,Mailed check


### 10. Once you've got your `get_titanic_data`, `get_iris_data`, and `get_telco_data` functions written, now it's time to add caching to them. To do this, edit the beginning of the function to check for the local filename of `telco.csv`, `titanic.csv`, or `iris.csv`. If they exist, use the .csv file. If the file doesn't exist, then produce the SQL and pandas necessary to create a dataframe, then write the dataframe to a .csv file with the appropriate name. 

## SEE CODE ABOVE FOR EACH RESPECTIVE FILE

In [14]:
# import os

# def get_titanic_data():
#     filename = "titanic.csv"
    
#     if os.path.isfile(filename):
#         return pd.read_csv(filename)
#     else:
#         # read the SQL query into a dataframe
#         df = new_titanic_data()
        
#         # Write that dataframe to disk for later. Called "caching" the data for later.
#         df.to_csv(filename)

#         # Return the dataframe to the calling code
#         return df  