In [2]:
#!pip install pymysql

In [3]:
import pymysql
pymysql.install_as_MySQLdb()
import MySQLdb

import pandas as pd
from sqlalchemy import create_engine

from config import db_username, db_password, db_host, db_port, db_name

### Store CSV into DataFrame

In [4]:
csv_file = "../Resources/customer_data.csv"
customer_data_df = pd.read_csv(csv_file)
customer_data_df.head()

Unnamed: 0,customer_id,first_name,last_name,email,gender,car
0,1,Benetta,Cancott,bcancott0@studiopress.com,Female,Scion
1,2,Lilyan,Cherry,lcherry1@deliciousdays.com,Female,Chrysler
2,3,Ezekiel,Benasik,ebenasik2@wikia.com,Male,Mercedes-Benz
3,4,Kennedy,Atlay,katlay3@so-net.ne.jp,Male,Buick
4,5,Sanford,Salmen,ssalmen4@reuters.com,Male,Lincoln


### Create new data with select columns

In [5]:
new_customer_data_df = customer_data_df[['customer_id', 'first_name', 'last_name']]
new_customer_data_df.head()

Unnamed: 0,customer_id,first_name,last_name
0,1,Benetta,Cancott
1,2,Lilyan,Cherry
2,3,Ezekiel,Benasik
3,4,Kennedy,Atlay
4,5,Sanford,Salmen


### Store JSON data into a DataFrame

In [7]:
json_file = "../Resources/customer_location.json"
customer_location_df = pd.read_json(json_file)
customer_location_df.head()

Unnamed: 0,id,address,longitude,latitude,us_state
0,1,043 Mockingbird Place,-86.5186,39.1682,Indiana
1,2,4 Prentice Point,-85.0707,41.0938,Indiana
2,3,46 Derek Junction,-96.7776,32.7673,Texas
3,4,11966 Old Shore Place,-94.3567,39.035,Missouri
4,5,5 Evergreen Circle,-73.9772,40.7808,New York


### Clean DataFrame

In [20]:
new_customer_location_df = customer_location_df[["id", "address", "us_state"]]
new_customer_location_df.rename(columns={'id':'customer_id'}, inplace=True)
new_customer_location_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(**kwargs)


Unnamed: 0,customer_id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York


### Connect to Cloud Database

`dialect`://`username`:`password`@`hostname`:`port`/`databasename`

In [9]:
rds_connection_string = f'{db_username}:{db_password}@{db_host}:{db_port}/{db_name}'
engine = create_engine(f'mysql://{rds_connection_string}')
conn = engine.connect()

print(f'mysql://{rds_connection_string}')

mysql://root:Dartlings!@codingbootcamp.cze9dquabwlh.us-east-2.rds.amazonaws.com:3306/customer_db


### Check for tables

In [10]:
query = '''
    SELECT
        TABLE_SCHEMA,
        TABLE_NAME,
        CREATE_TIME
    FROM
        information_schema.tables
    WHERE
        TABLE_TYPE = 'BASE TABLE'
        AND TABLE_SCHEMA = 'customer_db';
'''

tables_df = pd.read_sql(query, conn)

tables_df.head(20)

Unnamed: 0,TABLE_SCHEMA,TABLE_NAME,CREATE_TIME
0,customer_db,customer_location,2020-12-22 22:47:17
1,customer_db,customer_name,2020-12-22 22:47:17


### Use pandas to load csv converted DataFrame into database

In [17]:
new_customer_data_df['dartling_name'] = 'Kwame 6'
new_customer_data_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,customer_id,first_name,last_name,dartling_name
0,1,Benetta,Cancott,Kwame 6
1,2,Lilyan,Cherry,Kwame 6
2,3,Ezekiel,Benasik,Kwame 6
3,4,Kennedy,Atlay,Kwame 6
4,5,Sanford,Salmen,Kwame 6


In [18]:
new_customer_data_df.to_sql(name='customer_name', con=engine, if_exists='append', index=False)

### Use pandas to load json converted DataFrame into database

In [21]:
new_customer_location_df.to_sql(name='customer_location', con=engine, if_exists='append', index=False)

### Confirm data has been added by querying the customer_name table
* NOTE: can also check using MySQL workbench

In [23]:
data_df = pd.read_sql('SELECT * from customer_name', con=conn)
data_df.head()

Unnamed: 0,record_id,customer_id,first_name,last_name,dartling_name
0,1,1,Benetta,Cancott,Dartanion
1,2,2,Lilyan,Cherry,Dartanion
2,3,3,Ezekiel,Benasik,Dartanion
3,4,4,Kennedy,Atlay,Dartanion
4,5,5,Sanford,Salmen,Dartanion


In [24]:
data_df['dartling_name'].unique()

array(['Dartanion', 'Rebekah', 'Vera', 'Redeat', 'Joseph', 'Mike',
       'Ashley', 'Mahdi', 'Meakin', 'Tas', 'SYLVESSSTTTEEERRRRRRR',
       'Leslie', 'Kwame', 'Katherine', 'Angela', 'Gene', 'Henry',
       'Brooke', 'Kwame 6'], dtype=object)

### Confirm data has been added by querying the customer_location table

In [26]:
pd.read_sql('select * from customer_location', con=engine).head()

Unnamed: 0,record_id,customer_id,address,us_state
0,1,1,043 Mockingbird Place,Indiana
1,2,2,4 Prentice Point,Indiana
2,3,3,46 Derek Junction,Texas
3,4,4,11966 Old Shore Place,Missouri
4,5,5,5 Evergreen Circle,New York
