In [1]:
import json
import os
from urllib.parse import quote_plus

import pandas as pd
# Import our pymongo library, which lets us connect our Flask app to our Mongo database.
import pymongo
from sqlalchemy import create_engine
from sqlalchemy import inspect

### Store CSV into DataFrame

In [2]:
# Read the raw customer CSV into a DataFrame and preview its first rows.
csv_file = "../Resources/customer_data.csv"
customer_data_df = pd.read_csv(filepath_or_buffer=csv_file)
customer_data_df.head(n=5)

Unnamed: 0,id,first_name,last_name,email,gender,car
0,1,Benetta,Cancott,bcancott0@studiopress.com,Female,Scion
1,2,Lilyan,Cherry,lcherry1@deliciousdays.com,Female,Chrysler
2,3,Ezekiel,Benasik,ebenasik2@wikia.com,Male,Mercedes-Benz
3,4,Kennedy,Atlay,katlay3@so-net.ne.jp,Male,Buick
4,5,Sanford,Salmen,ssalmen4@reuters.com,Male,Lincoln


### Create new DataFrame with selected columns

In [3]:
# Keep only the id and name columns; .copy() makes the result independent of customer_data_df.
new_customer_data_df = customer_data_df.loc[:, ['id', 'first_name', 'last_name']].copy()
new_customer_data_df.head(n=5)

Unnamed: 0,id,first_name,last_name
0,1,Benetta,Cancott
1,2,Lilyan,Cherry
2,3,Ezekiel,Benasik
3,4,Kennedy,Atlay
4,5,Sanford,Salmen


### Store JSON data into a DataFrame

In [4]:
# Parse the customer location JSON file into a DataFrame and preview its first rows.
json_file = "../Resources/customer_location.json"
customer_location_df = pd.read_json(path_or_buf=json_file)
customer_location_df.head(n=5)

Unnamed: 0,id,address,longitude,latitude,us_state
0,1,043 Mockingbird Place,-86.5186,39.1682,Indiana
1,2,4 Prentice Point,-85.0707,41.0938,Indiana
2,3,46 Derek Junction,-96.7776,32.7673,Texas
3,4,11966 Old Shore Place,-94.3567,39.035,Missouri
4,5,5 Evergreen Circle,-73.9772,40.7808,New York


### Clean DataFrame

In [5]:
# Keep only id, address and state (longitude/latitude are left out);
# .copy() makes the result independent of customer_location_df.
new_customer_location_df = customer_location_df.loc[:, ["id", "address", "us_state"]].copy()
new_customer_location_df.head(n=5)

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York


# NoSQL

In [12]:
# Turn each DataFrame into a list with one dict per row, ready for a bulk insert.
# orient="records" serialises row-by-row, so no transpose is needed and the
# DataFrame index never has to become a JSON key. json.loads returns built-in
# Python values (presumably the reason for the JSON round trip: it avoids
# handing numpy scalars to pymongo -- confirm) and gives a real list instead
# of a dict view.
records_data = json.loads(new_customer_data_df.to_json(orient="records"))
records_location = json.loads(new_customer_location_df.to_json(orient="records"))

In [None]:
# Template for a bulk insert: db.myCollection.insert_many(records)

In [13]:
# URI of the local MongoDB server.
conn = 'mongodb://localhost:27017'

# Open a client against that URI.
client = pymongo.MongoClient(conn)

# Select the customer_db database by name (per the original note, it is
# created if it does not exist yet).
db = client['customer_db']

# Start from empty collections so that re-running the inserts below does not
# leave duplicate documents behind.
for collection_name in ('customer_data', 'customer_location'):
    db[collection_name].drop()

In [14]:
# Bulk-insert the customer name records into the customer_data collection.
# Collection.insert() is deprecated and no longer exists in PyMongo 4 (the
# warning captured under this cell is presumably that deprecation notice);
# insert_many() is the supported replacement. list(...) hands it a concrete
# list whatever iterable records_data is, and .inserted_ids keeps the cell's
# displayed value what it was before: the list of generated ObjectIds.
db.customer_data.insert_many(list(records_data)).inserted_ids

  """Entry point for launching an IPython kernel.


[ObjectId('5dc6f9fbfa986fde9baab0e7'),
 ObjectId('5dc6f9fbfa986fde9baab0e8'),
 ObjectId('5dc6f9fbfa986fde9baab0e9'),
 ObjectId('5dc6f9fbfa986fde9baab0ea'),
 ObjectId('5dc6f9fbfa986fde9baab0eb'),
 ObjectId('5dc6f9fbfa986fde9baab0ec'),
 ObjectId('5dc6f9fbfa986fde9baab0ed'),
 ObjectId('5dc6f9fbfa986fde9baab0ee'),
 ObjectId('5dc6f9fbfa986fde9baab0ef'),
 ObjectId('5dc6f9fbfa986fde9baab0f0'),
 ObjectId('5dc6f9fbfa986fde9baab0f1'),
 ObjectId('5dc6f9fbfa986fde9baab0f2'),
 ObjectId('5dc6f9fbfa986fde9baab0f3'),
 ObjectId('5dc6f9fbfa986fde9baab0f4'),
 ObjectId('5dc6f9fbfa986fde9baab0f5'),
 ObjectId('5dc6f9fbfa986fde9baab0f6'),
 ObjectId('5dc6f9fbfa986fde9baab0f7'),
 ObjectId('5dc6f9fbfa986fde9baab0f8'),
 ObjectId('5dc6f9fbfa986fde9baab0f9'),
 ObjectId('5dc6f9fbfa986fde9baab0fa'),
 ObjectId('5dc6f9fbfa986fde9baab0fb'),
 ObjectId('5dc6f9fbfa986fde9baab0fc'),
 ObjectId('5dc6f9fbfa986fde9baab0fd'),
 ObjectId('5dc6f9fbfa986fde9baab0fe'),
 ObjectId('5dc6f9fbfa986fde9baab0ff'),
 ObjectId('5dc6f9fbfa986f

In [15]:
# Bulk-insert the customer location records into the customer_location collection.
# Collection.insert() is deprecated and no longer exists in PyMongo 4 (the
# warning captured under this cell is presumably that deprecation notice);
# insert_many() is the supported replacement. list(...) hands it a concrete
# list whatever iterable records_location is, and .inserted_ids keeps the
# cell's displayed value what it was before: the list of generated ObjectIds.
db.customer_location.insert_many(list(records_location)).inserted_ids

  """Entry point for launching an IPython kernel.


[ObjectId('5dc6fa02fa986fde9baab4cf'),
 ObjectId('5dc6fa02fa986fde9baab4d0'),
 ObjectId('5dc6fa02fa986fde9baab4d1'),
 ObjectId('5dc6fa02fa986fde9baab4d2'),
 ObjectId('5dc6fa02fa986fde9baab4d3'),
 ObjectId('5dc6fa02fa986fde9baab4d4'),
 ObjectId('5dc6fa02fa986fde9baab4d5'),
 ObjectId('5dc6fa02fa986fde9baab4d6'),
 ObjectId('5dc6fa02fa986fde9baab4d7'),
 ObjectId('5dc6fa02fa986fde9baab4d8'),
 ObjectId('5dc6fa02fa986fde9baab4d9'),
 ObjectId('5dc6fa02fa986fde9baab4da'),
 ObjectId('5dc6fa02fa986fde9baab4db'),
 ObjectId('5dc6fa02fa986fde9baab4dc'),
 ObjectId('5dc6fa02fa986fde9baab4dd'),
 ObjectId('5dc6fa02fa986fde9baab4de'),
 ObjectId('5dc6fa02fa986fde9baab4df'),
 ObjectId('5dc6fa02fa986fde9baab4e0'),
 ObjectId('5dc6fa02fa986fde9baab4e1'),
 ObjectId('5dc6fa02fa986fde9baab4e2'),
 ObjectId('5dc6fa02fa986fde9baab4e3'),
 ObjectId('5dc6fa02fa986fde9baab4e4'),
 ObjectId('5dc6fa02fa986fde9baab4e5'),
 ObjectId('5dc6fa02fa986fde9baab4e6'),
 ObjectId('5dc6fa02fa986fde9baab4e7'),
 ObjectId('5dc6fa02fa986f

In [None]:
# Inserts two player documents into the `team` collection of customer_db.
# NOTE(review): this is unrelated to the customer data and the cell was never
# executed -- looks like leftover example code; confirm whether it should stay.
team_documents = [
    {'player': 'Jessica', 'position': 'Point Guard'},
    {'player': 'Mark', 'position': 'Center'},
]
db.team.insert_many(team_documents)


### Connect to local PostgreSQL database

In [6]:
# Build the PostgreSQL URL from environment variables instead of typing the
# user name and password into the notebook, where they would be saved with it.
# PGUSER and PGPASSWORD must be set before this cell runs; a missing variable
# raises KeyError here rather than failing later at connection time.
# quote_plus escapes characters such as "@" or ":" that would otherwise be
# misread as URL delimiters.
pg_user = quote_plus(os.environ["PGUSER"])
pg_password = quote_plus(os.environ["PGPASSWORD"])
rds_connection_string = f"{pg_user}:{pg_password}@localhost:5432/customer_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

### Check for tables

In [7]:
# List the tables that exist in customer_db. Engine.table_names() is
# deprecated since SQLAlchemy 1.4 and removed in 2.0; the inspector returned
# by inspect(engine) is the supported way to ask for table names.
inspect(engine).get_table_names()

['customer_location', 'customer_name']

### Use pandas to load the CSV-converted DataFrame into the database

In [8]:
# Append the name rows to the customer_name table; the DataFrame index is not written.
# NOTE(review): with if_exists="append", re-running this cell tries to insert the same rows again.
new_customer_data_df.to_sql(
    name="customer_name",
    con=engine,
    index=False,
    if_exists="append",
)

### Use pandas to load the JSON-converted DataFrame into the database

In [9]:
# Append the location rows to the customer_location table; the DataFrame index is not written.
# NOTE(review): with if_exists="append", re-running this cell tries to insert the same rows again.
new_customer_location_df.to_sql(
    name="customer_location",
    con=engine,
    index=False,
    if_exists="append",
)

### Confirm data has been added by querying the customer_name table
* NOTE: can also check using pgAdmin

In [10]:
# Read customer_name back through the engine and show the first rows as a sanity check.
pd.read_sql_query(sql='select * from customer_name', con=engine).head(n=5)

Unnamed: 0,id,first_name,last_name
0,1,Benetta,Cancott
1,2,Lilyan,Cherry
2,3,Ezekiel,Benasik
3,4,Kennedy,Atlay
4,5,Sanford,Salmen


### Confirm data has been added by querying the customer_location table

In [11]:
# Read customer_location back through the engine and show the first rows as a sanity check.
pd.read_sql_query(sql='select * from customer_location', con=engine).head(n=5)

Unnamed: 0,id,address,us_state
0,1,043 Mockingbird Place,Indiana
1,2,4 Prentice Point,Indiana
2,3,46 Derek Junction,Texas
3,4,11966 Old Shore Place,Missouri
4,5,5 Evergreen Circle,New York
