### Import Required Libraries

In [1]:
import os
import json
import pymongo
import pandas as pd
import pymysql
from sqlalchemy import create_engine

#### Declare & Assign Connection Variables for the MySQL Server & Databases Working With

In [2]:
host_name = "localhost"
host_ip = "127.0.0.1"
port = "3306"
user_id = "arong"
pwd = "Passw0rd123"

src_dbname = "US_City_Pop"
dst_dbname = "ds3002_capstone"

#### Define Functions for Getting Data From and Setting Data into Databases

In [3]:
def get_dataframe(user_id, pwd, host_name, db_name, sql_query):
    conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}/{db_name}"
    sqlEngine = create_engine(conn_str, pool_recycle=3600)
    connection = sqlEngine.connect()
    dframe = pd.read_sql(sql_query, connection);
    connection.close()
    
    return dframe


def set_dataframe(user_id, pwd, host_name, db_name, df, table_name, pk_column, db_operation):
    conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}/{db_name}"
    sqlEngine = create_engine(conn_str, pool_recycle=3600)
    connection = sqlEngine.connect()
    
    if db_operation == "insert":
        df.to_sql(table_name, con=connection, index=False, if_exists='replace')
        sqlEngine.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY ({pk_column});")
            
    elif db_operation == "update":
        df.to_sql(table_name, con=connection, index=False, if_exists='append')

    connection.close()

#### Create New Data Warehouse database, and to Use it, Switch the Connection Context

In [4]:
conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}"
sqlEngine = create_engine(conn_str, pool_recycle=3600)

sqlEngine.execute(f"DROP DATABASE IF EXISTS `{dst_dbname}`;")
sqlEngine.execute(f"CREATE DATABASE `{dst_dbname}`;")
sqlEngine.execute(f"USE {dst_dbname};")

<sqlalchemy.engine.result.ResultProxy at 0x7f891907f0d0>

### Extract Data from the Source Database Tables

In [5]:
sql_pops = "SELECT * FROM US_City_Pop.`us-cities-demographics`;"
df_pops = get_dataframe(user_id, pwd, host_name, src_dbname, sql_pops)
df_pops.head(5)

Unnamed: 0,City,State,Median Age,Male Population,Female Population,Total Population,Number of Veterans,Foreign-born,Average Household Size,State Code,Race,Count
0,Newark,New Jersey,34.6,138040,143873,281913,5829,86253,2.73,NJ,White,76402
1,Peoria,Illinois,33.1,56229,62432,118661,6634,7517,2.4,IL,American Indian and Alaska Native,1343
2,O'Fallon,Missouri,36.0,41762,43270,85032,5783,3269,2.77,MO,Hispanic or Latino,2583
3,Hampton,Virginia,35.5,66214,70240,136454,19638,6204,2.48,VA,Black or African-American,70303
4,Lakewood,Colorado,37.7,76013,76576,152589,9988,14169,2.29,CO,Hispanic or Latino,33630


In [6]:
drop_cols = ['Foreign-born','Average Household Size','Number of Veterans','Count']
df_pops.drop(drop_cols, axis=1, inplace=True)

df_pops.head(5)

Unnamed: 0,City,State,Median Age,Male Population,Female Population,Total Population,State Code,Race
0,Newark,New Jersey,34.6,138040,143873,281913,NJ,White
1,Peoria,Illinois,33.1,56229,62432,118661,IL,American Indian and Alaska Native
2,O'Fallon,Missouri,36.0,41762,43270,85032,MO,Hispanic or Latino
3,Hampton,Virginia,35.5,66214,70240,136454,VA,Black or African-American
4,Lakewood,Colorado,37.7,76013,76576,152589,CO,Hispanic or Latino


### Connection to the MongoDB Instance

In [7]:
# pip install pymongo[srv]

In [8]:
host_name = "localhost"
port = "27017"

atlas_cluster_name = "sandbox"
atlas_default_dbname = "sample_airbnb"
atlas_user_name = "m001-student"
atlas_password = "m001-mongodb-basics"

conn_str = {"local" : f"mongodb://{host_name}:{port}/",
    "atlas" : f"mongodb+srv://{atlas_user_name}:{atlas_password}@{atlas_cluster_name}.zibbf.mongodb.net/{atlas_default_dbname}?retryWrites=true&w=majority"
}

#### Interogate the MongoDB Atlas instance for the databases it hosts.

In [9]:
client = pymongo.MongoClient(conn_str["atlas"])
client.list_database_names()

['adventure_works',
 'blog',
 'sample_airbnb',
 'sample_analytics',
 'sample_geospatial',
 'sample_mflix',
 'sample_restaurants',
 'sample_supplies',
 'sample_training',
 'sample_weatherdata',
 'admin',
 'local']

#### Connect to the "sample_airbnb" database, and interogate it for the collections it contains.

In [10]:
db_name = "sample_airbnb"

db = client[db_name]
db.list_collection_names()

['listingsAndReviews']

#### Connect the listsAndReviews collection to query data

In [25]:
'''
Columns I am interested in:
    - price, security_deposit, weekly_price, monthly_price, address
- get address (dict-like) by itself and get new df from that 
    - from the address, extract additional data 
'''

'\nColumns I am interested in:\n    - price, security_deposit, weekly_price, monthly_price, address\n- get address (dict-like) by itself and get new df from that \n    - from the address, extract additional data \n'

In [34]:
collection = "listingsAndReviews"
listingsAndReviews = db[collection]

df = pd.DataFrame(list(listingsAndReviews.find()))

#### Transform

In [36]:
# df.columns

# extract columns of interest
new_df = df[['price','security_deposit', 'weekly_price', 'monthly_price']]

new_df.head(5)

Unnamed: 0,price,security_deposit,weekly_price,monthly_price
0,80.0,200.0,,
1,317.0,,1492.0,4849.0
2,115.0,,650.0,2150.0
3,40.0,,,
4,701.0,1000.0,,


In [54]:
df.head(5)
addresses = pd.json_normalize(df['address'])
countries = addresses['country_code']
united_states = addresses[countries == 'US']
states = united_states['street'].str.split(',').str[1]

unique_states = states.value_counts()
unique_states
#states
#addresses
#countries
#united_states

 HI       610
 NY       608
 KAUAI      2
 US         1
 Hi         1
Name: street, dtype: int64

In [56]:
airports_and_locations = pd.read_csv('us-airports.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'us-airports.csv'

## API - Aviation Stack

In [30]:
import requests

params = {
  'access_key': 'fcca77556058dc509fe815718612c4f9'
}

api_result = requests.get('http://api.aviationstack.com/v1/flights', params)

api_response = api_result.json()

# print(api_response)
df = pd.DataFrame.from_dict('api_response')

ValueError: DataFrame constructor not properly called!