In [2]:
import os
import pandas as pd
from sqlalchemy import create_engine
from config import db_pass
import pymysql

### Extract CSVs into DataFrames

In [3]:
nyse_file = "Resources/companylist_nyse.csv"
nyse_df = pd.read_csv(nyse_file)
nyse_df.head()

Unnamed: 0,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,Summary Quote,Unnamed: 8
0,DDD,3D Systems Corporation,10.75,$1.25B,,Technology,Computer Software: Prepackaged Software,https://www.nasdaq.com/symbol/ddd,
1,MMM,3M Company,190.21,$109.64B,,Health Care,Medical/Dental Instruments,https://www.nasdaq.com/symbol/mmm,
2,WBAI,500.com Limited,11.97,$508.97M,2013.0,Consumer Services,Services-Misc. Amusement & Recreation,https://www.nasdaq.com/symbol/wbai,
3,WUBA,58.com Inc.,71.69,$10.63B,2013.0,Technology,"Computer Software: Programming, Data Processing",https://www.nasdaq.com/symbol/wuba,
4,EGHT,8x8 Inc,23.7,$2.27B,,Technology,EDP Services,https://www.nasdaq.com/symbol/eght,


### Transform premise DataFrame

In [4]:
# Remove unnamed column from dataframe since it is not valid
nyse_cols = ["Symbol", "Name", "LastSale", "MarketCap", "IPOyear", "Sector", "industry", "Summary Quote"]
nyse_transformed= nyse_df[nyse_cols].copy()


# Create a filtered dataframe from specific columns
#premise_cols = ["License Serial Number", "Premises Name", "County ID Code"]
#premise_transformed= premise_df[premise_cols].copy()

# Rename the column headers
#premise_transformed = premise_transformed.rename(columns={"License Serial Number": "id",
#                                                          "Premises Name": "premise_name",
#                                                          "County ID Code": "county_id"})

# Clean the data by dropping duplicates and setting the index
#premise_transformed.drop_duplicates("id", inplace=True)
#premise_transformed.set_index("id", inplace=True)

nyse_transformed.head()

Unnamed: 0,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,Summary Quote
0,DDD,3D Systems Corporation,10.75,$1.25B,,Technology,Computer Software: Prepackaged Software,https://www.nasdaq.com/symbol/ddd
1,MMM,3M Company,190.21,$109.64B,,Health Care,Medical/Dental Instruments,https://www.nasdaq.com/symbol/mmm
2,WBAI,500.com Limited,11.97,$508.97M,2013.0,Consumer Services,Services-Misc. Amusement & Recreation,https://www.nasdaq.com/symbol/wbai
3,WUBA,58.com Inc.,71.69,$10.63B,2013.0,Technology,"Computer Software: Programming, Data Processing",https://www.nasdaq.com/symbol/wuba
4,EGHT,8x8 Inc,23.7,$2.27B,,Technology,EDP Services,https://www.nasdaq.com/symbol/eght


### Create database connection

In [6]:
#connection_string = "root:db_pass@localhost/stocks_db"
#engine = create_engine(f'mysql://{connection_string}')

host = os.getenv('MYSQL_HOST')
port = os.getenv('MYSQL_PORT')
user = os.getenv('MYSQL_USER')
password = os.getenv('MYSQL_PASSWORD')
database = os.getenv('MYSQL_DATABASE')

conn = pymysql.connect(
    host=host,
    port=int(3306),
    user="root",
    passwd=db_pass,
    db="stocks_db",
    charset='utf8mb4')

df = pd.read_sql_query("SELECT * FROM companylist_nyse",
    conn)
df.tail(10)

Unnamed: 0,id,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,Summary,Quote


In [7]:
# Confirm tables
engine.table_names()

NameError: name 'engine' is not defined

### Load DataFrames into database

In [8]:
premise_transformed.to_sql(name='premise', con=engine, if_exists='append', index=True)

In [9]:
county_transformed.to_sql(name='county', con=engine, if_exists='append', index=True)

### Transform county DataFrame

In [5]:
county_cols = ["ID", "County Name (Licensee)", "County ID Code", "License Count"]
county_transformed = county_df[county_cols].copy()

# Rename the column headers
county_transformed = county_transformed.rename(columns={"ID": "id",
                                                         "County Name (Licensee)": "county_name",
                                                         "License Count": "license_count",
                                                         "County ID Code": "county_id"})

# Set index
county_transformed.set_index("id", inplace=True)

county_transformed.head()

Unnamed: 0_level_0,county_name,county_id,license_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,ALBANY,5,77
1,ALLEGANY,59,4
2,BRONX,0,104
3,BROOME,35,14
4,CATTARAUGUS,41,9
