In [23]:
import os
from urllib.parse import quote

import pandas as pd
import pymysql
from sqlalchemy import create_engine, inspect

from config import db_pass

### Extract CSVs into DataFrames

In [24]:
# Read the NYSE company listing CSV (path is relative to the notebook's
# working directory) and preview the first rows.
nyse_file = "Resources/companylist_nyse.csv"
nyse_df = pd.read_csv(filepath_or_buffer=nyse_file)
nyse_df.head(n=5)

Unnamed: 0,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,Summary Quote,Unnamed: 8
0,DDD,3D Systems Corporation,10.75,$1.25B,,Technology,Computer Software: Prepackaged Software,https://www.nasdaq.com/symbol/ddd,
1,MMM,3M Company,190.21,$109.64B,,Health Care,Medical/Dental Instruments,https://www.nasdaq.com/symbol/mmm,
2,WBAI,500.com Limited,11.97,$508.97M,2013.0,Consumer Services,Services-Misc. Amusement & Recreation,https://www.nasdaq.com/symbol/wbai,
3,WUBA,58.com Inc.,71.69,$10.63B,2013.0,Technology,"Computer Software: Programming, Data Processing",https://www.nasdaq.com/symbol/wuba,
4,EGHT,8x8 Inc,23.7,$2.27B,,Technology,EDP Services,https://www.nasdaq.com/symbol/eght,


In [25]:
# Keep only the named source columns; the "Unnamed: 8" column that shows up
# in the raw frame is left out because it is not a valid field.
nyse_cols = [
    "Symbol",
    "Name",
    "LastSale",
    "MarketCap",
    "IPOyear",
    "Sector",
    "industry",
    "Summary Quote",
]

# Work on an independent copy of the subset and drop the space from the
# "Summary Quote" header in the same chain.
nyse_transformed = (
    nyse_df[nyse_cols]
    .copy()
    .rename(columns={"Summary Quote": "SummaryQuote"})
)

nyse_transformed.head()

Unnamed: 0,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,SummaryQuote
0,DDD,3D Systems Corporation,10.75,$1.25B,,Technology,Computer Software: Prepackaged Software,https://www.nasdaq.com/symbol/ddd
1,MMM,3M Company,190.21,$109.64B,,Health Care,Medical/Dental Instruments,https://www.nasdaq.com/symbol/mmm
2,WBAI,500.com Limited,11.97,$508.97M,2013.0,Consumer Services,Services-Misc. Amusement & Recreation,https://www.nasdaq.com/symbol/wbai
3,WUBA,58.com Inc.,71.69,$10.63B,2013.0,Technology,"Computer Software: Programming, Data Processing",https://www.nasdaq.com/symbol/wuba
4,EGHT,8x8 Inc,23.7,$2.27B,,Technology,EDP Services,https://www.nasdaq.com/symbol/eght


In [26]:
# Split the MarketCap text (e.g. "$1.25B") into two helper columns:
#   MarketCapNum  - the characters between the first and the last one, as a number
#   MarketCapType - the last character (the magnitude letter, e.g. "B" or "M")
# .str.slice already returns a Series aligned with the frame's index, so it is
# assigned directly instead of being wrapped in a one-column DataFrame first.
# NOTE(review): the fixed slice positions assume every non-null value has a
# one-character prefix and a one-character suffix - confirm against the full CSV.
nyse_transformed["MarketCapNum"] = pd.to_numeric(
    nyse_transformed["MarketCap"].str.slice(1, -1)
)
nyse_transformed["MarketCapType"] = nyse_transformed["MarketCap"].str.slice(-1)

In [27]:
# Put every market cap on one scale: rows whose MarketCapType is "B" are
# multiplied by 1000, all other rows (including missing types) keep
# MarketCapNum unchanged.
# NOTE(review): presumably this converts billions to the same unit as the
# "M" rows (millions) - confirm no other suffix letters occur in the data.
# Vectorised with Series.mask rather than a row-by-row apply(axis=1).
nyse_transformed["MarketCapNum1"] = nyse_transformed["MarketCapNum"].mask(
    nyse_transformed["MarketCapType"].eq("B"),
    nyse_transformed["MarketCapNum"] * 1000,
)

# Preview only the first rows instead of rendering the whole frame.
nyse_transformed.head(10)

Unnamed: 0,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,SummaryQuote,MarketCapNum,MarketCapType,MarketCapNum1
0,DDD,3D Systems Corporation,10.7500,$1.25B,,Technology,Computer Software: Prepackaged Software,https://www.nasdaq.com/symbol/ddd,1.25,B,1250.00
1,MMM,3M Company,190.2100,$109.64B,,Health Care,Medical/Dental Instruments,https://www.nasdaq.com/symbol/mmm,109.64,B,109640.00
2,WBAI,500.com Limited,11.9700,$508.97M,2013.0,Consumer Services,Services-Misc. Amusement & Recreation,https://www.nasdaq.com/symbol/wbai,508.97,M,508.97
3,WUBA,58.com Inc.,71.6900,$10.63B,2013.0,Technology,"Computer Software: Programming, Data Processing",https://www.nasdaq.com/symbol/wuba,10.63,B,10630.00
4,EGHT,8x8 Inc,23.7000,$2.27B,,Technology,EDP Services,https://www.nasdaq.com/symbol/eght,2.27,B,2270.00
5,AHC,A.H. Belo Corporation,3.7400,$80.59M,,Consumer Services,Newspapers/Magazines,https://www.nasdaq.com/symbol/ahc,80.59,M,80.59
6,AOS,A.O Smith Corporation,55.8100,$9.37B,,Consumer Durables,Consumer Electronics/Appliances,https://www.nasdaq.com/symbol/aos,9.37,B,9370.00
7,ATEN,"A10 Networks, Inc.",6.5800,$494.71M,2014.0,Technology,Computer Communications Equipment,https://www.nasdaq.com/symbol/aten,494.71,M,494.71
8,AAC,"AAC Holdings, Inc.",1.7200,$42.45M,2014.0,Health Care,Medical Specialities,https://www.nasdaq.com/symbol/aac,42.45,M,42.45
9,AIR,AAR Corp.,33.8100,$1.19B,,Capital Goods,Aerospace,https://www.nasdaq.com/symbol/air,1.19,B,1190.00


In [28]:
# Columns to load, with the scaled MarketCapNum1 standing in for the original
# text MarketCap column.
nyse_cols = [
    "Symbol",
    "Name",
    "LastSale",
    "MarketCapNum1",
    "IPOyear",
    "Sector",
    "industry",
    "SummaryQuote",
]

# Copy the subset and give the numeric column the plain "MarketCap" name.
nyse_load = (
    nyse_transformed[nyse_cols]
    .copy()
    .rename(columns={"MarketCapNum1": "MarketCap"})
)
nyse_load.head()

Unnamed: 0,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,SummaryQuote
0,DDD,3D Systems Corporation,10.75,1250.0,,Technology,Computer Software: Prepackaged Software,https://www.nasdaq.com/symbol/ddd
1,MMM,3M Company,190.21,109640.0,,Health Care,Medical/Dental Instruments,https://www.nasdaq.com/symbol/mmm
2,WBAI,500.com Limited,11.97,508.97,2013.0,Consumer Services,Services-Misc. Amusement & Recreation,https://www.nasdaq.com/symbol/wbai
3,WUBA,58.com Inc.,71.69,10630.0,2013.0,Technology,"Computer Software: Programming, Data Processing",https://www.nasdaq.com/symbol/wuba
4,EGHT,8x8 Inc,23.7,2270.0,,Technology,EDP Services,https://www.nasdaq.com/symbol/eght


### Create database connection

In [29]:
# Build the MySQL connection URL for the local stocks_db database.
# The password is percent-encoded before it is placed in the URL: characters
# such as "@", ":", "/" or "%" would otherwise be read as URL delimiters and
# the URL would be parsed incorrectly. safe="" makes quote() encode "/" too.
connection_string = f"root:{quote(str(db_pass), safe='')}@localhost/stocks_db"
engine = create_engine(f"mysql+pymysql://{connection_string}")

In [30]:
# Confirm tables: list the table names visible through this engine.
# Uses the Inspector API because Engine.table_names() is deprecated in
# SQLAlchemy 1.4 and removed in 2.0.
inspect(engine).get_table_names()

['all_stocks_5yr',
 'companylist_nasdq',
 'companylist_nyse',
 'industry_analysis']

### Load DataFrames into database

In [32]:
# Insert the prepared rows into the existing companylist_nyse table.
# if_exists="append" adds to whatever is already in the table, so running this
# cell again inserts the same rows a second time. index=False keeps the
# DataFrame index out of the insert.
nyse_load.to_sql(
    name="companylist_nyse",
    con=engine,
    if_exists="append",
    index=False,
)


In [33]:
# Confirm load: read the table back through the same engine and preview the
# first rows.
pd.read_sql_query(
    sql="select * from companylist_nyse",
    con=engine,
).head(n=5)

Unnamed: 0,id,Symbol,Name,LastSale,MarketCap,IPOyear,Sector,industry,SummaryQuote
0,1,DDD,3D Systems Corporation,10.75,1250.0,,Technology,Computer Software: Prepackaged Software,https://www.nasdaq.com/symbol/ddd
1,2,MMM,3M Company,190.21,109640.0,,Health Care,Medical/Dental Instruments,https://www.nasdaq.com/symbol/mmm
2,3,WBAI,500.com Limited,11.97,508.97,2013.0,Consumer Services,Services-Misc. Amusement & Recreation,https://www.nasdaq.com/symbol/wbai
3,4,WUBA,58.com Inc.,71.69,10630.0,2013.0,Technology,"Computer Software: Programming, Data Processing",https://www.nasdaq.com/symbol/wuba
4,5,EGHT,8x8 Inc,23.7,2270.0,,Technology,EDP Services,https://www.nasdaq.com/symbol/eght
