In [1]:
import pandas as pd
from sqlalchemy import create_engine
from keys import (username, password)

In [2]:
print(username)

postgres


### Extract CSVs into DataFrames

In [3]:
securities_file = "Resources/securities.csv"
securities_df = pd.read_csv(securities_file)
securities_df.head()

Unnamed: 0,Ticker symbol,Security,SEC filings,GICS Sector,GICS Sub Industry,Address of Headquarters,Date first added,CIK
0,MMM,3M Company,reports,Industrials,Industrial Conglomerates,"St. Paul, Minnesota",,66740
1,ABT,Abbott Laboratories,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800
2,ABBV,AbbVie,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152
3,ACN,Accenture plc,reports,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373
4,ATVI,Activision Blizzard,reports,Information Technology,Home Entertainment Software,"Santa Monica, California",2015-08-31,718877


In [4]:
securities_df.columns

Index(['Ticker symbol', 'Security', 'SEC filings', 'GICS Sector',
       'GICS Sub Industry', 'Address of Headquarters', 'Date first added',
       'CIK'],
      dtype='object')

In [5]:
securities_cols = ['Ticker symbol', 'Security', 'GICS Sector', 'GICS Sub Industry']
securities_transformed= securities_df[securities_cols].copy()

In [6]:
securities_transformed = securities_transformed.sort_values(by=['Ticker symbol'], ignore_index = True)

In [7]:
securities_transformed.columns = ['Ticker', 'company_name', 'industry', 'sub_industry']

In [19]:
securities_transformed.describe()

Unnamed: 0,Ticker,company_name,industry,sub_industry
count,505,505,505,505
unique,505,504,11,124
top,EW,Under Armour,Consumer Discretionary,Industrial Conglomerates
freq,1,2,85,21


In [20]:
securities_transformed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 505 entries, 0 to 504
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Ticker        505 non-null    object
 1   company_name  505 non-null    object
 2   industry      505 non-null    object
 3   sub_industry  505 non-null    object
dtypes: object(4)
memory usage: 15.9+ KB


In [22]:
securities_transformed[securities_transformed.duplicated(['company_name'])]

Unnamed: 0,Ticker,company_name,industry,sub_industry
451,UAA,Under Armour,Consumer Discretionary,"Apparel, Accessories & Luxury Goods"


In [23]:
securities_transformed.loc[securities_transformed['company_name'] == "Under Armour"]

Unnamed: 0,Ticker,company_name,industry,sub_industry
450,UA,Under Armour,Consumer Discretionary,"Apparel, Accessories & Luxury Goods"
451,UAA,Under Armour,Consumer Discretionary,"Apparel, Accessories & Luxury Goods"


In [24]:
connection_string = f"{username}:{password}@localhost:5432/finance_db"
engine = create_engine(f'postgresql://{connection_string}')

In [25]:
# Confirm tables
engine.table_names()

  engine.table_names()


['fundamentals']

In [26]:
# Use pandas to load csv converted DataFrame into database
securities_transformed.to_sql(name='fundamentals', con=engine, if_exists='append', index=False)

IntegrityError: (psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "fundamentals_pkey"
DETAIL:  Key ("Ticker")=(A) already exists.

[SQL: INSERT INTO fundamentals ("Ticker", company_name, industry, sub_industry) VALUES (%(Ticker)s, %(company_name)s, %(industry)s, %(sub_industry)s)]
[parameters: ({'Ticker': 'A', 'company_name': 'Agilent Technologies Inc', 'industry': 'Health Care', 'sub_industry': 'Health Care Equipment'}, {'Ticker': 'AAL', 'company_name': 'American Airlines Group', 'industry': 'Industrials', 'sub_industry': 'Airlines'}, {'Ticker': 'AAP', 'company_name': 'Advance Auto Parts', 'industry': 'Consumer Discretionary', 'sub_industry': 'Automotive Retail'}, {'Ticker': 'AAPL', 'company_name': 'Apple Inc.', 'industry': 'Information Technology', 'sub_industry': 'Computer Hardware'}, {'Ticker': 'ABBV', 'company_name': 'AbbVie', 'industry': 'Health Care', 'sub_industry': 'Pharmaceuticals'}, {'Ticker': 'ABC', 'company_name': 'AmerisourceBergen Corp', 'industry': 'Health Care', 'sub_industry': 'Health Care Distributors'}, {'Ticker': 'ABT', 'company_name': 'Abbott Laboratories', 'industry': 'Health Care', 'sub_industry': 'Health Care Equipment'}, {'Ticker': 'ACN', 'company_name': 'Accenture plc', 'industry': 'Information Technology', 'sub_industry': 'IT Consulting & Other Services'}  ... displaying 10 of 505 total bound parameter sets ...  {'Ticker': 'ZION', 'company_name': 'Zions Bancorp', 'industry': 'Financials', 'sub_industry': 'Regional Banks'}, {'Ticker': 'ZTS', 'company_name': 'Zoetis', 'industry': 'Health Care', 'sub_industry': 'Pharmaceuticals'})]
(Background on this error at: http://sqlalche.me/e/14/gkpj)

In [27]:
#  Confirm data has been added by querying the customer_name table
pd.read_sql_query('select * from fundamentals', con=engine).head()

Unnamed: 0,Ticker,company_name,industry,sub_industry
0,A,Agilent Technologies Inc,Health Care,Health Care Equipment
1,AAL,American Airlines Group,Industrials,Airlines
2,AAP,Advance Auto Parts,Consumer Discretionary,Automotive Retail
3,AAPL,Apple Inc.,Information Technology,Computer Hardware
4,ABBV,AbbVie,Health Care,Pharmaceuticals
