In [1]:
import os
from getpass import getpass
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine, inspect

In [2]:
# Load the raw People table from the project's Resources folder and preview it
people_file = "Resources/People.csv"
people_df = pd.read_csv(filepath_or_buffer=people_file)
people_df.head(n=5)

Unnamed: 0,playerID,birthYear,birthMonth,birthDay,birthCountry,birthState,birthCity,deathYear,deathMonth,deathDay,...,nameLast,nameGiven,weight,height,bats,throws,debut,finalGame,retroID,bbrefID
0,aardsda01,1981.0,12.0,27.0,USA,CO,Denver,,,,...,Aardsma,David Allan,215.0,75.0,R,R,2004-04-06,2015-08-23,aardd001,aardsda01
1,aaronha01,1934.0,2.0,5.0,USA,AL,Mobile,,,,...,Aaron,Henry Louis,180.0,72.0,R,R,1954-04-13,1976-10-03,aaroh101,aaronha01
2,aaronto01,1939.0,8.0,5.0,USA,AL,Mobile,1984.0,8.0,16.0,...,Aaron,Tommie Lee,190.0,75.0,R,R,1962-04-10,1971-09-26,aarot101,aaronto01
3,aasedo01,1954.0,9.0,8.0,USA,CA,Orange,,,,...,Aase,Donald William,190.0,75.0,R,R,1977-07-26,1990-10-03,aased001,aasedo01
4,abadan01,1972.0,8.0,25.0,USA,FL,Palm Beach,,,,...,Abad,Fausto Andres,184.0,73.0,L,L,2001-09-10,2006-04-13,abada001,abadan01


In [3]:
# Load the raw Salaries table from the project's Resources folder and preview it
salaries_file = "Resources/Salaries.csv"
salaries_df = pd.read_csv(filepath_or_buffer=salaries_file)
salaries_df.head(n=5)

Unnamed: 0,yearID,teamID,lgID,playerID,salary
0,1985,ATL,NL,barkele01,870000
1,1985,ATL,NL,bedrost01,550000
2,1985,ATL,NL,benedbr01,545000
3,1985,ATL,NL,campri01,633333
4,1985,ATL,NL,ceronri01,625000


In [4]:
# Columns carried over from the raw People data
people_cols = ["playerID", "nameGiven"]

# Build the cleaned table in a single chain (no in-place mutation):
#   1. keep only the selected columns
#   2. rename them to the target column names
#   3. keep the first row seen for each id
#   4. use id as the index
people_transformed = (
    people_df[people_cols]
    .rename(columns={"playerID": "id", "nameGiven": "player_name"})
    .drop_duplicates(subset="id")
    .set_index("id")
)

people_transformed.head()

Unnamed: 0_level_0,player_name
id,Unnamed: 1_level_1
aardsda01,David Allan
aaronha01,Henry Louis
aaronto01,Tommie Lee
aasedo01,Donald William
abadan01,Fausto Andres


In [5]:
# Columns carried over from the raw Salaries data
salaries_cols = ["playerID", "salary"]

# Build the cleaned table in a single chain (no in-place mutation):
#   1. keep only the selected columns
#   2. rename them to the target column names
#   3. keep the first row seen for each id
#   4. use id as the index
# NOTE(review): step 3 leaves one salary row per player (the first one in file
# order); the raw file also has a yearID column, so confirm that discarding
# the other rows is intended.
salaries_transformed = (
    salaries_df[salaries_cols]
    .rename(columns={"playerID": "id", "salary": "pay"})
    .drop_duplicates(subset="id")
    .set_index("id")
)

salaries_transformed.head()

Unnamed: 0_level_0,pay
id,Unnamed: 1_level_1
barkele01,870000
bedrost01,550000
benedbr01,545000
campri01,633333
ceronri01,625000


In [6]:
# Build the PostgreSQL connection URL from the environment so that no
# credentials are stored in the notebook. Variables read:
#   PGUSER      (default "postgres")
#   PGPASSWORD  (no default; prompted for interactively when unset/empty)
#   PGHOST      (default "localhost")
#   PGPORT      (default "5432")
#   PGDATABASE  (default "project_two")
# For unattended runs set PGPASSWORD, otherwise the getpass prompt will block.
db_user = os.environ.get("PGUSER", "postgres")
db_password = os.environ.get("PGPASSWORD") or getpass("PostgreSQL password: ")
db_host = os.environ.get("PGHOST", "localhost")
db_port = os.environ.get("PGPORT", "5432")
db_name = os.environ.get("PGDATABASE", "project_two")

# Percent-encode user and password: characters such as "@", ":" or "/" would
# otherwise be parsed as URL delimiters.
connection_string = (
    f"{quote(db_user, safe='')}:{quote(db_password, safe='')}"
    f"@{db_host}:{db_port}/{db_name}"
)
engine = create_engine(f'postgresql://{connection_string}')

In [7]:
# Confirm tables
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API lists the table names and works on older versions as well.
inspect(engine).get_table_names()

[]

In [8]:
# Write the cleaned people table to the "people" table; the "id" index is
# written as a column. if_exists="append" inserts into the table when it
# already exists rather than replacing it.
people_transformed.to_sql(
    name="people",
    con=engine,
    index=True,
    if_exists="append",
)

In [9]:
# Write the cleaned salaries table to the "salaries" table; the "id" index is
# written as a column. if_exists="append" inserts into the table when it
# already exists rather than replacing it.
salaries_transformed.to_sql(
    name="salaries",
    con=engine,
    index=True,
    if_exists="append",
)