In [1]:
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# Be sure that Postgres/PGAdmin is launched

# establish a database connection
engine = create_engine("postgres+psycopg2://postgres:postgres@localhost:5432/lahman_baseball")
# Replace `<lahman_baseball_database_name>` with the actual name of your lahman baseball database as it appears in pgadmin

# use the connection to run a query using pandas!
batting_df = pd.read_sql("SELECT * FROM batting;", con=engine)
batting_df.head()

Unnamed: 0,playerid,yearid,stint,teamid,lgid,g,ab,r,h,h2b,...,rbi,sb,cs,bb,so,ibb,hbp,sh,sf,gidp
0,abercda01,1871,1,TRO,,1,4,0,0,0,...,0.0,0.0,0.0,0,0.0,,,,,
1,addybo01,1871,1,RC1,,25,118,30,32,6,...,13.0,8.0,1.0,4,0.0,,,,,
2,allisar01,1871,1,CL1,,29,137,28,40,4,...,19.0,3.0,1.0,2,5.0,,,,,
3,allisdo01,1871,1,WS3,,27,133,28,44,10,...,27.0,1.0,1.0,0,2.0,,,,,
4,ansonca01,1871,1,RC1,,25,120,29,39,11,...,16.0,6.0,2.0,2,1.0,,,,,


# College Table

In [3]:
collegeplayer_df = pd.read_sql("SELECT * FROM collegeplaying", con=engine)
collegeplayer_df.head()

Unnamed: 0,playerid,schoolid,yearid
0,aardsda01,pennst,2001
1,aardsda01,rice,2002
2,aardsda01,rice,2003
3,abadan01,gamiddl,1992
4,abadan01,gamiddl,1993


# Vandy Table

In [4]:
vandy_df = collegeplayer_df[collegeplayer_df['schoolid'].str.contains('vandy')]
vandy_df

Unnamed: 0,playerid,schoolid,yearid
232,alvarpe01,vandy,2006
233,alvarpe01,vandy,2007
234,alvarpe01,vandy,2008
895,baxtemi01,vandy,2004
896,baxtemi01,vandy,2005
...,...,...,...
16806,willimi01,vandy,1970
16807,willimi01,vandy,1971
16808,willimi01,vandy,1972
17280,zeidjo01,vandy,2006


In [5]:
vandy_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 65 entries, 232 to 17281
Data columns (total 3 columns):
playerid    65 non-null object
schoolid    65 non-null object
yearid      65 non-null int64
dtypes: int64(1), object(2)
memory usage: 2.0+ KB


# People Table

In [6]:
people_df = pd.read_sql("SELECT * FROM people", con=engine)
people_df.head()

Unnamed: 0,playerid,birthyear,birthmonth,birthday,birthcountry,birthstate,birthcity,deathyear,deathmonth,deathday,...,namelast,namegiven,weight,height,bats,throws,debut,finalgame,retroid,bbrefid
0,aardsda01,1981.0,12.0,27.0,USA,CO,Denver,,,,...,Aardsma,David Allan,215.0,75.0,R,R,2004-04-06,2015-08-23,aardd001,aardsda01
1,aaronha01,1934.0,2.0,5.0,USA,AL,Mobile,,,,...,Aaron,Henry Louis,180.0,72.0,R,R,1954-04-13,1976-10-03,aaroh101,aaronha01
2,aaronto01,1939.0,8.0,5.0,USA,AL,Mobile,1984.0,8.0,16.0,...,Aaron,Tommie Lee,190.0,75.0,R,R,1962-04-10,1971-09-26,aarot101,aaronto01
3,aasedo01,1954.0,9.0,8.0,USA,CA,Orange,,,,...,Aase,Donald William,190.0,75.0,R,R,1977-07-26,1990-10-03,aased001,aasedo01
4,abadan01,1972.0,8.0,25.0,USA,FL,Palm Beach,,,,...,Abad,Fausto Andres,184.0,73.0,L,L,2001-09-10,2006-04-13,abada001,abadan01


In [7]:
people_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19112 entries, 0 to 19111
Data columns (total 24 columns):
playerid        19112 non-null object
birthyear       18980 non-null float64
birthmonth      18810 non-null float64
birthday        18663 non-null float64
birthcountry    19043 non-null object
birthstate      18541 non-null object
birthcity       18932 non-null object
deathyear       9450 non-null float64
deathmonth      9449 non-null float64
deathday        9448 non-null float64
deathcountry    9445 non-null object
deathstate      9400 non-null object
deathcity       9441 non-null object
namefirst       19075 non-null object
namelast        19112 non-null object
namegiven       19075 non-null object
weight          18261 non-null float64
height          18330 non-null float64
bats            17929 non-null object
throws          18135 non-null object
debut           18917 non-null object
finalgame       18917 non-null object
retroid         19049 non-null object
bbrefid        

# Salary Table

In [8]:
salaries_df = pd.read_sql("SELECT * FROM salaries", con=engine)
salaries_df.head()

Unnamed: 0,yearid,teamid,lgid,playerid,salary
0,1985,ATL,NL,barkele01,870000.0
1,1985,ATL,NL,bedrost01,550000.0
2,1985,ATL,NL,benedbr01,545000.0
3,1985,ATL,NL,campri01,633333.0
4,1985,ATL,NL,ceronri01,625000.0


In [9]:
total_salaries_df = salaries_df.groupby('playerid')['salary'].sum().reset_index()
total_salaries_df

Unnamed: 0,playerid,salary
0,aardsda01,9259750.0
1,aasedo01,2300000.0
2,abadan01,327000.0
3,abadfe01,3766400.0
4,abbotje01,985000.0
...,...,...
5144,zumayjo01,4207000.0
5145,zuninmi01,1027600.0
5146,zupcibo01,431000.0
5147,zuvelpa01,145000.0


In [10]:
salaries_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26428 entries, 0 to 26427
Data columns (total 5 columns):
yearid      26428 non-null int64
teamid      26428 non-null object
lgid        26428 non-null object
playerid    26428 non-null object
salary      26428 non-null float64
dtypes: float64(1), int64(1), object(3)
memory usage: 1.0+ MB


# Combining dataframes 

In [11]:
vandy_people_df = pd.merge(vandy_df, people_df, how = 'left', on = 'playerid')
vandy_people_df

Unnamed: 0,playerid,schoolid,yearid,birthyear,birthmonth,birthday,birthcountry,birthstate,birthcity,deathyear,...,namelast,namegiven,weight,height,bats,throws,debut,finalgame,retroid,bbrefid
0,alvarpe01,vandy,2006,1987.0,2.0,6.0,D.R.,Distrito Nacional,Santo Domingo,,...,Alvarez,Pedro Manuel,250.0,75.0,L,R,2010-06-16,2016-10-01,alvap001,alvarpe01
1,alvarpe01,vandy,2007,1987.0,2.0,6.0,D.R.,Distrito Nacional,Santo Domingo,,...,Alvarez,Pedro Manuel,250.0,75.0,L,R,2010-06-16,2016-10-01,alvap001,alvarpe01
2,alvarpe01,vandy,2008,1987.0,2.0,6.0,D.R.,Distrito Nacional,Santo Domingo,,...,Alvarez,Pedro Manuel,250.0,75.0,L,R,2010-06-16,2016-10-01,alvap001,alvarpe01
3,baxtemi01,vandy,2004,1984.0,12.0,7.0,USA,NY,Queens,,...,Baxter,Michael Joseph,205.0,72.0,L,R,2010-09-06,2015-07-08,baxtm001,baxtemi01
4,baxtemi01,vandy,2005,1984.0,12.0,7.0,USA,NY,Queens,,...,Baxter,Michael Joseph,205.0,72.0,L,R,2010-09-06,2015-07-08,baxtm001,baxtemi01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,willimi01,vandy,1970,1950.0,12.0,26.0,USA,OK,Oklahoma City,,...,Willis,Michael Henry,205.0,74.0,L,L,1977-04-13,1981-06-11,willm101,willimi01
61,willimi01,vandy,1971,1950.0,12.0,26.0,USA,OK,Oklahoma City,,...,Willis,Michael Henry,205.0,74.0,L,L,1977-04-13,1981-06-11,willm101,willimi01
62,willimi01,vandy,1972,1950.0,12.0,26.0,USA,OK,Oklahoma City,,...,Willis,Michael Henry,205.0,74.0,L,L,1977-04-13,1981-06-11,willm101,willimi01
63,zeidjo01,vandy,2006,1987.0,3.0,24.0,USA,CT,New Haven,,...,Zeid,Joshua Alexander,220.0,76.0,R,R,2013-07-30,2014-07-24,zeidj001,zeidjo01


In [12]:
vandy_people_salaries_df = pd.merge(vandy_people_df, total_salaries_df, how = 'left', on = 'playerid')
vandy_people_salaries_df

Unnamed: 0,playerid,schoolid,yearid,birthyear,birthmonth,birthday,birthcountry,birthstate,birthcity,deathyear,...,namegiven,weight,height,bats,throws,debut,finalgame,retroid,bbrefid,salary
0,alvarpe01,vandy,2006,1987.0,2.0,6.0,D.R.,Distrito Nacional,Santo Domingo,,...,Pedro Manuel,250.0,75.0,L,R,2010-06-16,2016-10-01,alvap001,alvarpe01,20681704.0
1,alvarpe01,vandy,2007,1987.0,2.0,6.0,D.R.,Distrito Nacional,Santo Domingo,,...,Pedro Manuel,250.0,75.0,L,R,2010-06-16,2016-10-01,alvap001,alvarpe01,20681704.0
2,alvarpe01,vandy,2008,1987.0,2.0,6.0,D.R.,Distrito Nacional,Santo Domingo,,...,Pedro Manuel,250.0,75.0,L,R,2010-06-16,2016-10-01,alvap001,alvarpe01,20681704.0
3,baxtemi01,vandy,2004,1984.0,12.0,7.0,USA,NY,Queens,,...,Michael Joseph,205.0,72.0,L,R,2010-09-06,2015-07-08,baxtm001,baxtemi01,2094418.0
4,baxtemi01,vandy,2005,1984.0,12.0,7.0,USA,NY,Queens,,...,Michael Joseph,205.0,72.0,L,R,2010-09-06,2015-07-08,baxtm001,baxtemi01,2094418.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,willimi01,vandy,1970,1950.0,12.0,26.0,USA,OK,Oklahoma City,,...,Michael Henry,205.0,74.0,L,L,1977-04-13,1981-06-11,willm101,willimi01,
61,willimi01,vandy,1971,1950.0,12.0,26.0,USA,OK,Oklahoma City,,...,Michael Henry,205.0,74.0,L,L,1977-04-13,1981-06-11,willm101,willimi01,
62,willimi01,vandy,1972,1950.0,12.0,26.0,USA,OK,Oklahoma City,,...,Michael Henry,205.0,74.0,L,L,1977-04-13,1981-06-11,willm101,willimi01,
63,zeidjo01,vandy,2006,1987.0,3.0,24.0,USA,CT,New Haven,,...,Joshua Alexander,220.0,76.0,R,R,2013-07-30,2014-07-24,zeidj001,zeidjo01,


In [13]:
vandy_people_salaries_df["Player Name"] = vandy_people_salaries_df["namefirst"] + " " + vandy_people_salaries_df["namelast"]
vandy_people_salaries_df

Unnamed: 0,playerid,schoolid,yearid,birthyear,birthmonth,birthday,birthcountry,birthstate,birthcity,deathyear,...,weight,height,bats,throws,debut,finalgame,retroid,bbrefid,salary,Player Name
0,alvarpe01,vandy,2006,1987.0,2.0,6.0,D.R.,Distrito Nacional,Santo Domingo,,...,250.0,75.0,L,R,2010-06-16,2016-10-01,alvap001,alvarpe01,20681704.0,Pedro Alvarez
1,alvarpe01,vandy,2007,1987.0,2.0,6.0,D.R.,Distrito Nacional,Santo Domingo,,...,250.0,75.0,L,R,2010-06-16,2016-10-01,alvap001,alvarpe01,20681704.0,Pedro Alvarez
2,alvarpe01,vandy,2008,1987.0,2.0,6.0,D.R.,Distrito Nacional,Santo Domingo,,...,250.0,75.0,L,R,2010-06-16,2016-10-01,alvap001,alvarpe01,20681704.0,Pedro Alvarez
3,baxtemi01,vandy,2004,1984.0,12.0,7.0,USA,NY,Queens,,...,205.0,72.0,L,R,2010-09-06,2015-07-08,baxtm001,baxtemi01,2094418.0,Mike Baxter
4,baxtemi01,vandy,2005,1984.0,12.0,7.0,USA,NY,Queens,,...,205.0,72.0,L,R,2010-09-06,2015-07-08,baxtm001,baxtemi01,2094418.0,Mike Baxter
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,willimi01,vandy,1970,1950.0,12.0,26.0,USA,OK,Oklahoma City,,...,205.0,74.0,L,L,1977-04-13,1981-06-11,willm101,willimi01,,Mike Willis
61,willimi01,vandy,1971,1950.0,12.0,26.0,USA,OK,Oklahoma City,,...,205.0,74.0,L,L,1977-04-13,1981-06-11,willm101,willimi01,,Mike Willis
62,willimi01,vandy,1972,1950.0,12.0,26.0,USA,OK,Oklahoma City,,...,205.0,74.0,L,L,1977-04-13,1981-06-11,willm101,willimi01,,Mike Willis
63,zeidjo01,vandy,2006,1987.0,3.0,24.0,USA,CT,New Haven,,...,220.0,76.0,R,R,2013-07-30,2014-07-24,zeidj001,zeidjo01,,Josh Zeid


In [14]:
##using grouby for counting

vandy_salaries_df = vandy_people_salaries_df.groupby('Player Name')['salary'].sum().reset_index()
vandy_salaries_df.columns = ['Player Name','salary']
vandy_salaries_df

Unnamed: 0,Player Name,salary
0,Antoan Richardson,0.0
1,David Price,245553888.0
2,Harvey Hendrick,0.0
3,Jensen Lewis,3702000.0
4,Jeremy Sowers,1154400.0
5,Joey Cora,16867500.0
6,Josh Paul,7920000.0
7,Josh Zeid,0.0
8,Mal Moss,0.0
9,Mark Prior,12800000.0


In [15]:
vandy_salaries_df = vandy_salaries_df.sort_values('salary', ascending = False)
vandy_salaries_df

Unnamed: 0,Player Name,salary
1,David Price,245553888.0
15,Pedro Alvarez,62045112.0
18,Scott Sanderson,21500000.0
12,Mike Minor,20512500.0
5,Joey Cora,16867500.0
9,Mark Prior,12800000.0
17,Ryan Flaherty,12183000.0
6,Josh Paul,7920000.0
22,Sonny Gray,4627500.0
11,Mike Baxter,4188836.0


# Question 2

In [16]:
##read in table
fielding_df = pd.read_sql("SELECT * FROM fielding;", con=engine)
fielding_df.head()

Unnamed: 0,playerid,yearid,stint,teamid,lgid,pos,g,gs,innouts,po,a,e,dp,pb,wp,sb,cs,zr
0,abercda01,1871,1,TRO,,SS,1,,,1,3.0,2.0,0.0,,,,,
1,addybo01,1871,1,RC1,,2B,22,,,67,72.0,42.0,5.0,,,,,
2,addybo01,1871,1,RC1,,SS,3,,,8,14.0,7.0,0.0,,,,,
3,allisar01,1871,1,CL1,,2B,2,,,1,4.0,0.0,0.0,,,,,
4,allisar01,1871,1,CL1,,OF,29,,,51,3.0,7.0,1.0,,,,,


In [17]:
fielding2016_df = fielding_df[fielding_df.yearid == 2016]
fielding2016_df

Unnamed: 0,playerid,yearid,stint,teamid,lgid,pos,g,gs,innouts,po,a,e,dp,pb,wp,sb,cs,zr
134862,abadfe01,2016,1,MIN,AL,P,39,0.0,102.0,0,3.0,0.0,1.0,,,,,
134863,abadfe01,2016,2,BOS,AL,P,18,0.0,38.0,0,1.0,0.0,0.0,,,,,
134864,abreujo02,2016,1,CHA,AL,1B,152,152.0,4067.0,1243,84.0,10.0,131.0,,,,,
134865,achteaj01,2016,1,LAA,AL,P,27,0.0,113.0,2,4.0,0.0,0.0,,,,,
134866,ackledu01,2016,1,NYA,AL,1B,13,10.0,255.0,80,7.0,0.0,7.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136810,zobribe01,2016,1,CHN,NL,2B,119,113.0,2929.0,177,250.0,7.0,52.0,,,,,
136811,zobribe01,2016,1,CHN,NL,OF,46,29.0,859.0,43,1.0,0.0,0.0,,,,,
136812,zobribe01,2016,1,CHN,NL,SS,1,0.0,6.0,0,0.0,0.0,0.0,,,,,
136813,zuninmi01,2016,1,SEA,AL,C,52,48.0,1331.0,400,15.0,0.0,0.0,3.0,,19.0,7.0,


In [19]:
outfield2016_df = fielding2016_df[fielding2016_df.pos == 'OF']
print(outfield2016_df.po.sum())

29560


In [20]:
fielding2016_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1953 entries, 134862 to 136814
Data columns (total 18 columns):
playerid    1953 non-null object
yearid      1953 non-null int64
stint       1953 non-null int64
teamid      1953 non-null object
lgid        1953 non-null object
pos         1953 non-null object
g           1953 non-null int64
gs          1953 non-null float64
innouts     1953 non-null float64
po          1953 non-null int64
a           1953 non-null float64
e           1953 non-null float64
dp          1953 non-null float64
pb          114 non-null float64
wp          0 non-null float64
sb          114 non-null float64
cs          114 non-null float64
zr          0 non-null float64
dtypes: float64(10), int64(4), object(4)
memory usage: 289.9+ KB


In [21]:
infield2016_df = fielding2016_df[fielding2016_df.pos.isin(['SS', '1B', '2B', '3B'])]
print(infield2016_df.po.sum())

58934


In [22]:
battery2016_df = fielding2016_df[fielding2016_df.pos.isin(['P', 'C'])]
print(battery2016_df.po.sum())

41424


# Question 3

In [23]:
teams_df = pd.read_sql("SELECT * FROM teams;", con=engine)
teams_df.head()

Unnamed: 0,yearid,lgid,teamid,franchid,divid,rank,g,ghome,w,l,...,dp,fp,name,park,attendance,bpf,ppf,teamidbr,teamidlahman45,teamidretro
0,1871,,BS1,BNA,,3,31,,20,10,...,,0.838,Boston Red Stockings,South End Grounds I,,103,98,BOS,BS1,BS1
1,1871,,CH1,CNA,,2,28,,19,9,...,,0.829,Chicago White Stockings,Union Base-Ball Grounds,,104,102,CHI,CH1,CH1
2,1871,,CL1,CFC,,8,29,,10,19,...,,0.814,Cleveland Forest Citys,National Association Grounds,,96,100,CLE,CL1,CL1
3,1871,,FW1,KEK,,7,19,,7,12,...,,0.803,Fort Wayne Kekiongas,Hamilton Field,,101,107,KEK,FW1,FW1
4,1871,,NY2,NNA,,5,33,,16,17,...,,0.839,New York Mutuals,Union Grounds (Brooklyn),,90,88,NYU,NY2,NY2


In [24]:
new_teams_df = teams_df.drop(columns = ['lgid', 'franchid', 'divid','rank','g','ghome','divwin','wcwin','lgwin','r','ab','h','h2b','h3b','hr','bb','so','sb','cs','hbp','sf','ra','er','era','cg','sho','sv','ipouts','ha','hra','bba','soa','e','dp','fp','park','attendance','bpf','ppf','teamidbr','teamidlahman45','teamidretro'])
new_teams_df

Unnamed: 0,yearid,teamid,w,l,wswin,name
0,1871,BS1,20,10,,Boston Red Stockings
1,1871,CH1,19,9,,Chicago White Stockings
2,1871,CL1,10,19,,Cleveland Forest Citys
3,1871,FW1,7,12,,Fort Wayne Kekiongas
4,1871,NY2,16,17,,New York Mutuals
...,...,...,...,...,...,...
2830,2016,SLN,86,76,N,St. Louis Cardinals
2831,2016,TBA,68,94,N,Tampa Bay Rays
2832,2016,TEX,95,67,N,Texas Rangers
2833,2016,TOR,89,73,N,Toronto Blue Jays


# 3A

In [25]:
loser_teams_df = new_teams_df[(new_teams_df.yearid >= 1970) & (new_teams_df['wswin'] == 'N')]
print(loser_teams_df['wswin'])                       

1541    N
1543    N
1544    N
1545    N
1546    N
       ..
2830    N
2831    N
2832    N
2833    N
2834    N
Name: wswin, Length: 1220, dtype: object


In [26]:
big_win_df = loser_teams_df.sort_values('w', ascending = False)
big_win_df

Unnamed: 0,yearid,teamid,w,l,wswin,name
2379,2001,SEA,116,46,N,Seattle Mariners
2267,1998,ATL,106,56,N,Atlanta Braves
2471,2004,SLN,105,57,N,St. Louis Cardinals
2125,1993,ATL,104,58,N,Atlanta Braves
2012,1988,OAK,104,58,N,Oakland Athletics
...,...,...,...,...,...,...
1825,1981,MIN,41,68,N,Minnesota Twins
1829,1981,NYN,41,62,N,New York Mets
1833,1981,SDN,41,69,N,San Diego Padres
1818,1981,CHN,38,65,N,Chicago Cubs


# 3B

In [27]:
winning_teams_df = new_teams_df[(new_teams_df.yearid >= 1970) & (new_teams_df['wswin'] == 'Y')]
print(winning_teams_df['wswin'])   

1542    Y
1584    Y
1606    Y
1630    Y
1654    Y
1667    Y
1691    Y
1724    Y
1750    Y
1780    Y
1805    Y
1824    Y
1862    Y
1866    Y
1899    Y
1927    Y
1959    Y
1981    Y
2006    Y
2038    Y
2053    Y
2085    Y
2124    Y
2152    Y
2181    Y
2226    Y
2247    Y
2283    Y
2313    Y
2343    Y
2356    Y
2385    Y
2426    Y
2449    Y
2479    Y
2530    Y
2538    Y
2585    Y
2612    Y
2649    Y
2680    Y
2709    Y
2718    Y
2769    Y
2786    Y
2810    Y
Name: wswin, dtype: object


In [30]:
small_win_df = winning_teams_df.sort_values('w')
small_win_df

Unnamed: 0,yearid,teamid,w,l,wswin,name
1824,1981,LAN,63,47,Y,Los Angeles Dodgers
2530,2006,SLN,83,78,Y,St. Louis Cardinals
1981,1987,MIN,85,77,Y,Minnesota Twins
2343,2000,NYA,87,74,Y,New York Yankees
2769,2014,SFN,88,74,Y,San Francisco Giants
1654,1974,OAK,90,72,Y,Oakland Athletics
2181,1995,ATL,90,54,Y,Atlanta Braves
2680,2011,SLN,90,72,Y,St. Louis Cardinals
2053,1990,CIN,91,71,Y,Cincinnati Reds
2426,2003,FLO,91,71,Y,Florida Marlins


# 3C

# Question4

In [33]:
managers_df = pd.read_sql("SELECT * FROM managers;", con=engine)
managers_df.head()

Unnamed: 0,playerid,yearid,teamid,lgid,inseason,g,w,l,rank,plyrmgr
0,wrighha01,1871,BS1,,1,31,20,10,3.0,Y
1,woodji01,1871,CH1,,1,28,19,9,2.0,Y
2,paborch01,1871,CL1,,1,29,10,19,8.0,Y
3,lennobi01,1871,FW1,,1,14,5,9,8.0,Y
4,deaneha01,1871,FW1,,2,5,2,3,8.0,Y


In [38]:
awards_managers_df = pd.read_sql("SELECT * FROM awardsmanagers;", con=engine)
awards_managers_df

Unnamed: 0,playerid,awardid,yearid,lgid,tie,notes
0,larusto01,BBWAA Manager of the Year,1983,AL,,
1,lasorto01,BBWAA Manager of the Year,1983,NL,,
2,andersp01,BBWAA Manager of the Year,1984,AL,,
3,freyji99,BBWAA Manager of the Year,1984,NL,,
4,coxbo01,BBWAA Manager of the Year,1985,AL,,
...,...,...,...,...,...,...
174,willima04,BBWAA Manager of the Year,2014,NL,,
175,banisje01,BBWAA Manager of the Year,2015,AL,,
176,maddojo99,BBWAA Manager of the Year,2015,NL,,
177,francte01,BBWAA Manager of the Year,2016,AL,,\t


In [39]:
al_awards_managers_df = awards_managers_df[awards_managers_df['lgid'].str.contains('AL')]
al_awards_managers_df

Unnamed: 0,playerid,awardid,yearid,lgid,tie,notes
0,larusto01,BBWAA Manager of the Year,1983,AL,,
2,andersp01,BBWAA Manager of the Year,1984,AL,,
4,coxbo01,BBWAA Manager of the Year,1985,AL,,
6,mcnamjo99,BBWAA Manager of the Year,1986,AL,,
8,andersp01,BBWAA Manager of the Year,1987,AL,,
...,...,...,...,...,...,...
169,showabu99,TSN Manager of the Year,2014,AL,,
171,molitpa01,TSN Manager of the Year,2015,AL,,
173,showabu99,BBWAA Manager of the Year,2014,AL,,
175,banisje01,BBWAA Manager of the Year,2015,AL,,


In [41]:
altsn_awards_managers_df = al_awards_managers_df[al_awards_managers_df['awardid'].str.contains('TSN')]
altsn_awards_managers_df

Unnamed: 0,playerid,awardid,yearid,lgid,tie,notes
114,mcnamjo99,TSN Manager of the Year,1986,AL,,
115,andersp01,TSN Manager of the Year,1987,AL,,
117,larusto01,TSN Manager of the Year,1988,AL,,
119,robinfr02,TSN Manager of the Year,1989,AL,,
122,torboje01,TSN Manager of the Year,1990,AL,,
124,kellyto01,TSN Manager of the Year,1991,AL,,
125,larusto01,TSN Manager of the Year,1992,AL,,
128,oatesjo01,TSN Manager of the Year,1993,AL,,
130,showabu99,TSN Manager of the Year,1994,AL,,
132,hargrmi01,TSN Manager of the Year,1995,AL,,


In [43]:
altsn_managers_df = pd.merge(altsn_awards_managers_df, people_df, how = 'left', on = 'playerid')
altsn_managers_df

Unnamed: 0,playerid,awardid,yearid,lgid,tie,notes,birthyear,birthmonth,birthday,birthcountry,...,namelast,namegiven,weight,height,bats,throws,debut,finalgame,retroid,bbrefid
0,mcnamjo99,TSN Manager of the Year,1986,AL,,,1932.0,6.0,4.0,USA,...,McNamara,John Francis,175.0,70.0,R,R,,,mcnaj801,mcnamjo99
1,andersp01,TSN Manager of the Year,1987,AL,,,1934.0,2.0,22.0,USA,...,Anderson,George Lee,170.0,69.0,R,R,1959-04-10,1959-09-27,andes101,andersp01
2,larusto01,TSN Manager of the Year,1988,AL,,,1944.0,10.0,4.0,USA,...,LaRussa,Anthony,175.0,72.0,R,R,1963-05-10,1973-04-06,larut101,larusto01
3,robinfr02,TSN Manager of the Year,1989,AL,,,1935.0,8.0,31.0,USA,...,Robinson,Frank,183.0,73.0,R,R,1956-04-17,1976-09-18,robif103,robinfr02
4,torboje01,TSN Manager of the Year,1990,AL,,,1941.0,11.0,26.0,USA,...,Torborg,Jeffrey Allen,195.0,72.0,R,R,1964-05-10,1973-09-29,torbj101,torboje01
5,kellyto01,TSN Manager of the Year,1991,AL,,,1950.0,8.0,15.0,USA,...,Kelly,Jay Thomas,188.0,71.0,L,L,1975-05-11,1975-07-11,kellt101,kellyto01
6,larusto01,TSN Manager of the Year,1992,AL,,,1944.0,10.0,4.0,USA,...,LaRussa,Anthony,175.0,72.0,R,R,1963-05-10,1973-04-06,larut101,larusto01
7,oatesjo01,TSN Manager of the Year,1993,AL,,,1946.0,1.0,21.0,USA,...,Oates,Johnny Lane,188.0,71.0,L,R,1970-09-17,1981-05-24,oatej101,oatesjo01
8,showabu99,TSN Manager of the Year,1994,AL,,,1956.0,5.0,23.0,USA,...,Showalter,William Nathaniel,195.0,69.0,L,L,,,showb801,showabu99
9,hargrmi01,TSN Manager of the Year,1995,AL,,,1949.0,10.0,26.0,USA,...,Hargrove,Dudley Michael,195.0,72.0,L,L,1974-04-07,1985-10-06,hargm001,hargrmi01


In [40]:
nl_awards_managers_df = awards_managers_df[awards_managers_df['lgid'].str.contains('NL')]
nl_awards_managers_df

Unnamed: 0,playerid,awardid,yearid,lgid,tie,notes
1,lasorto01,BBWAA Manager of the Year,1983,NL,,
3,freyji99,BBWAA Manager of the Year,1984,NL,,
5,herzowh01,BBWAA Manager of the Year,1985,NL,,
7,lanieha01,BBWAA Manager of the Year,1986,NL,,
9,rodgebu01,BBWAA Manager of the Year,1987,NL,,
...,...,...,...,...,...,...
170,willima04,TSN Manager of the Year,2014,NL,,
172,collite99,TSN Manager of the Year,2015,NL,,
174,willima04,BBWAA Manager of the Year,2014,NL,,
176,maddojo99,BBWAA Manager of the Year,2015,NL,,


In [42]:
nltsn_awards_managers_df = nl_awards_managers_df[nl_awards_managers_df['awardid'].str.contains('TSN')]
nltsn_awards_managers_df

Unnamed: 0,playerid,awardid,yearid,lgid,tie,notes
113,lanieha01,TSN Manager of the Year,1986,NL,,
116,rodgebu01,TSN Manager of the Year,1987,NL,,
118,leylaji99,TSN Manager of the Year,1988,NL,,
120,zimmedo01,TSN Manager of the Year,1989,NL,,
121,leylaji99,TSN Manager of the Year,1990,NL,,
123,coxbo01,TSN Manager of the Year,1991,NL,,
126,leylaji99,TSN Manager of the Year,1992,NL,,
127,coxbo01,TSN Manager of the Year,1993,NL,,
129,aloufe01,TSN Manager of the Year,1994,NL,,
131,baylodo01,TSN Manager of the Year,1995,NL,,


In [34]:
people_df = pd.read_sql("SELECT * FROM people;", con=engine)
people_df.head()

Unnamed: 0,playerid,birthyear,birthmonth,birthday,birthcountry,birthstate,birthcity,deathyear,deathmonth,deathday,...,namelast,namegiven,weight,height,bats,throws,debut,finalgame,retroid,bbrefid
0,aardsda01,1981.0,12.0,27.0,USA,CO,Denver,,,,...,Aardsma,David Allan,215.0,75.0,R,R,2004-04-06,2015-08-23,aardd001,aardsda01
1,aaronha01,1934.0,2.0,5.0,USA,AL,Mobile,,,,...,Aaron,Henry Louis,180.0,72.0,R,R,1954-04-13,1976-10-03,aaroh101,aaronha01
2,aaronto01,1939.0,8.0,5.0,USA,AL,Mobile,1984.0,8.0,16.0,...,Aaron,Tommie Lee,190.0,75.0,R,R,1962-04-10,1971-09-26,aarot101,aaronto01
3,aasedo01,1954.0,9.0,8.0,USA,CA,Orange,,,,...,Aase,Donald William,190.0,75.0,R,R,1977-07-26,1990-10-03,aased001,aasedo01
4,abadan01,1972.0,8.0,25.0,USA,FL,Palm Beach,,,,...,Abad,Fausto Andres,184.0,73.0,L,L,2001-09-10,2006-04-13,abada001,abadan01


ValueError: Arrays were different lengths: 19112 vs 1