# Project 1

## Which teams spend the most money on players?

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the salary table once; the cells below filter and aggregate it by
# yearID and teamID.
salaries_csv = "core/Salaries.csv"
df_salaries = pd.read_csv(salaries_csv)
df_salaries

In [None]:
# Total payroll per team for one season.
# The season is a named constant so the filtered frame no longer carries a
# misleading year in its name (it used to be called df_1985 while holding
# 2016 rows). Keep it in sync with the year quoted in the chart title.
SALARY_YEAR = 2016

salaries_for_year = df_salaries[df_salaries['yearID'] == SALARY_YEAR]
salary_by_team = (
    salaries_for_year
    .groupby('teamID')['salary']
    .sum()
    .reset_index()  # back to plain columns: teamID, salary
)
salary_by_team

In [None]:
# Bar chart of the summed salary per team, one bar per teamID.
fig, ax = plt.subplots(figsize=(15, 6))
ax.bar(salary_by_team['teamID'], salary_by_team['salary'], color='skyblue')

ax.set_title('Total Salaries by Team for Year 2016')
ax.set_xlabel('Team')
ax.set_ylabel('Total Salary')
# Tilt the team codes so neighbouring labels stay readable.
ax.tick_params(axis='x', labelrotation=45)

fig.tight_layout()
plt.show()

In [None]:
# Team ranks for one season, listed from best (lowest Rank value) upward.
# The filtered frames are named after what they hold rather than reusing
# `df_1985`, which was both the wrong year and, earlier in the notebook,
# a slice of a different table (salaries).
TEAMS_YEAR = 2016

df_teams = pd.read_csv("core/Teams.csv")
teams_for_year = df_teams[df_teams['yearID'] == TEAMS_YEAR]
team_ranks = teams_for_year[["teamID", "Rank", "name"]]
team_ranks.sort_values(by=['Rank'])

## How much money was in the league over the years?

In [None]:
# Highest and lowest recorded salary for every season.
salary_max = df_salaries.groupby('yearID')['salary'].max().reset_index()

# Remove zero salaries *before* taking the minimum. Filtering after the
# groupby throws away every season whose minimum happens to be 0 instead of
# reporting that season's lowest non-zero salary, and the inner merge below
# would then silently drop those seasons from the combined table.
nonzero_salaries = df_salaries[df_salaries['salary'] != 0]
salary_min = nonzero_salaries.groupby('yearID')['salary'].min().reset_index()

# One row per season with columns yearID, salary_max, salary_min.
merged_salaries = pd.merge(salary_max, salary_min, on='yearID', suffixes=('_max', '_min'))
merged_salaries

In [None]:
# Line chart of the per-season highest and lowest salary.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(salary_max['yearID'], salary_max['salary'], color='skyblue', label="highest salary")
ax.plot(salary_min['yearID'], salary_min['salary'], color='red', label="lowest salary")

ax.set_title('Highest and Lowest Salaries in League per Year')
ax.set_xlabel('Year')
# Both lines are single-salary extremes, not sums, so the axis is labelled
# 'Salary' rather than 'Total Salary'.
ax.set_ylabel('Salary')
ax.tick_params(axis='x', labelrotation=45)
ax.legend()

fig.tight_layout()
plt.show()


## Are baseball players getting better?

In [74]:
# Load the batting and pitching tables and reduce them to per-season totals.
df_batting = pd.read_csv("core/Batting.csv")
df_pitching = pd.read_csv("core/Pitching.csv")

# Sum all needed batting columns in a single pass, then split the result
# into one two-column frame (yearID + stat) per statistic.
batting_totals = df_batting.groupby('yearID')[['H', 'HR', 'AB', 'SO']].sum()
hits_by_year = batting_totals['H'].reset_index()
homeruns_by_year = batting_totals['HR'].reset_index()
ab_by_year = batting_totals['AB'].reset_index()
strikes_by_year = batting_totals['SO'].reset_index()

# Strikeouts as recorded in the pitching table.
pitching_totals = df_pitching.groupby('yearID')['SO'].sum()
p_strikes_by_year = pitching_totals.reset_index()

In [None]:
# Hits, home runs and (pitching-table) strikeouts per season, each expressed
# as a percentage of that season's at-bats.
#
# Every series is indexed by yearID before dividing so that pandas aligns the
# numerator and denominator on the season. Dividing the reset_index() frames
# directly matches rows by position, which is only right while the batting
# and pitching tables cover exactly the same seasons in the same order.
ab_per_year = ab_by_year.set_index('yearID')['AB']
hit_pct = 100 * hits_by_year.set_index('yearID')['H'] / ab_per_year
homerun_pct = 100 * homeruns_by_year.set_index('yearID')['HR'] / ab_per_year
strikeout_pct = 100 * p_strikes_by_year.set_index('yearID')['SO'] / ab_per_year

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(hit_pct.index, hit_pct.values, color='skyblue', label='Hits')
ax.plot(homerun_pct.index, homerun_pct.values, color='green', label='Homeruns')
ax.plot(strikeout_pct.index, strikeout_pct.values, color='purple', label='Strikeouts')

# Title and axis label name all three plotted series (strikeouts were
# missing from both).
ax.set_title('Hit, Homerun and Strikeout % of At-Bats')
ax.set_xlabel('Year')
ax.set_ylabel('% of at-bats')
ax.legend()

# Mark the first season shown in the salary analysis. The label's y position
# is in percent units like the plotted data (22, not .22), which keeps it
# off the x-axis.
ax.axvline(x=1985, color='black', linestyle='--')
ax.text(1985, 22, "Payment data starts", color='black', rotation=0, va='center')

fig.tight_layout()
plt.show()

## Strikeouts over the years

In [None]:
# Strikeouts per season as a percentage of at-bats, once from the batting
# table and once from the pitching table.
#
# The series are indexed by yearID before dividing so pandas aligns them on
# the season; dividing the reset_index() frames directly would pair rows by
# position, which breaks if the two tables do not cover identical seasons.
ab_per_year = ab_by_year.set_index('yearID')['AB']
batter_so_pct = 100 * strikes_by_year.set_index('yearID')['SO'] / ab_per_year
pitcher_so_pct = 100 * p_strikes_by_year.set_index('yearID')['SO'] / ab_per_year

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(batter_so_pct.index, batter_so_pct.values, color='red', label='Batter')
ax.plot(pitcher_so_pct.index, pitcher_so_pct.values, color='purple', linestyle='dashed', label='Pitcher')
ax.set_title('Pitcher and Batter Strikeout %')
ax.set_xlabel('Year')
ax.set_ylabel('Strikeout %')
ax.legend()

# Rule changes taken from: https://www.baseball-almanac.com/rulechng.shtml
# Each entry is (season, y position of the label in percent units, label).
rule_changes = [
    (1901, 22, "Foul strike\n rule was\n adopted"),
    (1917, 17, "Spit balls\n outlawed"),
    (1968, 5, "The anti-spitball rule was\n rewritten and tightened up"),
]
for rule_year, label_y, rule_note in rule_changes:
    ax.axvline(x=rule_year, color='black', linestyle='--')
    ax.text(rule_year, label_y, rule_note, color='black', rotation=0, va='center')

fig.tight_layout()
plt.show()