# Michigan Football Passing Stats - Pandas & NumPy Basics
This notebook introduces Pandas and NumPy using real Michigan Football passing stats.

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Section 1: Create DataFrame from Michigan Football Data
# Each key is a stat column; each list holds one value per player, in the same
# player order throughout (row i of every list belongs to the same player).
# Missing stats are written as NaN so the affected columns stay float-typed.
data = {
    # --- identity ---
    "Rk": [1, 2, 3, 4, 5],
    "Player": ["Davis Warren", "Jack Tuttle", "Alex Orji", "Donovan Edwards", "Semaj Morgan"],
    "Pos": ["QB", "QB", "QB", "RB", "WR"],
    "G": [9, 2, 11, 12, 11],
    # --- raw passing counts ---
    "Cmp": [134, 30, 25, 1, 0],
    "Att": [209, 50, 47, 1, 1],
    "Cmp%": [64.1, 60.0, 53.2, 100.0, 0.0],
    "Yds": [1199, 306, 150, 23, 0],
    "TD": [7, 1, 3, 1, 0],
    "TD%": [3.3, 2.0, 6.4, 100.0, 0.0],
    "Int": [9, 2, 2, 0, 0],
    "Int%": [4.3, 4.0, 4.3, 0.0, 0.0],
    # --- rate stats ---
    "Y/A": [5.7, 6.1, 3.2, 23.0, 0.0],
    "AY/A": [4.47, 4.72, 2.55, 43.0, 0.0],
    "Y/C": [8.9, 10.2, 6.0, 23.0, np.nan],  # no completions for the last player
    "Y/G": [133.2, 153.0, 13.6, 1.9, 0.0],
    "Rate": [114.7, 110.0, 92.6, 623.2, np.nan],
    "Awards": ["", "", "", "", ""],
}

# Dict-of-lists -> DataFrame: keys become columns, list positions become rows.
df = pd.DataFrame(data)

print("Michigan Football Passing DataFrame:")
print(df)

In [None]:
# Section 2: Exploring the DataFrame
# `shape` is a (rows, columns) tuple; `columns` is the Index of column labels.
frame_shape = df.shape
column_labels = df.columns
print("Shape of DataFrame:", frame_shape)
print("Columns:", column_labels)

In [None]:
# Print the top 3 rows
# head(n) returns the first n rows; the default is 5, so ask for 3 explicitly.
print(df.head(3))

In [None]:
# Get basic summary stats
# describe() reports count/mean/std/min/quartiles/max for the numeric columns;
# text columns such as "Player" and "Pos" are left out by default.
print(df.describe())

In [None]:
# Section 3: Selecting & Filtering
# Selecting a single column by its label yields a Series.
player_names = df.loc[:, "Player"]
print("Player Names:")
print(player_names)

In [None]:
# Filter the dataframe for only quarterbacks (sorry Semaj and Donovan)
# Comparing a column to a value gives a True/False Series (a boolean mask);
# indexing the frame with that mask keeps only the rows where it is True.
qbs = df[df["Pos"] == "QB"]
print("Quarterbacks:")
print(qbs)

In [None]:
# Filter the dataframe for only guys who threw for above 250 yards
# Same boolean-mask pattern as the quarterback filter: strictly greater than 250.
# NOTE: Section 7 later rebinds `yards` to a NumPy array of the "Yds" column,
# so this filtered frame is only available under this name until then.
yards = df[df["Yds"] > 250]
print("Players with > 250 Yards:")
print(yards)

In [None]:
# Section 4: Adding Columns
# Recompute completion percentage from the raw counts so it can be compared
# side by side with the published "Cmp%" column.
completion_fraction = df["Cmp"] / df["Att"]
df["Cmp_Calc"] = completion_fraction * 100
comparison_columns = ["Player", "Cmp%", "Cmp_Calc"]
print(df[comparison_columns])

In [None]:
# Add TDs per Game


In [None]:
# Add one more column you think would be interesting!


In [None]:
# Section 5: Sorting & Grouping
# Rank players by passing yards, highest first, showing only name and yardage.
by_yards_desc = df.sort_values(by="Yds", ascending=False)
print(by_yards_desc.loc[:, ["Player", "Yds"]])
# Average passing yards within each position group.
mean_yards_by_pos = df.groupby("Pos")["Yds"].mean()
print(mean_yards_by_pos)

In [None]:
# Try it with columns you want to look at!


In [None]:
# Section 6: Handling Missing Data

# How many missing values are there?
# isnull() flags each missing cell; summing the flags gives a count per column.
missing_flags = df.isnull()
missing_per_column = missing_flags.sum()
print(missing_per_column)

In [None]:
# Where are they?
# Flag every row that has a missing value in at least one column, then keep
# only those rows.
row_has_missing = df.isnull().any(axis="columns")
print(df[row_has_missing])

In [None]:
# How should they be filled (If at all)?
# The only gaps are "Y/C" and "Rate" for the player with 0 completions on 1 attempt.
#   - "Rate": the other rows are consistent with the NCAA passer-efficiency formula
#     (8.4*Yds + 330*TD + 100*Cmp - 200*Int) / Att, which evaluates to 0.0 for his line.
#   - "Y/C": yards per completion is 0/0 here, i.e. genuinely undefined; 0.0 is used
#     as a "nothing gained" placeholder. Leaving it as NaN is equally defensible.
# Passing a dict fills each column with its own value and leaves every other column
# alone; fillna returns a new frame, so `df` itself is unchanged.
df_filled = df.fillna({"Y/C": 0.0, "Rate": 0.0})
print(df_filled)

In [None]:
# Section 7: NumPy Integration
# Extract the passing-yards column as a plain NumPy array.
yards = df.loc[:, "Yds"].to_numpy()
print("Yards array:", yards)

In [None]:
# Print out at least one of each of these: max, min, and standard deviation


In [None]:
# Find the player with the most yards
# np.argmax returns a *position* (0-based offset into the array), not an index
# label. `.loc` looks rows up by label, which only happens to match positions
# while the frame has its default 0..n-1 index; after any sort, filter, or
# set_index it would pick the wrong row or raise KeyError. `.iloc` is the
# positional accessor, so it is always the right partner for argmax.
max_idx = np.argmax(yards)
print("Player with max yards:", df["Player"].iloc[max_idx])

In [None]:
# Find who had the worst TD to INT ratio out of players with more than 1 pass attempt
# Hint: a column has to be added!


In [None]:
# Section 8: Conclusion
# Sign-off message marking the end of the notebook.
closing_message = "Done! Go Blue 💙💛🏈"
print(closing_message)