# WSA Pandas Demo Complete

### Imports
* The `pandas` library should already be installed as part of the Anaconda distribution
* Typically abbreviated as `pd`

In [3]:
import pandas as pd

### Reading in a DataFrame
* We will start by creating a dataframe called `df` containing the Michigan Football GameLog data we have stored in our SQL database.
* We export this data from the database as a `.csv` file and read that file into the Jupyter notebook.
* Make sure your `.csv` file is stored in the same folder as this notebook.

In [5]:
# pd.read_csv() loads the .csv file into a DataFrame
df = pd.read_csv('Michigan_GameLogs_F24.csv')
# the id column is not needed, so remove it with .drop()
df = df.drop(columns=['id'])
df

Unnamed: 0,date,year,opponent,home_away,result,points_scored,points_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_td,pass_1st_down,rush_att,rush_yrds,rush_td,rush_1st_down,total_offense,fumbles,ints
0,2011-09-03,2011,Western Michigan,Home,W,34,10,9,13,69.2,98,0,5,26,190,3,9,288,0,0
1,2011-09-10,2011,Notre Dame,Home,W,35,31,11,24,45.8,338,4,10,26,114,1,5,452,0,3
2,2011-09-17,2011,Eastern Michigan,Home,W,31,3,7,18,38.9,95,2,5,50,376,2,19,471,0,1
3,2011-09-24,2011,San Diego State,Home,W,28,7,8,17,47.1,93,0,3,45,320,4,14,413,2,2
4,2011-10-01,2011,Minnesota,Home,W,58,0,18,25,72.0,217,3,10,48,363,3,19,580,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,2024-09-14,2024,Arkansas State,Home,W,28,18,13,18,72.2,134,1,6,44,301,3,14,435,0,3
168,2024-09-21,2024,Southern California,Home,W,27,24,7,12,58.3,32,0,0,46,290,3,12,322,2,0
169,2024-09-28,2024,Minnesota,Home,W,27,24,10,18,55.6,86,1,5,43,155,2,9,241,0,1
170,2024-10-05,2024,Washington,Away,L,17,27,13,25,52.0,113,1,7,37,174,1,9,287,1,1


In [6]:
# .head() with no argument returns the first 5 rows of df
df.head()

Unnamed: 0,date,year,opponent,home_away,result,points_scored,points_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_td,pass_1st_down,rush_att,rush_yrds,rush_td,rush_1st_down,total_offense,fumbles,ints
0,2011-09-03,2011,Western Michigan,Home,W,34,10,9,13,69.2,98,0,5,26,190,3,9,288,0,0
1,2011-09-10,2011,Notre Dame,Home,W,35,31,11,24,45.8,338,4,10,26,114,1,5,452,0,3
2,2011-09-17,2011,Eastern Michigan,Home,W,31,3,7,18,38.9,95,2,5,50,376,2,19,471,0,1
3,2011-09-24,2011,San Diego State,Home,W,28,7,8,17,47.1,93,0,3,45,320,4,14,413,2,2
4,2011-10-01,2011,Minnesota,Home,W,58,0,18,25,72.0,217,3,10,48,363,3,19,580,0,0


In [7]:
# .tail() with no argument returns the last 5 rows of df
df.tail()

Unnamed: 0,date,year,opponent,home_away,result,points_scored,points_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_td,pass_1st_down,rush_att,rush_yrds,rush_td,rush_1st_down,total_offense,fumbles,ints
167,2024-09-14,2024,Arkansas State,Home,W,28,18,13,18,72.2,134,1,6,44,301,3,14,435,0,3
168,2024-09-21,2024,Southern California,Home,W,27,24,7,12,58.3,32,0,0,46,290,3,12,322,2,0
169,2024-09-28,2024,Minnesota,Home,W,27,24,10,18,55.6,86,1,5,43,155,2,9,241,0,1
170,2024-10-05,2024,Washington,Away,L,17,27,13,25,52.0,113,1,7,37,174,1,9,287,1,1
171,2024-10-19,2024,Illinois,Away,L,7,21,20,32,62.5,208,0,8,38,114,1,10,322,2,1


In [8]:
# .head(n) returns the first n rows; here n = 2
df.head(2)

Unnamed: 0,date,year,opponent,home_away,result,points_scored,points_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_td,pass_1st_down,rush_att,rush_yrds,rush_td,rush_1st_down,total_offense,fumbles,ints
0,2011-09-03,2011,Western Michigan,Home,W,34,10,9,13,69.2,98,0,5,26,190,3,9,288,0,0
1,2011-09-10,2011,Notre Dame,Home,W,35,31,11,24,45.8,338,4,10,26,114,1,5,452,0,3


### Data Preprocessing

In [10]:
# .info() prints the essentials of the dataset:
# the index range plus each column's name, non-null count, and dtype
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 172 entries, 0 to 171
Data columns (total 20 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   date            172 non-null    object 
 1   year            172 non-null    int64  
 2   opponent        172 non-null    object 
 3   home_away       172 non-null    object 
 4   result          172 non-null    object 
 5   points_scored   172 non-null    int64  
 6   points_against  172 non-null    int64  
 7   pass_cmp        172 non-null    int64  
 8   pass_att        172 non-null    int64  
 9   pass_pct        172 non-null    float64
 10  pass_yrds       172 non-null    int64  
 11  pass_td         172 non-null    int64  
 12  pass_1st_down   172 non-null    int64  
 13  rush_att        172 non-null    int64  
 14  rush_yrds       172 non-null    int64  
 15  rush_td         172 non-null    int64  
 16  rush_1st_down   172 non-null    int64  
 17  total_offense   172 non-null    int

In [11]:
# .shape is an attribute (no parentheses) holding the dimensions of df
# as a tuple: (num_rows, num_cols)
df.shape

(172, 20)

#### Null and Duplicate Values

In [13]:
# We can append dataframes to each other using pd.concat()
# Stacking df on top of itself doubles the row count, giving us duplicates to practice on
# It is good practice to never change or update the original df directly
# Always create copies! (pd.concat() returns a new DataFrame; df itself is untouched)

temp_df = pd.concat([df,df])
temp_df.shape

(344, 20)

In [14]:
# .drop_duplicates() returns a new DataFrame with repeated rows removed
# The result is not assigned here, so temp_df itself still holds the duplicates
temp_df.drop_duplicates().shape

(172, 20)

In [15]:
# .isnull() checks every cell for NULL or empty datapoints
# returns a DataFrame of the same shape holding True wherever a value is NULL
df.isnull()

Unnamed: 0,date,year,opponent,home_away,result,points_scored,points_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_td,pass_1st_down,rush_att,rush_yrds,rush_td,rush_1st_down,total_offense,fumbles,ints
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
168,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
169,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
170,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [16]:
# .sum() adds up the True values in each column of the above df,
# giving the number of NULL values per column
df.isnull().sum()

date              0
year              0
opponent          0
home_away         0
result            0
points_scored     0
points_against    0
pass_cmp          0
pass_att          0
pass_pct          0
pass_yrds         0
pass_td           0
pass_1st_down     0
rush_att          0
rush_yrds         0
rush_td           0
rush_1st_down     0
total_offense     0
fumbles           0
ints              0
dtype: int64

In [17]:
# If we did have null values, we could use the .dropna() function to remove all rows with null values
# .dropna() returns a new DataFrame; the result is not assigned here, so df is unchanged
df.dropna()

Unnamed: 0,date,year,opponent,home_away,result,points_scored,points_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_td,pass_1st_down,rush_att,rush_yrds,rush_td,rush_1st_down,total_offense,fumbles,ints
0,2011-09-03,2011,Western Michigan,Home,W,34,10,9,13,69.2,98,0,5,26,190,3,9,288,0,0
1,2011-09-10,2011,Notre Dame,Home,W,35,31,11,24,45.8,338,4,10,26,114,1,5,452,0,3
2,2011-09-17,2011,Eastern Michigan,Home,W,31,3,7,18,38.9,95,2,5,50,376,2,19,471,0,1
3,2011-09-24,2011,San Diego State,Home,W,28,7,8,17,47.1,93,0,3,45,320,4,14,413,2,2
4,2011-10-01,2011,Minnesota,Home,W,58,0,18,25,72.0,217,3,10,48,363,3,19,580,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,2024-09-14,2024,Arkansas State,Home,W,28,18,13,18,72.2,134,1,6,44,301,3,14,435,0,3
168,2024-09-21,2024,Southern California,Home,W,27,24,7,12,58.3,32,0,0,46,290,3,12,322,2,0
169,2024-09-28,2024,Minnesota,Home,W,27,24,10,18,55.6,86,1,5,43,155,2,9,241,0,1
170,2024-10-05,2024,Washington,Away,L,17,27,13,25,52.0,113,1,7,37,174,1,9,287,1,1


#### Modifying DataFrame Columns

In [19]:
# check column information with .columns
# (an attribute, not a method: it holds the column labels as an Index)
df.columns

Index(['date', 'year', 'opponent', 'home_away', 'result', 'points_scored',
       'points_against', 'pass_cmp', 'pass_att', 'pass_pct', 'pass_yrds',
       'pass_td', 'pass_1st_down', 'rush_att', 'rush_yrds', 'rush_td',
       'rush_1st_down', 'total_offense', 'fumbles', 'ints'],
      dtype='object')

In [20]:
# .rename() allows us to rename our columns
# It returns a renamed copy, which we assign back to df. This avoids inplace=True,
# which would mutate the DataFrame object directly (see "Always create copies!" above)
df = df.rename(columns = {
    'home_away' : 'location',
    'points_scored' : 'pts_scored',
    'points_against' : 'pts_against',
    'pass_td' : 'pass_tds',
    'pass_1st_down' : 'pass_first_downs',
    'rush_td' : 'rush_tds',
    'rush_1st_down' : 'rush_first_downs'
})
df.columns

Index(['date', 'year', 'opponent', 'location', 'result', 'pts_scored',
       'pts_against', 'pass_cmp', 'pass_att', 'pass_pct', 'pass_yrds',
       'pass_tds', 'pass_first_downs', 'rush_att', 'rush_yrds', 'rush_tds',
       'rush_first_downs', 'total_offense', 'fumbles', 'ints'],
      dtype='object')

In [21]:
# Even though the values of year are numeric, we are treating it as a categorical variable
# We can change year to an object type using .astype()
# Bracket notation is used for the assignment: attribute-style access (df.year) only works
# for existing columns whose names are valid identifiers and do not clash with DataFrame methods
df['year'] = df['year'].astype('object')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 172 entries, 0 to 171
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   date              172 non-null    object 
 1   year              172 non-null    object 
 2   opponent          172 non-null    object 
 3   location          172 non-null    object 
 4   result            172 non-null    object 
 5   pts_scored        172 non-null    int64  
 6   pts_against       172 non-null    int64  
 7   pass_cmp          172 non-null    int64  
 8   pass_att          172 non-null    int64  
 9   pass_pct          172 non-null    float64
 10  pass_yrds         172 non-null    int64  
 11  pass_tds          172 non-null    int64  
 12  pass_first_downs  172 non-null    int64  
 13  rush_att          172 non-null    int64  
 14  rush_yrds         172 non-null    int64  
 15  rush_tds          172 non-null    int64  
 16  rush_first_downs  172 non-null    int64  
 1

### Indexing into our DataFrame

To access a column, we can use bracket notation.

In [24]:
# .describe() gets us a numerical summary of each column that uses numbers
# (count, mean, std, min, quartiles, max); year is left out because it is now an object column
# .round() formats the output to 3 decimal places
df.describe().round(3)

Unnamed: 0,pts_scored,pts_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_tds,pass_first_downs,rush_att,rush_yrds,rush_tds,rush_first_downs,total_offense,fumbles,ints
count,172.0,172.0,172.0,172.0,172.0,172.0,172.0,172.0,172.0,172.0,172.0,172.0,172.0,172.0,172.0
mean,32.227,18.895,16.523,27.419,60.609,210.773,1.488,9.413,39.483,183.407,2.267,9.436,394.18,0.5,0.837
std,14.409,12.427,5.619,8.271,11.19,82.726,1.3,3.766,8.507,91.127,1.692,4.141,106.905,0.729,0.947
min,0.0,0.0,7.0,8.0,33.3,32.0,0.0,0.0,15.0,-48.0,0.0,0.0,158.0,0.0,0.0
25%,21.0,10.0,13.0,22.0,52.5,159.5,0.0,7.0,34.0,117.0,1.0,6.75,311.5,0.0,0.0
50%,31.0,17.0,16.0,27.0,60.0,206.0,1.0,9.0,39.5,168.5,2.0,9.0,402.5,0.0,1.0
75%,42.0,26.25,20.0,32.0,68.375,250.25,2.0,11.25,45.0,249.75,3.0,12.0,460.25,1.0,1.0
max,78.0,62.0,33.0,52.0,87.5,503.0,6.0,21.0,61.0,486.0,9.0,20.0,751.0,3.0,5.0


In [25]:
# Bracket notation selects a single column; .describe() then summarizes just that column
df['pts_scored'].describe()

count    172.000000
mean      32.226744
std       14.409256
min        0.000000
25%       21.000000
50%       31.000000
75%       42.000000
max       78.000000
Name: pts_scored, dtype: float64

In [26]:
# Single brackets with one column name return a Series
df['opponent']

0         Western Michigan
1               Notre Dame
2         Eastern Michigan
3          San Diego State
4                Minnesota
              ...         
167         Arkansas State
168    Southern California
169              Minnesota
170             Washington
171               Illinois
Name: opponent, Length: 172, dtype: object

In [27]:
# Double brackets (a list of column names inside the brackets) return a df,
# even when the list holds only one column
df[['opponent']]

Unnamed: 0,opponent
0,Western Michigan
1,Notre Dame
2,Eastern Michigan
3,San Diego State
4,Minnesota
...,...
167,Arkansas State
168,Southern California
169,Minnesota
170,Washington


In [28]:
# Listing several column names returns a df with just those columns, in the order given
df[['opponent', 'result']]

Unnamed: 0,opponent,result
0,Western Michigan,W
1,Notre Dame,W
2,Eastern Michigan,W
3,San Diego State,W
4,Minnesota,W
...,...,...
167,Arkansas State,W
168,Southern California,W
169,Minnesota,W
170,Washington,L


In [29]:
# .value_counts() lets us count the number of times each distinct value is in the column
# counts come back ordered from most to least frequent,
# so .head(n) shows the n most frequent opponents
df['opponent'].value_counts().head(20)

opponent
Michigan State      13
Ohio State          12
Penn State          11
Indiana             11
Rutgers             10
Minnesota            9
Maryland             9
Iowa                 8
Northwestern         7
Nebraska             7
Wisconsin            6
Illinois             6
Notre Dame           6
Purdue               5
Washington           3
Florida              3
Western Michigan     3
Alabama              3
Connecticut          2
South Carolina       2
Name: count, dtype: int64

In [30]:
# .corr(numeric_only=True) shows us the correlation between every pair of columns that hold numbers
# Values close to 1 indicate a strong positive relationship, values close to -1 a strong
# negative relationship, and values near 0 little linear relationship
corr_df = df.corr(numeric_only=True)
corr_df

Unnamed: 0,pts_scored,pts_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_tds,pass_first_downs,rush_att,rush_yrds,rush_tds,rush_first_downs,total_offense,fumbles,ints
pts_scored,1.0,-0.306179,0.098779,-0.210718,0.510223,0.361098,0.543833,0.231234,0.397414,0.598019,0.727049,0.510027,0.789188,-0.228934,-0.37045
pts_against,-0.306179,1.0,0.240822,0.459252,-0.277354,0.24424,0.020559,0.221087,-0.341835,-0.431584,-0.347385,-0.371176,-0.178888,0.176112,0.303079
pass_cmp,0.098779,0.240822,1.0,0.843498,0.42393,0.744645,0.335489,0.825911,-0.255992,-0.357629,-0.173476,-0.226509,0.27138,-0.00856,-0.033344
pass_att,-0.210718,0.459252,0.843498,1.0,-0.102644,0.588409,0.144588,0.671368,-0.374491,-0.531696,-0.401626,-0.39585,0.002103,0.140538,0.164747
pass_pct,0.510223,-0.277354,0.42393,-0.102644,1.0,0.387924,0.349843,0.403084,0.108266,0.201247,0.334109,0.217803,0.471732,-0.248519,-0.304671
pass_yrds,0.361098,0.24424,0.744645,0.588409,0.387924,1.0,0.561841,0.859077,-0.191715,-0.246671,0.028424,-0.174737,0.563563,-0.047725,0.016093
pass_tds,0.543833,0.020559,0.335489,0.144588,0.349843,0.561841,1.0,0.424402,-0.025666,0.068164,-0.049086,0.082966,0.492873,-0.141832,-0.106023
pass_first_downs,0.231234,0.221087,0.825911,0.671368,0.403084,0.859077,0.424402,1.0,-0.189504,-0.298617,-0.033938,-0.255704,0.410232,-0.071303,0.004193
rush_att,0.397414,-0.341835,-0.255992,-0.374491,0.108266,-0.191715,-0.025666,-0.189504,1.0,0.670979,0.447171,0.747992,0.423596,0.029684,-0.25072
rush_yrds,0.598019,-0.431584,-0.357629,-0.531696,0.201247,-0.246671,0.068164,-0.298617,0.670979,1.0,0.661291,0.855832,0.661532,-0.110842,-0.262555


#### Accessing Rows of a DataFrame

There are two ways to access a row using indices:
* `loc[]` selects rows by index label (for example, a date string once `date` is the index)
* `iloc[]` selects rows by integer position, counting from 0

In [33]:
# .set_index() makes an existing column the row index
# It returns a new DataFrame (stored in alt_df); df keeps its original integer index
alt_df = df.set_index('date')
alt_df.head(5)

Unnamed: 0_level_0,year,opponent,location,result,pts_scored,pts_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_tds,pass_first_downs,rush_att,rush_yrds,rush_tds,rush_first_downs,total_offense,fumbles,ints
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2011-09-03,2011,Western Michigan,Home,W,34,10,9,13,69.2,98,0,5,26,190,3,9,288,0,0
2011-09-10,2011,Notre Dame,Home,W,35,31,11,24,45.8,338,4,10,26,114,1,5,452,0,3
2011-09-17,2011,Eastern Michigan,Home,W,31,3,7,18,38.9,95,2,5,50,376,2,19,471,0,1
2011-09-24,2011,San Diego State,Home,W,28,7,8,17,47.1,93,0,3,45,320,4,14,413,2,2
2011-10-01,2011,Minnesota,Home,W,58,0,18,25,72.0,217,3,10,48,363,3,19,580,0,0


In [34]:
# .loc[] looks a row up by its index label (here, a date string)
# Returns a Series whose entries are labeled by column name
row1_loc = alt_df.loc['2011-09-10']
row1_loc

year                      2011
opponent            Notre Dame
location                  Home
result                       W
pts_scored                  35
pts_against                 31
pass_cmp                    11
pass_att                    24
pass_pct                  45.8
pass_yrds                  338
pass_tds                     4
pass_first_downs            10
rush_att                    26
rush_yrds                  114
rush_tds                     1
rush_first_downs             5
total_offense              452
fumbles                      0
ints                         3
Name: 2011-09-10, dtype: object

In [35]:
# .iloc[] looks a row up by integer position; position 1 is the second row,
# which is the same row as .loc['2011-09-10'] above
row1_iloc = alt_df.iloc[1]
row1_iloc

year                      2011
opponent            Notre Dame
location                  Home
result                       W
pts_scored                  35
pts_against                 31
pass_cmp                    11
pass_att                    24
pass_pct                  45.8
pass_yrds                  338
pass_tds                     4
pass_first_downs            10
rush_att                    26
rush_yrds                  114
rush_tds                     1
rush_first_downs             5
total_offense              452
fumbles                      0
ints                         3
Name: 2011-09-10, dtype: object

In [36]:
# To access multiple rows, we can slice the df
# .iloc[:51] keeps the rows at positions 0 through 50 (the end of a slice is excluded)
# NOTE(review): 51 is presumably the number of games in the 2011-2014 seasons -- confirm
hoke_era = df.iloc[:51]
hoke_era

Unnamed: 0,date,year,opponent,location,result,pts_scored,pts_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_tds,pass_first_downs,rush_att,rush_yrds,rush_tds,rush_first_downs,total_offense,fumbles,ints
0,2011-09-03,2011,Western Michigan,Home,W,34,10,9,13,69.2,98,0,5,26,190,3,9,288,0,0
1,2011-09-10,2011,Notre Dame,Home,W,35,31,11,24,45.8,338,4,10,26,114,1,5,452,0,3
2,2011-09-17,2011,Eastern Michigan,Home,W,31,3,7,18,38.9,95,2,5,50,376,2,19,471,0,1
3,2011-09-24,2011,San Diego State,Home,W,28,7,8,17,47.1,93,0,3,45,320,4,14,413,2,2
4,2011-10-01,2011,Minnesota,Home,W,58,0,18,25,72.0,217,3,10,48,363,3,19,580,0,0
5,2011-10-08,2011,Northwestern,Away,W,42,24,19,28,67.9,362,2,14,50,179,4,8,541,0,3
6,2011-10-15,2011,Michigan State,Away,L,14,28,12,31,38.7,168,1,6,36,82,1,8,250,0,1
7,2011-10-29,2011,Purdue,Home,W,36,14,10,17,58.8,196,0,7,53,339,4,17,535,0,2
8,2011-11-05,2011,Iowa,Away,L,16,24,18,38,47.4,196,2,10,37,127,0,10,323,1,1
9,2011-11-12,2011,Illinois,Away,W,31,14,8,15,53.3,139,1,6,48,223,3,8,362,2,1


To access a specific value within a df, we access by row, and then by column.

In [38]:
# .loc[row_label, column_label] reads one value in a single lookup.
# df still has its default integer index, so the label 164 is also row position 164.
# This avoids chained indexing (df.iloc[164]['rush_yrds']), which first builds the whole row.
# NOTE(review): row 164 is presumably the national championship game, per the variable name -- confirm
rush_yrds_natty = df.loc[164, 'rush_yrds']
rush_yrds_natty

303

In [39]:
# corr_df is labeled by column name on both axes, so .loc[row, column] reads one
# correlation in a single lookup instead of chaining corr_df[column][row]
rush_off_corr = corr_df.loc['total_offense', 'rush_yrds']
rush_off_corr

0.6615323775014653

### Analyzing Data With `groupby()`

In [41]:
# We can use the .groupby() function to form groups within our df
# Here the rows are grouped by the distinct values in the year column;
# the result is a GroupBy object that we reuse in the cells below
years = df.groupby('year')

In [42]:
# We can access each of the groups using the .get_group() function
# Passing a group key (here the year 2023) returns that group's rows as a DataFrame
games2023 = years.get_group(2023)
games2023

Unnamed: 0,date,year,opponent,location,result,pts_scored,pts_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_tds,pass_first_downs,rush_att,rush_yrds,rush_tds,rush_first_downs,total_offense,fumbles,ints
150,2023-09-02,2023,East Carolina,Home,W,30,3,26,31,83.9,280,3,18,31,122,1,4,402,0,0
151,2023-09-09,2023,Nevada-Las Vegas,Home,W,35,7,23,28,82.1,313,2,14,33,179,3,8,492,0,1
152,2023-09-16,2023,Bowling Green State,Home,W,31,6,8,13,61.5,143,2,5,31,169,2,10,312,1,3
153,2023-09-23,2023,Rutgers,Home,W,31,7,15,21,71.4,214,1,9,40,201,2,11,415,0,0
154,2023-09-30,2023,Nebraska,Away,W,45,7,16,23,69.6,187,3,10,51,249,3,15,436,0,0
155,2023-10-07,2023,Minnesota,Away,W,52,10,16,23,69.6,241,1,9,33,191,4,8,432,0,0
156,2023-10-14,2023,Indiana,Home,W,52,7,19,22,86.4,244,4,13,42,163,3,9,407,0,0
157,2023-10-21,2023,Michigan State,Away,W,49,0,28,35,80.0,357,4,18,34,120,2,6,477,0,0
158,2023-11-04,2023,Purdue,Home,W,41,13,24,37,64.9,335,0,15,34,110,5,5,445,1,0
159,2023-11-11,2023,Penn State,Away,W,24,15,7,8,87.5,60,0,4,46,227,3,10,287,0,0


In [43]:
# We can also use .groupby() paired with other functions to get some key calculations
# Ex: mean of grouped W-L results
# sort=False keeps the groups in order of first appearance rather than sorting the keys
# numeric_only=True skips the text columns; .round(4) formats the output
results = df.groupby('result', sort=False)
results.mean(numeric_only=True).round(4)

Unnamed: 0_level_0,pts_scored,pts_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_tds,pass_first_downs,rush_att,rush_yrds,rush_tds,rush_first_downs,total_offense,fumbles,ints
result,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
W,37.5242,13.9758,15.9032,25.0161,63.3935,210.6694,1.6855,9.2177,41.7984,214.9677,2.7742,10.629,425.6371,0.4032,0.6048
L,18.5417,31.6042,18.125,33.625,53.4167,211.0417,0.9792,9.9167,33.5,101.875,0.9583,6.3542,312.9167,0.75,1.4375


In [44]:
# .mean() with single column
# Select the column on the GroupBy object first so that only rush_tds is averaged,
# instead of averaging every numeric column and then discarding all but one
years['rush_tds'].mean()

year
2011    2.384615
2012    2.076923
2013    2.076923
2014    1.416667
2015    2.076923
2016    3.153846
2017    2.000000
2018    2.000000
2019    2.000000
2020    2.000000
2021    2.785714
2022    2.928571
2023    2.666667
2024    1.428571
Name: rush_tds, dtype: float64

In [45]:
# .sum() totals each numeric column within each result group (W / L)
results.sum(numeric_only=True)

Unnamed: 0_level_0,pts_scored,pts_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_tds,pass_first_downs,rush_att,rush_yrds,rush_tds,rush_first_downs,total_offense,fumbles,ints
result,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
W,4653,1733,1972,3102,7860.8,26123,209,1143,5183,26656,344,1318,52779,50,75
L,890,1517,870,1614,2564.0,10130,47,476,1608,4890,46,305,15020,36,69


In [46]:
# .max() gives the largest value of each numeric column within each result group
results.max(numeric_only=True)

Unnamed: 0_level_0,pts_scored,pts_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_tds,pass_first_downs,rush_att,rush_yrds,rush_tds,rush_first_downs,total_offense,fumbles,ints
result,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
W,78,47,33,48,87.5,503,6,20,61,486,9,20,751,3,3
L,45,62,32,52,69.7,451,4,21,54,292,3,13,603,3,5


In [47]:
# .min() gives the smallest value of each numeric column within each result group
results.min(numeric_only=True)

Unnamed: 0_level_0,pts_scored,pts_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_tds,pass_first_downs,rush_att,rush_yrds,rush_tds,rush_first_downs,total_offense,fumbles,ints
result,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
W,10,0,7,8,38.9,32,0,0,26,56,0,3,184,0,0
L,0,13,9,20,33.3,88,0,3,15,-48,0,0,158,0,0


In [48]:
# .median() gives the middle value of each numeric column within each result group
results.median(numeric_only=True)

Unnamed: 0_level_0,pts_scored,pts_against,pass_cmp,pass_att,pass_pct,pass_yrds,pass_tds,pass_first_downs,rush_att,rush_yrds,rush_tds,rush_first_downs,total_offense,fumbles,ints
result,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
W,35.0,13.0,16.0,25.0,63.8,208.0,2.0,9.0,41.5,198.5,3.0,10.0,433.5,0.0,0.0
L,16.0,30.0,18.0,33.0,53.0,199.0,1.0,9.0,35.0,96.5,1.0,6.0,299.5,1.0,1.0
