# Sports Analytics Basics

Authored by Zeke Weng

### Lesson Plan

* Python
* Pandas
* NumPy

In [None]:
# Import the required libraries under their conventional aliases

import pandas as pd
import numpy as np
import numpy as np

### Pandas Basics

In [None]:
# Creating Series
# A Series is a one-dimensional labelled array. np.nan marks a missing
# value; because of it the integers are stored as float64.

s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [None]:
# Show the Series: index labels on the left, values on the right.
s

In [None]:
# Creating DataFrames
# The dict maps column name -> column data. Scalar entries are repeated on
# every row, and each column keeps its own dtype.

df = pd.DataFrame(
    data={
        "Basketball": 1.0,  # scalar float, repeated on each row
        "Football": pd.Timestamp("20130102"),  # one timestamp, repeated
        "Hockey": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),
        "Soccer": np.full(4, 3, dtype="int32"),  # four 3s as 32-bit ints
        "Bouldering": pd.Categorical(["test", "train", "test", "train"]),
    },
)

In [None]:
# Show the DataFrame built from the dict of columns.
df

In [None]:
# Retrieving data
# pd.read_csv accepts a URL as well as a local path, so the CSV is fetched
# and parsed into a DataFrame in one step (requires network access).

url = 'https://raw.githubusercontent.com/UBC-MDS/DSCI_522_OlympicMedalPrediction/master/data/athlete_events.csv'
olympics = pd.read_csv(url)

In [None]:
# Observing entries
# head(n) returns the first n rows, tail(n) the last n rows.
# NOTE(review): if both lines run in a single notebook cell, only the last
# expression (the tail) is rendered -- run them separately to see both.

olympics.head(20)

olympics.tail(20)

In [None]:
# Indexing into Data
# .loc with a boolean mask keeps the rows where the condition is True.
# `olympics.Age` (attribute access) and `olympics['Age']` (key access)
# select the same column.
olympics.loc[olympics.Age == 19]
olympics.loc[olympics['Team'] == 'United States']
olympics.loc[olympics['Team'] == 'University of Toronto']

# .iloc selects by integer position: the first ten rows, then the last row
# (the slice keeps the result a one-row DataFrame).
olympics.iloc[0:10]
olympics.iloc[-1:]

In [None]:
# Sorting Values
# sort_values returns a new, sorted DataFrame; `olympics` itself is not
# reordered because the result is not assigned back.

olympics.sort_values(by='Year', ascending=True)   # oldest first
olympics.sort_values(by='Year', ascending=False)  # most recent first

In [None]:
# Boolean Selection
# Combine element-wise conditions with & (and) and | (or). Each comparison
# must be parenthesised because & and | bind more tightly than ==.

# Rows matching all three conditions.
olympics.loc[(olympics['Age'] == 19) & (olympics['Team'] == 'Canada') & (olympics['Year'] == 2016)]
# Rows matching at least one of the three conditions.
olympics.loc[(olympics['Age'] == 19) | (olympics['Team'] == 'Canada') | (olympics['Year'] == 2016)]

In [None]:
# List the column labels of the DataFrame.
olympics.columns

In [None]:
# Removing Columns
# Without inplace=True, drop returns a new DataFrame without the 'ID'
# column; `olympics` itself is left untouched.

olympics.drop(columns='ID')

In [None]:
# Display the frame again to check which columns it currently holds.
olympics

In [None]:
# inplace=True modifies `olympics` directly (the call returns None).
# Running this a second time raises KeyError because 'ID' no longer exists.
olympics.drop(columns='ID', inplace=True)
olympics

In [None]:
# Cleaning Data
# fillna(0) returns a new DataFrame in which every missing value, in every
# column, is replaced by 0. The result is only displayed, not stored.

olympics.fillna(0)

In [None]:
# Replace missing medals with 0 by assigning the filled column back.
# fillna(..., inplace=True) on olympics['Medal'] acts on an intermediate
# Series: pandas warns about this pattern, and with Copy-on-Write enabled
# the DataFrame would be left unchanged.
olympics['Medal'] = olympics['Medal'].fillna(0)

In [None]:
# Removing Rows
# With default arguments dropna removes every row that still contains at
# least one missing value; inplace=True applies this to `olympics` itself.

olympics.dropna(inplace=True)

In [None]:
# Descriptive Statistics
# The double brackets select a two-column DataFrame; for numeric columns
# describe() reports count, mean, std, min, quartiles and max.

olympics[['Height', 'Weight']].describe()

In [None]:
# Value Counts
# Count how often each distinct value occurs in 'Medal', most frequent
# first (missing values are excluded by default).
olympics['Medal'].value_counts()

In [None]:
# Vectorized Operations

def convert_to_freedom_units(x):
    """Return ``x`` divided by 2.54.

    2.54 is the number of centimetres in one inch, so this presumably
    converts a length in centimetres to inches (assumes the input is in
    cm -- confirm against the data being passed in).

    Works for anything that supports division by a float: plain numbers,
    and element-wise for NumPy arrays and pandas Series.
    """
    centimetres_per_inch = 2.54
    inches = x / centimetres_per_inch
    return inches


# Pass the whole column to the function in a single call: the division
# inside it is applied element-wise by pandas, so no per-row Python call
# (as with .apply) is needed and the resulting values are the same.
# NOTE: this overwrites 'Height', so re-running the cell converts the
# already-converted values again.
olympics['Height'] = convert_to_freedom_units(olympics['Height'])
olympics

In [None]:
# Merging / Joining
# First split the data into two frames by 'Sex' so there is something to
# combine. Plain [] with a boolean mask and .loc with the same kind of mask
# both select the rows where the mask is True.

olympics_M = olympics[olympics['Sex']=='M']
olympics_F = olympics.loc[olympics['Sex'] == 'F']

olympics_M

In [None]:
# Show the rows whose 'Sex' is 'F'.
olympics_F

In [None]:
# Stack the two frames vertically: all rows of olympics_M followed by all
# rows of olympics_F, each keeping its original index label.
pd.concat([olympics_M, olympics_F])

In [None]:
# Observing
# unique() returns the distinct values of the column as an array.
olympics.Team.unique()

# nunique() returns how many distinct values there are (missing values are
# not counted by default).
olympics.Team.nunique()

In [None]:
# Grouping Data
# Group the rows by 'NOC', then count each 'Medal' value inside every
# group. The result is a Series indexed by (NOC, Medal) pairs.

olympics.groupby(by='NOC')['Medal'].value_counts()

In [None]:
# Converting to numpy
# to_numpy() returns the values as a 2-D NumPy array. `df` mixes floats,
# timestamps, integers and categoricals, so the array falls back to the
# generic `object` dtype.

df.to_numpy()

In [None]:
# Exporting to CSV (a plain-text format that Excel can open; use
# DataFrame.to_excel for a native workbook).
# With default arguments the index is written too, as the first column.

olympics.to_csv('olympics.csv')