# College Basketball Analysis

This project explores NCAA college basketball team statistics from a recent season using **pandas**, **numpy**, and **seaborn**. It answers questions about team performance, efficiency, and rankings to showcase fundamental data analysis skills.


In [1]:
# Import libraries 

import pandas as pd
import numpy as np
import seaborn as sns

In [5]:
# Read the team statistics workbook (first sheet) into a DataFrame,
# then preview the top five rows to confirm the load worked.
basketballData = pd.read_excel('../Data/CollegeBasketball.xlsx', sheet_name=0)
basketballData.head(n=5)

Unnamed: 0,TEAM,CONF,G,W,ADJOE,ADJDE,BARTHAG,EFG%,EFGD%,TOR,...,FTR,FTRD,2P_O,2P_D,3P_O,3P_D,ADJ_T,WAB,POSTSEASON,SEED
0,Houston,B12,34,30,119.2,85.5,0.9785,49.7,44.0,13.7,...,29.9,39.0,48.4,43.4,34.7,30.0,63.3,10.6,S16,1.0
1,Connecticut,BE,34,31,127.1,93.6,0.9712,57.1,45.1,14.9,...,33.3,32.5,58.5,43.7,36.7,31.9,64.6,11.3,Champions,1.0
2,Purdue,B10,33,29,126.2,94.7,0.9644,56.0,47.7,16.5,...,42.8,23.0,53.2,48.1,40.8,31.4,67.6,11.0,2ND,1.0
3,North Carolina,ACC,34,27,116.8,93.2,0.9305,51.3,46.4,14.4,...,36.8,28.3,50.3,46.0,35.4,31.4,70.4,6.6,S16,1.0
4,Iowa St.,B12,34,27,113.6,86.5,0.9583,51.9,47.1,15.7,...,36.1,35.2,51.7,46.9,34.9,31.5,67.6,6.9,S16,2.0


# Data Exploration

In [None]:
# What are the column names in the dataset?
# `.columns` is the Index of column labels; as the last expression in the
# cell it is displayed as the cell's output.
basketballData.columns

Index(['TEAM', 'CONF', 'G', 'W', 'ADJOE', 'ADJDE', 'BARTHAG', 'EFG%', 'EFGD%',
       'TOR', 'TORD', 'ORB', 'DRB', 'FTR', 'FTRD', '2P_O', '2P_D', '3P_O',
       '3P_D', 'ADJ_T', 'WAB', 'POSTSEASON', 'SEED'],
      dtype='object')

In [11]:
# How many rows and columns are in the dataset?
# `.shape` is a (number of rows, number of columns) tuple.
basketballData.shape

(725, 23)

In [14]:
# What are the data types of each column?
# `.dtypes` lists one dtype per column; `object` columns hold generic Python
# objects (presumably strings here, e.g. team and conference names).
basketballData.dtypes

TEAM           object
CONF           object
G               int64
W               int64
ADJOE         float64
ADJDE         float64
BARTHAG       float64
EFG%          float64
EFGD%         float64
TOR           float64
TORD          float64
ORB           float64
DRB           float64
FTR           float64
FTRD          float64
2P_O          float64
2P_D          float64
3P_O          float64
3P_D          float64
ADJ_T         float64
WAB           float64
POSTSEASON     object
SEED          float64
dtype: object

In [16]:
# Count the missing (NaN) entries in every column; a count of 0 means the
# column is fully populated.
basketballData.isna().sum(axis=0)

TEAM            0
CONF            0
G               0
W               0
ADJOE           0
ADJDE           0
BARTHAG         0
EFG%            0
EFGD%           0
TOR             0
TORD            0
ORB             0
DRB             0
FTR             0
FTRD            0
2P_O            0
2P_D            0
3P_O            0
3P_D            0
ADJ_T           0
WAB             0
POSTSEASON    589
SEED          589
dtype: int64

In [17]:
# What are the summary statistics of the numeric columns?
# With default arguments `describe()` reports count, mean, std, min,
# quartiles and max, and only for the numeric columns.
basketballData.describe()

Unnamed: 0,G,W,ADJOE,ADJDE,BARTHAG,EFG%,EFGD%,TOR,TORD,ORB,DRB,FTR,FTRD,2P_O,2P_D,3P_O,3P_D,ADJ_T,WAB,SEED
count,725.0,725.0,725.0,725.0,725.0,725.0,725.0,725.0,725.0,725.0,725.0,725.0,725.0,725.0,725.0,725.0,725.0,725.0,725.0,136.0
mean,31.227586,17.008276,104.652276,104.652414,0.49357,50.369517,50.517931,17.693517,17.639172,28.620276,28.795862,32.28731,32.455862,50.190207,50.294897,33.797655,33.930207,67.310897,-8.056097,8.779412
std,2.271444,6.009195,7.148313,6.108328,0.252749,2.88052,2.545971,2.081197,2.359022,4.031158,2.912378,4.672694,5.445335,3.289986,2.886335,2.418862,2.170797,2.470434,6.864529,4.675692
min,21.0,1.0,85.1,85.5,0.0283,41.0,41.3,12.0,11.2,17.0,20.2,20.9,16.2,40.4,40.8,24.7,26.5,58.7,-24.8,1.0
25%,30.0,13.0,99.7,100.3,0.2762,48.6,48.9,16.3,16.1,25.9,26.7,29.1,28.5,48.1,48.4,32.2,32.5,65.7,-13.0,5.0
50%,31.0,17.0,104.1,104.9,0.4655,50.6,50.5,17.7,17.4,28.6,28.8,32.0,31.8,50.2,50.4,33.8,34.0,67.2,-8.8,9.0
75%,33.0,21.0,109.5,109.4,0.7288,52.1,52.2,18.9,18.9,31.5,30.7,35.2,35.7,52.2,52.2,35.6,35.4,68.9,-3.1,13.0
max,39.0,35.0,127.1,120.7,0.9785,59.9,58.2,24.0,26.0,41.9,38.6,50.0,51.4,62.0,59.0,41.2,41.0,75.1,11.3,16.0
