# NBA Game Feature Transformation

This notebook focuses on transforming and normalizing our features for better model performance.


## 1. Import Libraries
Import all necessary libraries.

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import os
from tqdm import tqdm

# Notebook display settings: never truncate columns of wide frames, and show
# at most 100 rows before pandas abbreviates the output.
for _option, _value in (('display.max_columns', None), ('display.max_rows', 100)):
    pd.set_option(_option, _value)

## 2. Load Engineered Data
Load the engineered data from the previous step.

In [None]:
# Read the engineered games table from disk into a DataFrame.
games_df = pd.read_csv('../data/engineered/engineered_games.csv')
# Report the row count so a truncated or empty input file is noticed right away.
print(f'Loaded {games_df.shape[0]} games')
# Trailing expression: the notebook renders a preview of the first rows.
games_df.head()

## 3. Handle Missing Values
Fill missing values in the numeric columns with each column's mean, using `SimpleImputer`.

In [None]:
# Mean-impute missing values in the numeric columns.
numeric_cols = games_df.select_dtypes(include=[np.number]).columns

# SimpleImputer cannot compute a mean for a column with no observed values and
# drops such columns from its output; assigning that narrower array back to
# games_df[numeric_cols] would fail with a shape mismatch. Impute only columns
# that contain at least one non-missing value.
imputable_cols = [col for col in numeric_cols if games_df[col].notna().any()]
skipped_cols = [col for col in numeric_cols if col not in imputable_cols]

imputer = SimpleImputer(strategy='mean')
if imputable_cols:
    games_df[imputable_cols] = imputer.fit_transform(games_df[imputable_cols])
if skipped_cols:
    # Surface the problem instead of silently leaving NaNs behind.
    print(f'Skipped all-missing columns (left as NaN): {skipped_cols}')
print('Missing values filled.')

## 4. Remove Outliers
Remove outliers using the IQR method: drop rows with a numeric value outside [Q1 − 1.5·IQR, Q3 + 1.5·IQR].

In [None]:
# Remove rows that are IQR outliers in any numeric column.
#
# All fences are computed on the same, unfiltered frame and the rows are dropped
# once at the end. Filtering inside the loop would make each column's quartiles
# depend on what earlier columns already removed, so the result would change
# with column order and trim more data with every column.
outlier_mask = pd.Series(False, index=games_df.index)
for col in numeric_cols:
    Q1 = games_df[col].quantile(0.25)
    Q3 = games_df[col].quantile(0.75)
    IQR = Q3 - Q1
    if not IQR > 0:
        # Zero IQR (constant or heavily skewed flag-like column) collapses both
        # fences onto one value, which would discard every row that differs
        # from it. A NaN IQR (no observed values) also lands here. Skip both.
        continue
    outlier_mask |= (games_df[col] < (Q1 - 1.5 * IQR)) | (games_df[col] > (Q3 + 1.5 * IQR))

rows_before = len(games_df)
# .copy() yields an independent frame, so later column assignments do not
# operate on a slice of the unfiltered data.
games_df = games_df[~outlier_mask].copy()
print(f'Dropped {rows_before - len(games_df)} of {rows_before} rows as outliers.')
print('Outliers removed.')

## 5. Feature Scaling
Scale the numeric features to zero mean and unit variance using `StandardScaler`.

In [None]:
# Standardize the numeric columns. The scaler is fitted first and then applied
# as a separate step; `scaler` stays available afterwards with its learned
# statistics.
scaler = StandardScaler()
scaler.fit(games_df[numeric_cols])
games_df[numeric_cols] = scaler.transform(games_df[numeric_cols])
print('Features scaled.')

## 6. Save Transformed Data
Save the transformed data for the next step.

In [None]:
# Write the transformed frame to CSV without the index column.
output_path = '../data/transformed/transformed_games.csv'
# to_csv does not create missing parent directories, so make sure the target
# folder exists (no-op when it is already there).
os.makedirs(os.path.dirname(output_path), exist_ok=True)
games_df.to_csv(output_path, index=False)
print(f'Transformed data saved to {output_path}')