# Combine All Parts: Movement Data 2015-2016

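This notebook concatenates the three partial movement files listed below and writes the final aggregated CSV: one row per player with that player's average speed and distance for the 2015-2016 season.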

**Input files:**
- `pt1_movement_2015-2016.csv` (games 1-600)
- `pt2_movement_2015-2016.csv` (games 601-1200)
- `pt3_movement_2015-2016.csv` (games 1201-1230)

**Output:**
- `movement_2015-2016.csv` - Final aggregated data with player season averages

In [None]:
import pandas as pd

## Step 1: Load all 3 parts

In [None]:
# Read the three partial movement exports, in order; the paths are relative
# to the directory the notebook is run from.
pt1, pt2, pt3 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../data/pt1_movement_2015-2016.csv',
        '../../data/pt2_movement_2015-2016.csv',
        '../../data/pt3_movement_2015-2016.csv',
    )
)

# Report each part's row count as a quick sanity check on the load.
print(f"Part 1: {len(pt1)} rows")
print(f"Part 2: {len(pt2)} rows")
print(f"Part 3: {len(pt3)} rows")

## Step 2: Combine all parts

In [None]:
# Stack the three parts row-wise. ignore_index=True discards each part's own
# row labels so the result is numbered 0..n-1 without repeats.
combined_df = pd.concat((pt1, pt2, pt3), axis=0, ignore_index=True)

# Summarize the stacked frame: total rows and number of distinct personId values.
print(f"Total combined rows: {len(combined_df)}")
print(f"Unique players: {combined_df['personId'].nunique()}")

## Step 3: Aggregate by player (season averages)

In [None]:
# Build the per-player season table: one output row per distinct personId.
# 'full_name' keeps the first value seen in each group; 'speed' and
# 'distance' are averaged over all of that player's rows, then rounded to
# 2 decimal places.
# NOTE(review): the "per_game" column names assume each input row is one
# player-game - confirm against whatever produced the pt* files.
player_averages = (
    combined_df
    .groupby('personId')
    .agg({'full_name': 'first', 'speed': 'mean', 'distance': 'mean'})
    .reset_index()
    # Map each source column to its requested output name explicitly.
    .rename(columns={
        'personId': 'id',
        'speed': 'avg_speed_per_game',
        'distance': 'avg_distance_per_game',
    })
    .round({'avg_speed_per_game': 2, 'avg_distance_per_game': 2})
)

print(f"Aggregated stats for {len(player_averages)} unique players")
player_averages.head(10)

## Step 4: Export final CSV

In [None]:
# Write the per-player table to the final CSV. index=False keeps the
# DataFrame's row labels out of the file, so it holds only the data columns.
output_path = '../../data/movement_2015-2016.csv'
player_averages.to_csv(path_or_buf=output_path, index=False)

# Confirm where the file was written and how many player rows it contains.
print(f"Saved final aggregated data to {output_path}")
print(f"Total players: {len(player_averages)}")