# TeamsCSV Modification


In [184]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path

# Location of the raw teams file (a path relative to the working directory).
teams_data_loading = Path("teams.csv")

# Parse the file with pandas' default (comma) delimiter. As the preview shows,
# the semicolon-separated fields arrive as one combined column.
teams_data = pd.read_csv(filepath_or_buffer=teams_data_loading)

# Preview the first ten rows.
teams_data.head(n=10)

Unnamed: 0,W;L;W/L%;GB;PS/G;PA/G;SRS;Year;Team
0,52;30;0.634;�;94.4;91.3;2.75;2000;Miami Heat*
1,50;32;0.61;2;92.1;90.7;1.3;2000;New York Knicks*
2,49;33;0.598;3;94.8;93.4;1.02;2000;Philadelphia...
3,41;41;0.5;11;100.1;99.4;0.43;2000;Orlando Magic
4,35;47;0.427;17;99.3;100.1;-1;2000;Boston Celtics
5,31;51;0.378;21;98;99;-1.18;2000;New Jersey Nets
6,29;53;0.354;23;96.6;99.9;-3.47;2000;Washington...
7,Central Division;Central Division;Central Divi...
8,56;26;0.683;�;101.3;96.7;4.15;2000;Indiana Pac...
9,49;33;0.598;7;98.4;95.8;2.33;2000;Charlotte Ho...


In [185]:
# Break the single combined column apart on ";" so every field gets its own
# column, give the positional columns readable names, then discard the two
# columns that are not needed ('Games Behind' and 'SRS').
raw_column = 'W;L;W/L%;GB;PS/G;PA/G;SRS;Year;Team'
readable_names = [
    "Wins", "Losses", "Win %", "Games Behind",
    "Points Per Game", "Opp. Points Per Game", "SRS",
    "End Year", "Team",
]

teams_data_clean = (
    teams_data[raw_column]
    .str.split(pat=";", expand=True)
    # enumerate() maps position 0..8 onto the names above.
    .rename(columns=dict(enumerate(readable_names)))
    .drop(columns=['Games Behind', 'SRS'])
)

teams_data_clean.head(n=10)

Unnamed: 0,Wins,Losses,Win %,Points Per Game,Opp. Points Per Game,End Year,Team
0,52,30,0.634,94.4,91.3,2000,Miami Heat*
1,50,32,0.61,92.1,90.7,2000,New York Knicks*
2,49,33,0.598,94.8,93.4,2000,Philadelphia 76ers*
3,41,41,0.5,100.1,99.4,2000,Orlando Magic
4,35,47,0.427,99.3,100.1,2000,Boston Celtics
5,31,51,0.378,98,99,2000,New Jersey Nets
6,29,53,0.354,96.6,99.9,2000,Washington Wizards
7,Central Division,Central Division,Central Division,Central Division,Central Division,2000,Central Division
8,56,26,0.683,101.3,96.7,2000,Indiana Pacers*
9,49,33,0.598,98.4,95.8,2000,Charlotte Hornets*


In [186]:
# Drop the division header rows that are interleaved with the team rows.
# A header row repeats the division name in the stat columns, so any row whose
# 'Wins' value contains "Division" is a header rather than a team.
# Matching on the word (instead of an explicit list of division names) also
# removes "Atlantic Division", which the previous hard-coded list did not
# include; a header left behind would make the integer conversion of 'Wins'
# fail further down.
# na=False keeps rows with a missing 'Wins' value, as the previous filter did.
is_division_header = teams_data_clean['Wins'].str.contains(
    'Division', regex=False, na=False)
division_drop = teams_data_clean[~is_division_header]

division_drop.head(10)

Unnamed: 0,Wins,Losses,Win %,Points Per Game,Opp. Points Per Game,End Year,Team
0,52,30,0.634,94.4,91.3,2000,Miami Heat*
1,50,32,0.61,92.1,90.7,2000,New York Knicks*
2,49,33,0.598,94.8,93.4,2000,Philadelphia 76ers*
3,41,41,0.5,100.1,99.4,2000,Orlando Magic
4,35,47,0.427,99.3,100.1,2000,Boston Celtics
5,31,51,0.378,98.0,99.0,2000,New Jersey Nets
6,29,53,0.354,96.6,99.9,2000,Washington Wizards
8,56,26,0.683,101.3,96.7,2000,Indiana Pacers*
9,49,33,0.598,98.4,95.8,2000,Charlotte Hornets*
10,45,37,0.549,97.2,97.3,2000,Toronto Raptors*


In [187]:
# Cast the columns to numeric types: the win/loss counts and the year become
# integers, the ratio and per-game averages become floats. 'Team' is not
# listed, so its dtype is left as it is.
integer_columns = ['Wins', 'Losses', 'End Year']
float_columns = ['Win %', 'Points Per Game', 'Opp. Points Per Game']

convert_dict = {
    **dict.fromkeys(integer_columns, int),
    **dict.fromkeys(float_columns, float),
}
data_type_switch = division_drop.astype(dtype=convert_dict)
data_type_switch.dtypes


Wins                      int32
Losses                    int32
Win %                   float64
Points Per Game         float64
Opp. Points Per Game    float64
End Year                  int32
Team                     object
dtype: object

In [188]:
# Build a 'Season Year' label of the form "<begin>-<end>" (e.g. "1999-2000").
# The begin year is the end year minus one; it has to be computed while
# 'End Year' is still numeric.
df = data_type_switch.assign(Begin_year=lambda frame: frame['End Year'] - 1)

# Both year columns are turned into text so they can be joined with a dash.
data_type_switch_year = df.astype({'End Year': str, 'Begin_year': str})

data_type_switch_year['Season Year'] = data_type_switch_year['Begin_year'].str.cat(
    data_type_switch_year['End Year'], sep="-")

data_type_switch_year.head(n=5)

Unnamed: 0,Wins,Losses,Win %,Points Per Game,Opp. Points Per Game,End Year,Team,Begin_year,Season Year
0,52,30,0.634,94.4,91.3,2000,Miami Heat*,1999,1999-2000
1,50,32,0.61,92.1,90.7,2000,New York Knicks*,1999,1999-2000
2,49,33,0.598,94.8,93.4,2000,Philadelphia 76ers*,1999,1999-2000
3,41,41,0.5,100.1,99.4,2000,Orlando Magic,1999,1999-2000
4,35,47,0.427,99.3,100.1,2000,Boston Celtics,1999,1999-2000


In [189]:
# Only the combined 'Season Year' label is kept; the two helper year columns
# it was built from are removed.
helper_year_columns = ['End Year', 'Begin_year']
new_season_data = data_type_switch_year.drop(labels=helper_year_columns, axis="columns")

new_season_data.head(n=5)


Unnamed: 0,Wins,Losses,Win %,Points Per Game,Opp. Points Per Game,Team,Season Year
0,52,30,0.634,94.4,91.3,Miami Heat*,1999-2000
1,50,32,0.61,92.1,90.7,New York Knicks*,1999-2000
2,49,33,0.598,94.8,93.4,Philadelphia 76ers*,1999-2000
3,41,41,0.5,100.1,99.4,Orlando Magic,1999-2000
4,35,47,0.427,99.3,100.1,Boston Celtics,1999-2000


In [190]:
# Create a new, independent DataFrame holding the results so far.
# Passing an existing DataFrame to pd.DataFrame(data=...) does not copy it by
# default, so the "new" frame would share its underlying data with
# new_season_data; .copy() makes the separation explicit.
teams_data_complete = new_season_data.copy()

teams_data_complete.head(10)

Unnamed: 0,Wins,Losses,Win %,Points Per Game,Opp. Points Per Game,Team,Season Year
0,52,30,0.634,94.4,91.3,Miami Heat*,1999-2000
1,50,32,0.61,92.1,90.7,New York Knicks*,1999-2000
2,49,33,0.598,94.8,93.4,Philadelphia 76ers*,1999-2000
3,41,41,0.5,100.1,99.4,Orlando Magic,1999-2000
4,35,47,0.427,99.3,100.1,Boston Celtics,1999-2000
5,31,51,0.378,98.0,99.0,New Jersey Nets,1999-2000
6,29,53,0.354,96.6,99.9,Washington Wizards,1999-2000
8,56,26,0.683,101.3,96.7,Indiana Pacers*,1999-2000
9,49,33,0.598,98.4,95.8,Charlotte Hornets*,1999-2000
10,45,37,0.549,97.2,97.3,Toronto Raptors*,1999-2000


In [191]:
# Renumber the rows 0..n-1 so the gaps left by the removed division rows
# disappear. drop=True discards the old index directly, instead of first
# inserting it as an 'index' column and then dropping that column again.
teams_data_complete = teams_data_complete.reset_index(drop=True)

teams_data_complete

Unnamed: 0,Wins,Losses,Win %,Points Per Game,Opp. Points Per Game,Team,Season Year
0,52,30,0.634,94.4,91.3,Miami Heat*,1999-2000
1,50,32,0.610,92.1,90.7,New York Knicks*,1999-2000
2,49,33,0.598,94.8,93.4,Philadelphia 76ers*,1999-2000
3,41,41,0.500,100.1,99.4,Orlando Magic,1999-2000
4,35,47,0.427,99.3,100.1,Boston Celtics,1999-2000
...,...,...,...,...,...,...,...
590,53,29,0.646,113.9,109.1,Houston Rockets*,2018-2019
591,48,34,0.585,111.7,110.0,San Antonio Spurs*,2018-2019
592,33,49,0.402,103.5,106.1,Memphis Grizzlies,2018-2019
593,33,49,0.402,115.4,116.8,New Orleans Pelicans,2018-2019


In [192]:
# Remove the asterisk marker from the team names.
# Only the text before the first "*" is kept. Taking element 0 of the split
# (rather than expanding into columns and dropping column 1) also works when
# no name contains an asterisk -- in that case there is no column 1 to drop --
# and never leaves extra columns behind if a name has more than one asterisk.
# The result is a one-column DataFrame named 'Teams' on the same row index.
team_playoff_split = (
    teams_data_complete['Team']
    .str.split(pat="*", n=1)
    .str[0]
    .to_frame(name='Teams')
)

team_playoff_split

Unnamed: 0,Teams
0,Miami Heat
1,New York Knicks
2,Philadelphia 76ers
3,Orlando Magic
4,Boston Celtics
...,...
590,Houston Rockets
591,San Antonio Spurs
592,Memphis Grizzlies
593,New Orleans Pelicans


In [193]:
# Attach the cleaned team names to the main frame as a 'Teams' column.
# team_playoff_split was derived from teams_data_complete, so both share the
# same row index and assign() lines the values up by index, giving the same
# result as the index-on-index merge. Unlike the merge, running this cell a
# second time simply overwrites 'Teams' instead of producing duplicated
# 'Teams_x' / 'Teams_y' columns.
teams_data_complete = teams_data_complete.assign(Teams=team_playoff_split['Teams'])

teams_data_complete

Unnamed: 0,Wins,Losses,Win %,Points Per Game,Opp. Points Per Game,Team,Season Year,Teams
0,52,30,0.634,94.4,91.3,Miami Heat*,1999-2000,Miami Heat
1,50,32,0.610,92.1,90.7,New York Knicks*,1999-2000,New York Knicks
2,49,33,0.598,94.8,93.4,Philadelphia 76ers*,1999-2000,Philadelphia 76ers
3,41,41,0.500,100.1,99.4,Orlando Magic,1999-2000,Orlando Magic
4,35,47,0.427,99.3,100.1,Boston Celtics,1999-2000,Boston Celtics
...,...,...,...,...,...,...,...,...
590,53,29,0.646,113.9,109.1,Houston Rockets*,2018-2019,Houston Rockets
591,48,34,0.585,111.7,110.0,San Antonio Spurs*,2018-2019,San Antonio Spurs
592,33,49,0.402,103.5,106.1,Memphis Grizzlies,2018-2019,Memphis Grizzlies
593,33,49,0.402,115.4,116.8,New Orleans Pelicans,2018-2019,New Orleans Pelicans


In [194]:
# The raw 'Team' column (names still carrying the asterisk) is no longer
# needed now that 'Teams' holds the cleaned names, so take it out.
teams_data_complete = teams_data_complete.drop(labels='Team', axis="columns")

teams_data_complete.head(n=5)

Unnamed: 0,Wins,Losses,Win %,Points Per Game,Opp. Points Per Game,Season Year,Teams
0,52,30,0.634,94.4,91.3,1999-2000,Miami Heat
1,50,32,0.61,92.1,90.7,1999-2000,New York Knicks
2,49,33,0.598,94.8,93.4,1999-2000,Philadelphia 76ers
3,41,41,0.5,100.1,99.4,1999-2000,Orlando Magic
4,35,47,0.427,99.3,100.1,1999-2000,Boston Celtics


In [197]:
# Give the cleaned name column its final label: 'Teams' -> 'Team'.
teams_data_complete = teams_data_complete.rename({'Teams': 'Team'}, axis="columns")

teams_data_complete.head(n=5)

Unnamed: 0,Wins,Losses,Win %,Points Per Game,Opp. Points Per Game,Season Year,Team
0,52,30,0.634,94.4,91.3,1999-2000,Miami Heat
1,50,32,0.61,92.1,90.7,1999-2000,New York Knicks
2,49,33,0.598,94.8,93.4,1999-2000,Philadelphia 76ers
3,41,41,0.5,100.1,99.4,1999-2000,Orlando Magic
4,35,47,0.427,99.3,100.1,1999-2000,Boston Celtics


In [198]:
# Reorder the columns so the team name comes first and the season label last.
# Note: reindex() fills a column with NaN if a listed name does not exist,
# so the names here must match the existing columns exactly.
column_order = [
    'Team',
    'Wins',
    'Losses',
    'Win %',
    'Points Per Game',
    'Opp. Points Per Game',
    'Season Year',
]
teams_data_complete = teams_data_complete.reindex(columns=column_order)

teams_data_complete


Unnamed: 0,Team,Wins,Losses,Win %,Points Per Game,Opp. Points Per Game,Season Year
0,Miami Heat,52,30,0.634,94.4,91.3,1999-2000
1,New York Knicks,50,32,0.610,92.1,90.7,1999-2000
2,Philadelphia 76ers,49,33,0.598,94.8,93.4,1999-2000
3,Orlando Magic,41,41,0.500,100.1,99.4,1999-2000
4,Boston Celtics,35,47,0.427,99.3,100.1,1999-2000
...,...,...,...,...,...,...,...
590,Houston Rockets,53,29,0.646,113.9,109.1,2018-2019
591,San Antonio Spurs,48,34,0.585,111.7,110.0,2018-2019
592,Memphis Grizzlies,33,49,0.402,103.5,106.1,2018-2019
593,New Orleans Pelicans,33,49,0.402,115.4,116.8,2018-2019


In [199]:
# Save the cleaned table as a new CSV.
# DataFrame.to_csv does not create missing folders, so make sure the output
# directory exists before writing (Path is imported in the setup cell).
# The row index is still written as the first, unnamed column, exactly as before.
output_path = Path("Data Mods/Teams By Season.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
teams_data_complete.to_csv(output_path)