In [1]:
import numpy as np
import pandas as pd

In [2]:
df1 = pd.DataFrame([
    {'team_away':'NYC', 'team_home':'LA', 'pitcher_away':'phil', 'pitcher_home':'wren','team_win':'NYC'},
    {'team_away':'LON', 'team_home':'LA', 'pitcher_away':'Gary', 'pitcher_home':'Tertia','team_win':'LON'},
    {'team_away':'LA', 'team_home':'NYC', 'pitcher_away':'Jenn', 'pitcher_home':'phil','team_win':'NYC'},
])
df1

Unnamed: 0,team_away,team_home,pitcher_away,pitcher_home,team_win
0,NYC,LA,phil,wren,NYC
1,LON,LA,Gary,Tertia,LON
2,LA,NYC,Jenn,phil,NYC


In [3]:
# Create a new DataFrame for individual team records
df_away_teams = pd.DataFrame()

# Extract away team records
df_away_teams['team'] = df1['team_away']
df_away_teams['location'] = 'Away'
df_away_teams['pitcher'] = df1['pitcher_away']
df_away_teams['result'] = df1['team_win']==df1['team_away']

# Extract home team records
df_home_teams = pd.DataFrame()
df_home_teams['team'] = df1['team_home']
df_home_teams['location'] = 'Home'
df_home_teams['pitcher'] = df1['pitcher_home']
df_home_teams['result'] = df1['team_win']==df1['team_home']

# Concatenate both home and away team records
df_teams = pd.concat([df_away_teams, df_home_teams])

# Reset index
df_teams.reset_index(drop=True, inplace=True)

# Print the new DataFrame
print(df_teams)


  team location pitcher  result
0  NYC     Away    phil    True
1  LON     Away    Gary    True
2   LA     Away    Jenn   False
3   LA     Home    wren   False
4   LA     Home  Tertia   False
5  NYC     Home    phil    True


In [4]:
stats_games = pd.read_csv('data/stats_games.csv')

In [5]:
stats_games.columns

Index(['game_id', 'game_datetime', 'game_date', 'game_type', 'status',
       'away_name', 'home_name', 'away_id', 'home_id', 'doubleheader',
       'game_num', 'home_probable_pitcher', 'away_probable_pitcher',
       'home_pitcher_note', 'away_pitcher_note', 'away_score', 'home_score',
       'current_inning', 'inning_state', 'venue_id', 'venue_name',
       'national_broadcasts', 'series_status', 'winning_team', 'losing_team',
       'winning_pitcher', 'losing_pitcher', 'save_pitcher', 'summary',
       'losing_Team'],
      dtype='object')

In [6]:
away_teams = stats_games.rename(columns={'away_name':'team_name', 'away_probable_pitcher':'pitcher_name', 'away_id':'team_id'})
home_teams = stats_games.rename(columns={'home_name':'team_name', 'home_probable_pitcher':'pitcher_name', 'home_id':'team_id'})

away_teams['location'] = 'away'
home_teams['location'] = 'home'

away_teams['score'] = (away_teams['away_score']).astype(str)+'-'+(away_teams['home_score']).astype(str)
home_teams['score'] = (home_teams['home_score']).astype(str)+'-'+(home_teams['away_score']).astype(str)

away_teams['win'] = (away_teams['team_name'] == away_teams['winning_team']).astype(int)
home_teams['win'] = (home_teams['team_name'] == home_teams['winning_team']).astype(int)

In [7]:
stat_games = pd.concat([away_teams, home_teams])
stat_games.columns

Index(['game_id', 'game_datetime', 'game_date', 'game_type', 'status',
       'team_name', 'home_name', 'team_id', 'home_id', 'doubleheader',
       'game_num', 'home_probable_pitcher', 'pitcher_name',
       'home_pitcher_note', 'away_pitcher_note', 'away_score', 'home_score',
       'current_inning', 'inning_state', 'venue_id', 'venue_name',
       'national_broadcasts', 'series_status', 'winning_team', 'losing_team',
       'winning_pitcher', 'losing_pitcher', 'save_pitcher', 'summary',
       'losing_Team', 'location', 'score', 'win', 'away_name', 'away_id',
       'away_probable_pitcher'],
      dtype='object')

In [9]:
cols_to_keep = ['game_id', 'game_datetime', 'game_date', 'game_type',
                'status', 'team_name', 'team_id', 'pitcher_name',
                'venue_id', 'venue_name', 'location', 'score', 'win']
results = stat_games[cols_to_keep]
results

Unnamed: 0,game_id,game_datetime,game_date,game_type,status,team_name,team_id,pitcher_name,venue_id,venue_name,location,score,win
0,719496,2023-02-24T18:05:00Z,2023-02-24,E,Final,Northeastern Huskies,343,,4309,JetBlue Park,away,3-5,0
1,718938,2023-02-24T20:05:00Z,2023-02-24,S,Final,Texas Rangers,140,Glenn Otto,2603,Surprise Stadium,away,5-6,0
2,719395,2023-02-24T20:10:00Z,2023-02-24,S,Final,Seattle Mariners,136,Robbie Ray,2530,Peoria Stadium,away,3-2,1
3,719391,2023-02-25T18:05:00Z,2023-02-25,S,Final,New York Mets,121,Denyi Reyes,5000,The Ballpark of the Palm Beaches,away,2-4,0
4,719389,2023-02-25T18:05:00Z,2023-02-25,S,Final,Washington Nationals,120,MacKenzie Gore,2520,Roger Dean Chevrolet Stadium,away,3-2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2942,716355,2023-10-01T19:10:00Z,2023-10-01,R,Scheduled,New York Mets,121,,3289,Citi Field,home,0-0,0
2943,716356,2023-10-01T19:10:00Z,2023-10-01,R,Scheduled,Chicago White Sox,145,,4,Guaranteed Rate Field,home,0-0,0
2944,716352,2023-10-01T19:10:00Z,2023-10-01,R,Scheduled,Kansas City Royals,118,,7,Kauffman Stadium,home,0-0,0
2945,716364,2023-10-01T19:10:00Z,2023-10-01,R,Scheduled,Milwaukee Brewers,158,,32,American Family Field,home,0-0,0
