## Foundations for efficiencies


### Built-in practice: range()

In [1]:
# Build a range object covering 0 through 5
nums = range(6)
print(type(nums))

# Materialize the range as a list
nums_list = [*nums]
print(nums_list)

# Odd numbers from 1 to 11, built from a range with a step of two
nums_list2 = list(range(1, 13, 2))
print(nums_list2)

<class 'range'>
[0, 1, 2, 3, 4, 5]
[1, 3, 5, 7, 9, 11]


### Built-in practice: enumerate()

In [2]:
# Guest list used by the enumerate(), map() and Festivus cells below
names = [
    'Jerry',
    'Kramer',
    'Elaine',
    'George',
    'Newman',
]

In [3]:
# Pair every name with its position using an explicit enumerate loop
indexed_names = []
for i, name in enumerate(names):
    indexed_names.append((i, name))
print(indexed_names)

# The same pairing expressed as a list comprehension
indexed_names_comp = [(position, person) for position, person in enumerate(names)]
print(indexed_names_comp)

# Materialize an enumerate object whose counter starts at one
indexed_names_unpack = list(enumerate(names, 1))
print(indexed_names_unpack)

[(0, 'Jerry'), (1, 'Kramer'), (2, 'Elaine'), (3, 'George'), (4, 'Newman')]
[(0, 'Jerry'), (1, 'Kramer'), (2, 'Elaine'), (3, 'George'), (4, 'Newman')]
[(1, 'Jerry'), (2, 'Kramer'), (3, 'Elaine'), (4, 'George'), (5, 'Newman')]


### Built-in practice: map()

In [4]:
# Lazily apply str.upper to every element of names
names_map = map(str.upper, names)

# A map call returns a map object, not a list
print(type(names_map))

# Consume the map object into a list
names_uppercase = list(names_map)

# Show the upper-cased names
print(names_uppercase)

<class 'map'>
['JERRY', 'KRAMER', 'ELAINE', 'GEORGE', 'NEWMAN']


### Bringing it all together: Festivus!

In [22]:
def welcome_guest(guest_and_time):
    """
    Build the Festivus welcome message for one guest.

    Args:
        guest_and_time (tuple): Sequence whose element 0 is the guest and
            whose element 1 is the arrival time.

    Returns:
        str: The message
        "Welcome to Festivus {guest}... You're {time} min late."
        with both placeholders filled in.

    """
    # Index directly (rather than unpack) so longer sequences are still accepted
    return "Welcome to Festivus {}... You're {} min late.".format(
        guest_and_time[0], guest_and_time[1]
    )

In [23]:
# Imported in this cell so it runs on a fresh kernel: the notebook's other
# NumPy import only appears in a later cell.
import numpy as np

# Create a list of arrival times
arrival_times = [*range(10, 60, 10)]

# Convert arrival_times to an array and move every time three minutes earlier
arrival_times_np = np.array(arrival_times)
new_times = arrival_times_np - 3

# Use list comprehension and enumerate to pair guests to new times
guest_arrivals = [(names[i], time) for i, time in enumerate(new_times)]

# Map the welcome_guest function to each (guest, time) pair
welcome_map = map(welcome_guest, guest_arrivals)

# Consume the map object and print one welcome message per line
guest_welcomes = [*welcome_map]
print(*guest_welcomes, sep='\n')

# names is only read above, so it prints unchanged
print(names)

Welcome to Festivus Jerry... You're 7 min late.
Welcome to Festivus Kramer... You're 17 min late.
Welcome to Festivus Elaine... You're 27 min late.
Welcome to Festivus George... You're 37 min late.
Welcome to Festivus Newman... You're 47 min late.
['Jerry', 'Kramer', 'Elaine', 'George', 'Newman']


## Gaining efficiencies

### Combining Pokémon names and types

In [24]:
names = [
    'Abomasnow', 'Abra', 'Absol', 'Accelgor', 'Aerodactyl',
    'Aggron', 'Aipom', 'Alakazam', 'Alomomola', 'Altaria',
]
primary_types = [
    'Grass', 'Psychic', 'Dark', 'Bug', 'Rock',
    'Steel', 'Normal', 'Psychic', 'Water', 'Dragon',
]

# Pair each Pokémon name with its primary type
names_type1 = list(zip(names, primary_types))

# Show the first five pairs, one per line
print('\n'.join(str(pair) for pair in names_type1[:5]))

('Abomasnow', 'Grass')
('Abra', 'Psychic')
('Absol', 'Dark')
('Accelgor', 'Bug')
('Aerodactyl', 'Rock')


### Counting Pokémon from a sample

In [26]:
from collections import Counter

# Tally how many Pokémon fall under each primary type
type_count = Counter(primary_types)
print('{} \n'.format(type_count))

Counter({'Psychic': 2, 'Grass': 1, 'Dark': 1, 'Bug': 1, 'Rock': 1, 'Steel': 1, 'Normal': 1, 'Water': 1, 'Dragon': 1}) 



### Combinations of Pokémon

In [28]:
# Import combinations from itertools
from itertools import combinations

pokemon = ['Geodude', 'Cubone', 'Lickitung', 'Persian', 'Diglett']

# Lazy iterator over every unordered pair of Pokémon
combos_obj = combinations(pokemon, 2)
print(type(combos_obj), '\n')

# Exhaust the iterator into a list of pairs
combos_2 = list(combos_obj)
print(combos_2, '\n')

# Every group of four Pokémon, materialized in a single step
combos_4 = list(combinations(pokemon, 4))
print(combos_4)

<class 'itertools.combinations'> 

[('Geodude', 'Cubone'), ('Geodude', 'Lickitung'), ('Geodude', 'Persian'), ('Geodude', 'Diglett'), ('Cubone', 'Lickitung'), ('Cubone', 'Persian'), ('Cubone', 'Diglett'), ('Lickitung', 'Persian'), ('Lickitung', 'Diglett'), ('Persian', 'Diglett')] 

[('Geodude', 'Cubone', 'Lickitung', 'Persian'), ('Geodude', 'Cubone', 'Lickitung', 'Diglett'), ('Geodude', 'Cubone', 'Persian', 'Diglett'), ('Geodude', 'Lickitung', 'Persian', 'Diglett'), ('Cubone', 'Lickitung', 'Persian', 'Diglett')]


### Comparing Pokédexes

In [29]:
ash_pokedex = [
    'Pikachu', 'Bulbasaur', 'Koffing', 'Spearow', 'Vulpix',
    'Wigglytuff', 'Zubat', 'Rattata', 'Psyduck', 'Squirtle',
]
misty_pokedex = [
    'Krabby', 'Horsea', 'Slowbro', 'Tentacool', 'Vaporeon',
    'Magikarp', 'Poliwag', 'Starmie', 'Psyduck', 'Squirtle',
]

# Sets give access to the set-algebra operators used below
ash_set = set(ash_pokedex)
misty_set = set(misty_pokedex)

# Intersection: Pokémon present in both Pokédexes
both = ash_set & misty_set
print(both)

# Difference: Pokémon Ash has that Misty does not
ash_only = ash_set - misty_set
print(ash_only)

# Symmetric difference: Pokémon found in exactly one of the two sets
unique_to_set = ash_set ^ misty_set
print(unique_to_set)

{'Psyduck', 'Squirtle'}
{'Spearow', 'Vulpix', 'Koffing', 'Rattata', 'Bulbasaur', 'Pikachu', 'Zubat', 'Wigglytuff'}
{'Tentacool', 'Poliwag', 'Slowbro', 'Vaporeon', 'Spearow', 'Vulpix', 'Zubat', 'Starmie', 'Krabby', 'Koffing', 'Rattata', 'Bulbasaur', 'Horsea', 'Pikachu', 'Magikarp', 'Wigglytuff'}


### Searching for Pokémon

In [30]:
# Membership test against ash_set (the set built from ash_pokedex)
print('Psyduck' in ash_set)

True


### Gathering unique Pokémon

In [32]:
# Collapse the pokemon list into its unique members; shown as the cell's output
{*pokemon}

{'Cubone', 'Diglett', 'Geodude', 'Lickitung', 'Persian'}

### Gathering Pokémon without a loop

In [34]:
poke_names = [
    'Abomasnow', 'Abra', 'Absol', 'Accelgor', 'Aerodactyl',
    'Aggron', 'Aipom', 'Alakazam', 'Alomomola', 'Altaria',
]
poke_gens = [4, 1, 3, 5, 1, 3, 2, 1, 5, 3]

# Keep only the names whose paired generation number is below 3
gen1_gen2_pokemon = [
    poke_name
    for poke_name, poke_gen in zip(poke_names, poke_gens)
    if poke_gen < 3
]

# Lazy map object yielding the length of each kept name
name_lengths_map = map(len, gen1_gen2_pokemon)

# Pair each kept name with its length (this consumes name_lengths_map)
gen1_gen2_name_lengths = list(zip(gen1_gen2_pokemon, name_lengths_map))

print(gen1_gen2_name_lengths[:5])

[('Abra', 4), ('Aerodactyl', 10), ('Aipom', 5), ('Alakazam', 8)]


### Pokémon totals and averages without a loop

In [39]:
import numpy as np

# One row of six stat values per Pokémon, in the same order as names
stats = np.array([
    [90, 92, 75, 92, 85, 60],
    [25, 20, 15, 105, 55, 90],
    [65, 130, 60, 75, 60, 75],
    [80, 70, 40, 100, 60, 145],
    [80, 105, 65, 60, 75, 130],
])
names = ['Abomasnow', 'Abra', 'Absol', 'Accelgor', 'Aerodactyl']

In [40]:
# Per-Pokémon totals: sum across each row of stats
total_stats_np = np.sum(stats, axis=1)

# Per-Pokémon averages: mean across each row of stats
avg_stats_np = np.mean(stats, axis=1)

# Bundle each name with its total and its average
poke_list_np = list(zip(names, total_stats_np, avg_stats_np))

# Rank by total (tuple position 1), highest first, and keep the first three
ranked_by_total = sorted(poke_list_np, key=lambda entry: entry[1], reverse=True)
top_3 = ranked_by_total[:3]
print('3 strongest Pokémon:\n{}'.format(top_3))

3 strongest Pokémon:
[('Aerodactyl', 515, 85.83333333333333), ('Accelgor', 495, 82.5), ('Abomasnow', 494, 82.33333333333333)]


### One-time calculation loop

In [42]:
# Import Counter
from collections import Counter

generations = [4, 1, 3, 5, 1, 3, 2, 1, 5, 3, 6, 4, 5, 2, 3, 1, 1, 4, 5, 5]

# Tally how many entries belong to each generation
gen_counts = Counter(generations)

# The overall total never changes, so compute it once before the loop
total_count = len(generations)

report_line = 'generation {}: count = {:3} percentage = {}'
for gen in gen_counts:
    count = gen_counts[gen]
    gen_percent = round(count / total_count * 100, 2)
    print(report_line.format(gen, count, gen_percent))

generation 4: count =   3 percentage = 15.0
generation 1: count =   5 percentage = 25.0
generation 3: count =   4 percentage = 20.0
generation 5: count =   5 percentage = 25.0
generation 2: count =   2 percentage = 10.0
generation 6: count =   1 percentage = 5.0


### Holistic conversion loop

In [44]:
# The Pokémon types that get paired up in the next cell
pokemon_types = [
    'Bug', 'Dark', 'Dragon', 'Electric', 'Fairy', 'Fighting',
    'Fire', 'Flying', 'Ghost', 'Grass', 'Ground', 'Ice',
    'Normal', 'Poison', 'Psychic', 'Rock', 'Steel', 'Water',
]

In [45]:
# Every unordered pair of Pokémon types
possible_pairs = list(combinations(pokemon_types, 2))

# Prefix each pair with its one-based position: (index, type_a, type_b)
enumerated_tuples = [
    (i,) + pair for i, pair in enumerate(possible_pairs, 1)
]

# Convert every tuple to a list in a single pass, outside of any loop
enumerated_pairs = [list(entry) for entry in enumerated_tuples]
print(enumerated_pairs)

[[1, 'Bug', 'Dark'], [2, 'Bug', 'Dragon'], [3, 'Bug', 'Electric'], [4, 'Bug', 'Fairy'], [5, 'Bug', 'Fighting'], [6, 'Bug', 'Fire'], [7, 'Bug', 'Flying'], [8, 'Bug', 'Ghost'], [9, 'Bug', 'Grass'], [10, 'Bug', 'Ground'], [11, 'Bug', 'Ice'], [12, 'Bug', 'Normal'], [13, 'Bug', 'Poison'], [14, 'Bug', 'Psychic'], [15, 'Bug', 'Rock'], [16, 'Bug', 'Steel'], [17, 'Bug', 'Water'], [18, 'Dark', 'Dragon'], [19, 'Dark', 'Electric'], [20, 'Dark', 'Fairy'], [21, 'Dark', 'Fighting'], [22, 'Dark', 'Fire'], [23, 'Dark', 'Flying'], [24, 'Dark', 'Ghost'], [25, 'Dark', 'Grass'], [26, 'Dark', 'Ground'], [27, 'Dark', 'Ice'], [28, 'Dark', 'Normal'], [29, 'Dark', 'Poison'], [30, 'Dark', 'Psychic'], [31, 'Dark', 'Rock'], [32, 'Dark', 'Steel'], [33, 'Dark', 'Water'], [34, 'Dragon', 'Electric'], [35, 'Dragon', 'Fairy'], [36, 'Dragon', 'Fighting'], [37, 'Dragon', 'Fire'], [38, 'Dragon', 'Flying'], [39, 'Dragon', 'Ghost'], [40, 'Dragon', 'Grass'], [41, 'Dragon', 'Ground'], [42, 'Dragon', 'Ice'], [43, 'Dragon', 'Nor

### Bringing it all together: Pokémon z-scores

In [46]:
hps = np.array([80.0, 60.0, 131.0, 62.0, 71.0, 109.0, 45.0, 53.0, 73.0, 60.0])
names = [
    'Abomasnow', 'Abra', 'Absol', 'Accelgor', 'Aerodactyl',
    'Aggron', 'Aipom', 'Alakazam', 'Alomomola', 'Altaria',
]

# Mean and standard deviation of all HP values
hp_avg = np.mean(hps)
hp_std = np.std(hps)

# Vectorized z-scores: array arithmetic takes the place of a Python loop
z_scores = (hps - hp_avg) / hp_std

# One (name, hp, z-score) tuple per Pokémon
poke_zscores2 = list(zip(names, hps, z_scores))
print(*poke_zscores2[:3], sep='\n')

# Keep only the entries whose z-score (tuple position 2) exceeds 2
highest_hp_pokemon = [entry for entry in poke_zscores2 if entry[2] > 2]
print(*highest_hp_pokemon, sep='\n')

('Abomasnow', 80.0, 0.22246758193284516)
('Abra', 60.0, -0.5720594963987454)
('Absol', 131.0, 2.2485116316784013)
('Absol', 131.0, 2.2485116316784013)


## Basic pandas optimizations

### Iterating with .iterrows()

In [50]:
import pandas as pd
# Load the baseball statistics CSV; the path is relative to the working directory
# NOTE(review): the name pit_df suggests a single-team subset, but the preview
# below lists several teams (ARI, ATL, BAL, ...) — confirm the intended data.
pit_df = pd.read_csv('baseball_stats.csv')
# Preview the first rows as the cell's output
pit_df.head()

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415
1,ATL,NL,2012,700,600,94,0.32,0.389,0.247,1,4.0,5.0,162,0.306,0.378
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403
3,BOS,AL,2012,734,806,69,0.315,0.415,0.26,0,,,162,0.331,0.428
4,CHC,NL,2012,613,759,61,0.302,0.378,0.24,0,,,162,0.335,0.424


In [48]:
# .iterrows() yields (index, row) pairs; show the index, the row and the row's type.
# Note: this runs over every row of pit_df, so the printed output is long.
for i, row in pit_df.iterrows():
    print(i, row, type(row), sep='\n')

0
Team              ARI
League             NL
Year             2012
RS                734
RA                688
W                  81
OBP             0.328
SLG             0.418
BA              0.259
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP            0.317
OSLG            0.415
Name: 0, dtype: object
<class 'pandas.core.series.Series'>
1
Team              ATL
League             NL
Year             2012
RS                700
RA                600
W                  94
OBP              0.32
SLG             0.389
BA              0.247
Playoffs            1
RankSeason          4
RankPlayoffs        5
G                 162
OOBP            0.306
OSLG            0.378
Name: 1, dtype: object
<class 'pandas.core.series.Series'>
2
Team              BAL
League             AL
Year             2012
RS                712
RA                705
W                  93
OBP             0.311
SLG             0.417
BA              0.247
Playoffs          

Name: 248, dtype: object
<class 'pandas.core.series.Series'>
249
Team              COL
League             NL
Year             2004
RS                833
RA                923
W                  68
OBP             0.345
SLG             0.455
BA              0.275
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP            0.372
OSLG            0.471
Name: 249, dtype: object
<class 'pandas.core.series.Series'>
250
Team              DET
League             AL
Year             2004
RS                827
RA                844
W                  72
OBP             0.337
SLG             0.449
BA              0.272
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP             0.34
OSLG            0.441
Name: 250, dtype: object
<class 'pandas.core.series.Series'>
251
Team              FLA
League             NL
Year             2004
RS                718
RA                700
W                  83
OBP          

Name: 442, dtype: object
<class 'pandas.core.series.Series'>
443
Team              SDP
League             NL
Year             1998
RS                749
RA                635
W                  98
OBP              0.33
SLG             0.409
BA              0.253
Playoffs            1
RankSeason          4
RankPlayoffs        2
G                 162
OOBP              NaN
OSLG              NaN
Name: 443, dtype: object
<class 'pandas.core.series.Series'>
444
Team              SEA
League             AL
Year             1998
RS                859
RA                855
W                  76
OBP             0.345
SLG             0.468
BA              0.276
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 161
OOBP              NaN
OSLG              NaN
Name: 444, dtype: object
<class 'pandas.core.series.Series'>
445
Team              SFG
League             NL
Year             1998
RS                845
RA                739
W                  89
OBP          

Name: 634, dtype: object
<class 'pandas.core.series.Series'>
635
Team              STL
League             NL
Year             1989
RS                632
RA                608
W                  86
OBP             0.321
SLG             0.363
BA              0.258
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 164
OOBP              NaN
OSLG              NaN
Name: 635, dtype: object
<class 'pandas.core.series.Series'>
636
Team              TEX
League             AL
Year             1989
RS                695
RA                714
W                  83
OBP             0.326
SLG             0.394
BA              0.263
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP              NaN
OSLG              NaN
Name: 636, dtype: object
<class 'pandas.core.series.Series'>
637
Team              TOR
League             AL
Year             1989
RS                731
RA                651
W                  89
OBP          

Name: 868, dtype: object
<class 'pandas.core.series.Series'>
869
Team              STL
League             NL
Year             1979
RS                731
RA                693
W                  86
OBP             0.331
SLG             0.401
BA              0.278
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 163
OOBP              NaN
OSLG              NaN
Name: 869, dtype: object
<class 'pandas.core.series.Series'>
870
Team              TEX
League             AL
Year             1979
RS                750
RA                698
W                  83
OBP             0.334
SLG             0.409
BA              0.278
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP              NaN
OSLG              NaN
Name: 870, dtype: object
<class 'pandas.core.series.Series'>
871
Team              TOR
League             AL
Year             1979
RS                613
RA                862
W                  53
OBP          

Team              KCR
League             AL
Year             1973
RS                755
RA                752
W                  88
OBP             0.339
SLG             0.381
BA              0.261
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP              NaN
OSLG              NaN
Name: 1006, dtype: object
<class 'pandas.core.series.Series'>
1007
Team              LAD
League             NL
Year             1973
RS                675
RA                565
W                  95
OBP             0.323
SLG             0.371
BA              0.263
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP              NaN
OSLG              NaN
Name: 1007, dtype: object
<class 'pandas.core.series.Series'>
1008
Team              MIL
League             AL
Year             1973
RS                708
RA                731
W                  74
OBP             0.325
SLG             0.388
BA              0.253
Playoffs

Team              PHI
League             NL
Year             1963
RS                642
RA                578
W                  87
OBP             0.306
SLG             0.381
BA              0.252
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP              NaN
OSLG              NaN
Name: 1207, dtype: object
<class 'pandas.core.series.Series'>
1208
Team              PIT
League             NL
Year             1963
RS                567
RA                595
W                  74
OBP             0.309
SLG             0.359
BA               0.25
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP              NaN
OSLG              NaN
Name: 1208, dtype: object
<class 'pandas.core.series.Series'>
1209
Team              SFG
League             NL
Year             1963
RS                725
RA                641
W                  88
OBP             0.316
SLG             0.414
BA              0.258
Playoffs

In [49]:
# Without unpacking, each item produced by .iterrows() is a single tuple;
# print the tuple followed by its type.
for row_tuple in pit_df.iterrows():
    print(row_tuple, type(row_tuple), sep='\n')

(0, Team              ARI
League             NL
Year             2012
RS                734
RA                688
W                  81
OBP             0.328
SLG             0.418
BA              0.259
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP            0.317
OSLG            0.415
Name: 0, dtype: object)
<class 'tuple'>
(1, Team              ATL
League             NL
Year             2012
RS                700
RA                600
W                  94
OBP              0.32
SLG             0.389
BA              0.247
Playoffs            1
RankSeason          4
RankPlayoffs        5
G                 162
OOBP            0.306
OSLG            0.378
Name: 1, dtype: object)
<class 'tuple'>
(2, Team              BAL
League             AL
Year             2012
RS                712
RA                705
W                  93
OBP             0.311
SLG             0.417
BA              0.247
Playoffs            1
RankSeason          5
RankPl

Name: 279, dtype: object)
<class 'tuple'>
(280, Team              DET
League             AL
Year             2003
RS                591
RA                928
W                  43
OBP               0.3
SLG             0.375
BA               0.24
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP            0.352
OSLG            0.461
Name: 280, dtype: object)
<class 'tuple'>
(281, Team              FLA
League             NL
Year             2003
RS                751
RA                692
W                  91
OBP             0.333
SLG             0.421
BA              0.266
Playoffs            1
RankSeason          5
RankPlayoffs        1
G                 162
OOBP            0.325
OSLG            0.396
Name: 281, dtype: object)
<class 'tuple'>
(282, Team              HOU
League             NL
Year             2003
RS                805
RA                677
W                  87
OBP             0.336
SLG             0.431
BA              0.26

(534, Team              ATL
League             NL
Year             1992
RS                682
RA                569
W                  98
OBP             0.316
SLG             0.388
BA              0.254
Playoffs            1
RankSeason          1
RankPlayoffs        2
G                 162
OOBP              NaN
OSLG              NaN
Name: 534, dtype: object)
<class 'tuple'>
(535, Team              BAL
League             AL
Year             1992
RS                705
RA                656
W                  89
OBP              0.34
SLG             0.398
BA              0.259
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP              NaN
OSLG              NaN
Name: 535, dtype: object)
<class 'tuple'>
(536, Team              BOS
League             AL
Year             1992
RS                599
RA                669
W                  73
OBP             0.321
SLG             0.347
BA              0.246
Playoffs            0
RankSeason        

Name: 755, dtype: object)
<class 'tuple'>
(756, Team              MON
League             NL
Year             1984
RS                593
RA                585
W                  78
OBP             0.312
SLG             0.362
BA              0.251
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 161
OOBP              NaN
OSLG              NaN
Name: 756, dtype: object)
<class 'tuple'>
(757, Team              NYM
League             NL
Year             1984
RS                652
RA                676
W                  90
OBP              0.32
SLG             0.369
BA              0.257
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP              NaN
OSLG              NaN
Name: 757, dtype: object)
<class 'tuple'>
(758, Team              NYY
League             AL
Year             1984
RS                758
RA                679
W                  87
OBP             0.339
SLG             0.404
BA              0.27

Name: 976, dtype: object)
<class 'tuple'>
(977, Team              CHW
League             AL
Year             1974
RS                684
RA                721
W                  80
OBP              0.33
SLG             0.389
BA              0.268
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 163
OOBP              NaN
OSLG              NaN
Name: 977, dtype: object)
<class 'tuple'>
(978, Team              CIN
League             NL
Year             1974
RS                776
RA                631
W                  98
OBP             0.343
SLG             0.394
BA               0.26
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 163
OOBP              NaN
OSLG              NaN
Name: 978, dtype: object)
<class 'tuple'>
(979, Team              CLE
League             AL
Year             1974
RS                662
RA                694
W                  77
OBP             0.311
SLG              0.37
BA              0.25

<class 'tuple'>
(1209, Team              SFG
League             NL
Year             1963
RS                725
RA                641
W                  88
OBP             0.316
SLG             0.414
BA              0.258
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP              NaN
OSLG              NaN
Name: 1209, dtype: object)
<class 'tuple'>
(1210, Team              STL
League             NL
Year             1963
RS                747
RA                628
W                  93
OBP             0.326
SLG             0.403
BA              0.271
Playoffs            0
RankSeason        NaN
RankPlayoffs      NaN
G                 162
OOBP              NaN
OSLG              NaN
Name: 1210, dtype: object)
<class 'tuple'>
(1211, Team              WSA
League             AL
Year             1963
RS                578
RA                812
W                  56
OBP             0.293
SLG             0.351
BA              0.227
Playoffs           

### Run differentials with .iterrows()

In [57]:
# Accumulator for the per-row run differentials
run_diffs = []

def calc_run_diff(runs_scored, runs_allowed):
    """Return the run differential: runs_scored minus runs_allowed."""
    return runs_scored - runs_allowed

# Walk the rows with .iterrows() and collect one run differential per row,
# computed by calc_run_diff from the row's RS and RA values
run_diffs.extend(
    calc_run_diff(row['RS'], row['RA']) for _, row in pit_df.iterrows()
)

# Store the differentials in an RD column and preview the result
pit_df['RD'] = run_diffs
pit_df.head()

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG,RD
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415,46
1,ATL,NL,2012,700,600,94,0.32,0.389,0.247,1,4.0,5.0,162,0.306,0.378,100
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403,7
3,BOS,AL,2012,734,806,69,0.315,0.415,0.26,0,,,162,0.331,0.428,-72
4,CHC,NL,2012,613,759,61,0.302,0.378,0.24,0,,,162,0.335,0.424,-146


### Iterating with .itertuples()

In [58]:
# Print Index, Year and Wins (W) for every row whose Playoffs flag is 1
# (1 means the team made the playoffs; 0 means it did not)
for row in pit_df.itertuples():
    if row.Playoffs == 1:
        print(row.Index, row.Year, row.W)

1 2012 94
2 2012 93
6 2012 97
9 2012 88
18 2012 95
19 2012 94
24 2012 94
25 2012 88
27 2012 93
29 2012 98
30 2011 94
39 2011 95
45 2011 96
48 2011 97
50 2011 102
55 2011 90
56 2011 91
57 2011 96
61 2010 91
66 2010 91
76 2010 94
78 2010 95
80 2010 97
84 2010 92
86 2010 96
87 2010 90
93 2009 95
98 2009 92
103 2009 97
104 2009 95
106 2009 87
108 2009 103
110 2009 93
115 2009 91
123 2008 95
124 2008 97
125 2008 89
133 2008 100
134 2008 84
135 2008 90
140 2008 92
146 2008 97
150 2007 90
153 2007 96
154 2007 85
157 2007 96
158 2007 90
163 2007 94
168 2007 94
170 2007 89
189 2006 95
194 2006 88
196 2006 96
197 2006 97
198 2006 97
199 2006 93
202 2006 88
205 2006 83
211 2005 90
213 2005 95
215 2005 99
221 2005 89
223 2005 95
228 2005 95
232 2005 82
235 2005 100
240 2004 92
242 2004 96
244 2004 98
252 2004 92
254 2004 93
256 2004 92
259 2004 101
266 2004 105
272 2003 101
274 2003 95
275 2003 88
281 2003 91
286 2003 90
289 2003 101
290 2003 96
295 2003 100
300 2002 99
301 2002 98
302 2002 101
31

### Run differentials with .itertuples()

In [60]:
# One run differential per row, read through the namedtuples that
# .itertuples() yields (attribute access instead of label lookup)
run_diffs = [
    calc_run_diff(row.RS, row.RA) for row in pit_df.itertuples()
]

# Store the differentials in the RD column and preview the result
pit_df['RD'] = run_diffs
pit_df.head()

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG,RD
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415,46
1,ATL,NL,2012,700,600,94,0.32,0.389,0.247,1,4.0,5.0,162,0.306,0.378,100
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403,7
3,BOS,AL,2012,734,806,69,0.315,0.415,0.26,0,,,162,0.331,0.428,-72
4,CHC,NL,2012,613,759,61,0.302,0.378,0.24,0,,,162,0.335,0.424,-146


### Analyzing baseball stats with .apply()

In [63]:
# Row-wise sum of the RS and RA columns: one total per row of pit_df
total_runs_scored = pit_df.loc[:, ['RS', 'RA']].apply(sum, axis=1)
total_runs_scored.head()

0    1422
1    1300
2    1417
3    1540
4    1372
dtype: int64

In [65]:
def text_playoffs(num_playoffs):
    """Return 'Yes' when num_playoffs equals 1, otherwise 'No'."""
    return 'Yes' if num_playoffs == 1 else 'No'
    
# Translate each row's numeric Playoffs value into 'Yes'/'No' text
textual_playoffs = pit_df.apply(
    lambda team_row: text_playoffs(team_row['Playoffs']),
    axis=1,
)
textual_playoffs.head()

0     No
1    Yes
2    Yes
3     No
4     No
dtype: object

### Settle a debate with .apply()

In [70]:
def calc_win_perc(wins, games_played):
    """Return wins divided by games_played, rounded to two decimals with np.round."""
    return np.round(wins / games_played, 2)

# Display the first five rows of the DataFrame
display(pit_df.head())

# Create a win percentage Series by applying calc_win_perc to each row's W and G
win_percs = pit_df.apply(lambda row: calc_win_perc(row['W'], row['G']), axis=1)
# display() renders every positional argument as its own output, so no '\n'
# separator is passed (it would be shown as the literal string '\n')
display(win_percs.head())

# Store the win percentages in a new WP column of pit_df
pit_df['WP'] = win_percs
display(pit_df.head())

# Display the rows of pit_df whose WP is at least 0.50
display(pit_df[pit_df['WP'] >= 0.50])

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG,RD,WP
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415,46,0.5
1,ATL,NL,2012,700,600,94,0.32,0.389,0.247,1,4.0,5.0,162,0.306,0.378,100,0.58
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403,7,0.57
3,BOS,AL,2012,734,806,69,0.315,0.415,0.26,0,,,162,0.331,0.428,-72,0.43
4,CHC,NL,2012,613,759,61,0.302,0.378,0.24,0,,,162,0.335,0.424,-146,0.38


0    0.50
1    0.58
2    0.57
3    0.43
4    0.38
dtype: float64

'\n'

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG,RD,WP
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415,46,0.5
1,ATL,NL,2012,700,600,94,0.32,0.389,0.247,1,4.0,5.0,162,0.306,0.378,100,0.58
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403,7,0.57
3,BOS,AL,2012,734,806,69,0.315,0.415,0.26,0,,,162,0.331,0.428,-72,0.43
4,CHC,NL,2012,613,759,61,0.302,0.378,0.24,0,,,162,0.335,0.424,-146,0.38


'\n'

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG,RD,WP
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415,46,0.50
1,ATL,NL,2012,700,600,94,0.320,0.389,0.247,1,4.0,5.0,162,0.306,0.378,100,0.58
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403,7,0.57
5,CHW,AL,2012,748,676,85,0.318,0.422,0.255,0,,,162,0.319,0.405,72,0.52
6,CIN,NL,2012,669,588,97,0.315,0.411,0.251,1,2.0,4.0,162,0.305,0.390,81,0.60
9,DET,AL,2012,726,670,88,0.335,0.422,0.268,1,6.0,2.0,162,0.314,0.402,56,0.54
12,LAA,AL,2012,767,699,89,0.332,0.433,0.274,0,,,162,0.310,0.403,68,0.55
13,LAD,NL,2012,637,597,86,0.317,0.374,0.252,0,,,162,0.310,0.364,40,0.53
15,MIL,NL,2012,776,733,83,0.325,0.437,0.259,0,,,162,0.326,0.414,43,0.51
18,NYY,AL,2012,804,668,95,0.337,0.453,0.265,1,3.0,3.0,162,0.311,0.419,136,0.59


### Replacing .iloc with underlying arrays

In [71]:
# Pull the W and G columns out as their underlying arrays and compute every
# win percentage in a single vectorized call
wins_arr, games_arr = pit_df['W'].values, pit_df['G'].values
win_percs_np = calc_win_perc(wins_arr, games_arr)

# Store the win percentages in the WP column of pit_df
pit_df['WP'] = win_percs_np

print(pit_df.head())

  Team League  Year   RS   RA   W    OBP    SLG     BA  Playoffs  RankSeason  \
0  ARI     NL  2012  734  688  81  0.328  0.418  0.259         0         NaN   
1  ATL     NL  2012  700  600  94  0.320  0.389  0.247         1         4.0   
2  BAL     AL  2012  712  705  93  0.311  0.417  0.247         1         5.0   
3  BOS     AL  2012  734  806  69  0.315  0.415  0.260         0         NaN   
4  CHC     NL  2012  613  759  61  0.302  0.378  0.240         0         NaN   

   RankPlayoffs    G   OOBP   OSLG   RD    WP  
0           NaN  162  0.317  0.415   46  0.50  
1           5.0  162  0.306  0.378  100  0.58  
2           4.0  162  0.315  0.403    7  0.57  
3           NaN  162  0.331  0.428  -72  0.43  
4           NaN  162  0.335  0.424 -146  0.38  


### Bringing it all together: Predict win percentage

In [73]:
# Accumulator for the predictions gathered by the .itertuples() loop
win_perc_preds_loop = []

def predict_win_perc(RS, RA):
    """Return RS**2 / (RS**2 + RA**2), rounded to two decimals with np.round."""
    rs_squared = RS ** 2
    ra_squared = RA ** 2
    return np.round(rs_squared / (rs_squared + ra_squared), 2)

# Approach 1: iterate with .itertuples() and predict one row at a time
win_perc_preds_loop.extend(
    predict_win_perc(row.RS, row.RA) for row in pit_df.itertuples()
)

# Approach 2: a row-wise .apply() over the DataFrame
win_perc_preds_apply = pit_df.apply(
    lambda row: predict_win_perc(row['RS'], row['RA']),
    axis=1,
)

# Approach 3: a single vectorized call on the underlying NumPy arrays
win_perc_preds_np = predict_win_perc(pit_df['RS'].values, pit_df['RA'].values)
pit_df['WP_preds'] = win_perc_preds_np
print(pit_df.head())

  Team League  Year   RS   RA   W    OBP    SLG     BA  Playoffs  RankSeason  \
0  ARI     NL  2012  734  688  81  0.328  0.418  0.259         0         NaN   
1  ATL     NL  2012  700  600  94  0.320  0.389  0.247         1         4.0   
2  BAL     AL  2012  712  705  93  0.311  0.417  0.247         1         5.0   
3  BOS     AL  2012  734  806  69  0.315  0.415  0.260         0         NaN   
4  CHC     NL  2012  613  759  61  0.302  0.378  0.240         0         NaN   

   RankPlayoffs    G   OOBP   OSLG   RD    WP  WP_preds  
0           NaN  162  0.317  0.415   46  0.50      0.53  
1           5.0  162  0.306  0.378  100  0.58      0.58  
2           4.0  162  0.315  0.403    7  0.57      0.50  
3           NaN  162  0.331  0.428  -72  0.43      0.45  
4           NaN  162  0.335  0.424 -146  0.38      0.39  
