# PRO TIP: Transformation
### The <b>.transform()</b> method can be used to perform aggregation without reshaping
* This is useful for calculating group-level statistics to perform row-level analysis

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Read the retail sales CSV into a DataFrame, then display it as the cell output.
retail = pd.read_csv(
    filepath_or_buffer="../Agg_&_Reshape_DataFrames/retail_2016_2017.csv"
)
retail

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,1945944,2016-01-01,1,AUTOMOTIVE,0.000,0
1,1945945,2016-01-01,1,BABY CARE,0.000,0
2,1945946,2016-01-01,1,BEAUTY,0.000,0
3,1945947,2016-01-01,1,BEVERAGES,0.000,0
4,1945948,2016-01-01,1,BOOKS,0.000,0
...,...,...,...,...,...,...
1054939,3000883,2017-08-15,9,POULTRY,438.133,0
1054940,3000884,2017-08-15,9,PREPARED FOODS,154.553,1
1054941,3000885,2017-08-15,9,PRODUCE,2419.729,148
1054942,3000886,2017-08-15,9,SCHOOL AND OFFICE SUPPLIES,121.000,8


In [8]:
# Attach a per-store sales total to every row without collapsing the frame:
# .transform('sum') returns one value per original row (the summed 'sales' of that
# row's 'store_nbr' group), so .assign() can add it directly as 'store_sales'.
retail.assign(
    store_sales=lambda df: df.groupby('store_nbr')['sales'].transform('sum')
)

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,store_sales
0,1945944,2016-01-01,1,AUTOMOTIVE,0.000,0,6.230011e+06
1,1945945,2016-01-01,1,BABY CARE,0.000,0,6.230011e+06
2,1945946,2016-01-01,1,BEAUTY,0.000,0,6.230011e+06
3,1945947,2016-01-01,1,BEVERAGES,0.000,0,6.230011e+06
4,1945948,2016-01-01,1,BOOKS,0.000,0,6.230011e+06
...,...,...,...,...,...,...,...
1054939,3000883,2017-08-15,9,POULTRY,438.133,0,1.117290e+07
1054940,3000884,2017-08-15,9,PREPARED FOODS,154.553,1,1.117290e+07
1054941,3000885,2017-08-15,9,PRODUCE,2419.729,148,1.117290e+07
1054942,3000886,2017-08-15,9,SCHOOL AND OFFICE SUPPLIES,121.000,8,1.117290e+07


In [9]:
# Read the Premier League games workbook into a DataFrame and preview the first five rows.
premier_league = pd.read_excel(
    io="../Agg_&_Reshape_DataFrames/premier_league_games_full.xlsx"
)
premier_league.head(n=5)

Unnamed: 0,id,league_name,season,HomeTeam,AwayTeam,HomeGoals,AwayGoals
0,1729,England Premier League,2008/2009,Manchester United,Newcastle United,1,1
1,1730,England Premier League,2008/2009,Arsenal,West Bromwich Albion,1,0
2,1731,England Premier League,2008/2009,Sunderland,Liverpool,0,1
3,1732,England Premier League,2008/2009,West Ham United,Wigan Athletic,2,1
4,1733,England Premier League,2008/2009,Aston Villa,Manchester City,4,2


In [14]:

# Add two columns in a single .assign() call:
#   avg_team_goals - mean of 'HomeGoals' within each 'HomeTeam' group, repeated on
#                    every row of that group by .transform("mean")
#   difference     - the row's 'HomeGoals' minus that group mean
# 'difference' must be a callable because it reads the column created just before it.
pm = premier_league.assign(
    avg_team_goals=lambda games: games.groupby("HomeTeam")["HomeGoals"].transform("mean"),
    difference=lambda games: games["HomeGoals"] - games["avg_team_goals"],
)

In [22]:
# Mean 'difference' for each (HomeTeam, AwayTeam) pairing, sorted ascending so the
# most negative pairings come first and the most positive come last.
(
    pm
    .groupby(["HomeTeam", "AwayTeam"])
    .agg(difference=("difference", "mean"))
    .sort_values(by="difference")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,difference
HomeTeam,AwayTeam,Unnamed: 2_level_1
Chelsea,Bournemouth,-2.190789
Southampton,Wigan Athletic,-1.763158
Southampton,Cardiff City,-1.763158
Leicester City,Hull City,-1.657895
Leicester City,Manchester City,-1.657895
...,...,...
Wolverhampton Wanderers,Blackpool,2.912281
Fulham,Queens Park Rangers,2.982456
Everton,Blackpool,3.302632
Leicester City,Queens Park Rangers,3.342105


In [25]:
# Keep only the rows of pm whose 'AwayTeam' is Blackpool.
pm.query(expr="AwayTeam == 'Blackpool'")

Unnamed: 0,id,league_name,season,HomeTeam,AwayTeam,HomeGoals,AwayGoals,avg_team_goals,difference
769,2498,England Premier League,2010/2011,Wigan Athletic,Blackpool,0,4,1.115789,-1.115789
795,2524,England Premier League,2010/2011,Aston Villa,Blackpool,3,2,1.177632,1.822368
801,2530,England Premier League,2010/2011,West Ham United,Blackpool,0,0,1.466165,-1.466165
828,2557,England Premier League,2010/2011,Bolton Wanderers,Blackpool,2,2,1.368421,0.631579
849,2578,England Premier League,2010/2011,Stoke City,Blackpool,0,1,1.342105,-1.342105
870,2599,England Premier League,2010/2011,Arsenal,Blackpool,6,0,2.013158,3.986842
881,2610,England Premier League,2010/2011,Sunderland,Blackpool,0,2,1.210526,-1.210526
894,2623,England Premier League,2010/2011,Manchester City,Blackpool,1,0,2.401316,-1.401316
916,2645,England Premier League,2010/2011,West Bromwich Albion,Blackpool,3,2,1.330827,1.669173
943,2672,England Premier League,2010/2011,Everton,Blackpool,5,3,1.697368,3.302632
