In [28]:
import stata_setup
stata_setup.config("C:/Program Files/Stata17", "se")

## Set up simulated data in Stata
Data broadly resembles a year panel based on IAB data with person (persnr), establishment (betnr) and year identifiers.

In [29]:
%%stata
clear all
set obs 10000000

gen betnr = runiformint(1, 1000)
gen persnr = runiformint(1,100000)
gen year = runiformint(1972, 2019)

gen double tentgelt = runiform(0, 100000) 

gen german = runiformint(0,1)
bysort persnr: replace german = german[_N]


. clear all

. set obs 10000000
Number of observations (_N) was 0, now 10,000,000.

. 
. gen betnr = runiformint(1, 1000)

. gen persnr = runiformint(1,100011)

. gen year = runiformint(1972, 2019)

. 
. gen double tentgelt = runiform(0, 100000) 

. 
. gen german = runiformint(0,1)

. bysort persnr: replace german = german[_N]
(4,950,882 real changes made)

. 


# Working with yearly panels
## Egen: max()
    - initialize variable
    - use max() function to get largest value into last observation determined by 'bysort'
        - note missings are ignored by max() unless *all* observations are missing
    - replace within 'bysort' by the last observation

In [30]:
%%stata
gen max_tentgelt = tentgelt
bysort persnr: replace max_tentgelt = max(max_tentgelt[_n-1], max_tentgelt)
bysort persnr: replace max_tentgelt = max_tentgelt[_N]


. gen max_tentgelt = tentgelt

. bysort persnr: replace max_tentgelt = max(max_tentgelt[_n-1], max_tentgelt)
(9,481,927 real changes made)

. bysort persnr: replace max_tentgelt = max_tentgelt[_N]
(4,946,233 real changes made)

. 


### Run-time comparison


In [31]:
%%stata
drop max_tentgelt
timer on 1
gen max_tentgelt = tentgelt
bysort persnr: replace max_tentgelt = max(max_tentgelt[_n-1], max_tentgelt)
bysort persnr: replace max_tentgelt = max_tentgelt[_N]
timer off 1

timer on 2
egen max_tentgelt_egen = max(tentgelt), by(persnr)
timer off 2

timer on 3
gegen max_tentgelt_gegen = max(tentgelt), by(persnr)
timer off 3

timer list


. drop max_tentgelt

. timer on 1

. gen max_tentgelt = tentgelt

. bysort persnr: replace max_tentgelt = max(max_tentgelt[_n-1], max_tentgelt)
(9,481,927 real changes made)

. bysort persnr: replace max_tentgelt = max_tentgelt[_N]
(4,946,233 real changes made)

. timer off 1

. 
. timer on 2

. egen max_tentgelt_egen = max(tentgelt), by(persnr)

. timer off 2

. 
. timer on 3

. gegen max_tentgelt_gegen = max(tentgelt), by(persnr)

. timer off 3

. 
. timer list
   1:      1.85 /        1 =       1.8490
   2:      3.56 /        1 =       3.5600
   3:      0.59 /        1 =       0.5910

. 
