# Usage of DataFrame.agg()
- Aggregate using one or more operations over the specified axis.
- Syntax: <code>DataFrame.agg(func=None, axis=0, *args, **kwargs)</code>
- Description from [pandas docs](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.agg.html)

------

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [7]:
# Read the Compustat extract, then keep only the columns used below.
# NOTE(review): at / sale / ni are presumably total assets, sales and net
# income -- confirm against the data dictionary for this file.
df = pd.read_csv("compustat_from_1990.csv")
df = df.loc[:, ['gvkey', 'fyear', 'at', 'sale', 'ni']]
print(df.shape)
df.head()

(211966, 5)


Unnamed: 0,gvkey,fyear,at,sale,ni
0,1004,1990,379.958,466.542,14.801
1,1004,1991,395.351,422.657,10.02
2,1004,1992,365.151,382.78,0.283
3,1004,1993,417.626,407.754,9.494
4,1004,1994,425.814,451.395,10.463


In [8]:
# Flag loss observations: 1 when ni <= 0, 0 when ni > 0.
# A comparison against NaN evaluates to False, so a plain
# `(df.ni <= 0).astype(int)` codes rows with a missing ni as 0 and silently
# counts them as non-loss observations in any later mean. Masking those rows
# back to NaN keeps them out of the mean (pandas skips NaN by default).
# The column is float (0.0 / 1.0 / NaN) as a consequence.
df['neg_ni'] = (df['ni'] <= 0).astype(int).where(df['ni'].notna())
df.head()

Unnamed: 0,gvkey,fyear,at,sale,ni,neg_ni
0,1004,1990,379.958,466.542,14.801,0
1,1004,1991,395.351,422.657,10.02,0
2,1004,1992,365.151,382.78,0.283,0
3,1004,1993,417.626,407.754,9.494,0
4,1004,1994,425.814,451.395,10.463,0


------------

### Aggregate data to the year level

In [9]:
# One row per fiscal year:
#   count  - number of non-null gvkey values in that year
#   neg_ni - mean of the neg_ni flag, i.e. the share of flagged observations
# The fyear >= 1990 filter is presumably a no-op given the input file name;
# it is kept as a guard -- TODO confirm.
yearly_loss = (
    df.groupby('fyear')
      .agg({'gvkey': 'count', 'neg_ni': 'mean'})
      .reset_index()
      .rename(columns={'gvkey': 'count'})
      .loc[lambda by_year: by_year['fyear'] >= 1990]
)
yearly_loss.head()

Unnamed: 0,fyear,count,neg_ni
0,1990,6674,0.385975
1,1991,6783,0.401003
2,1992,7133,0.382308
3,1993,7596,0.367825
4,1994,8061,0.33569


--------------