#### Importing the pandas and Numpy

In [1]:
import numpy as np
import pandas as pd

#### Creating a Series Object

In [2]:
s = pd.Series([1,3,5,np.nan,6,8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

#### Creating a DataFrame 

#### By passing a Numpy array, with a datetime index and labeled columns

In [4]:
dates = pd.date_range("20130101",periods=6)
df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=list("ABCD"))

In [5]:
df

Unnamed: 0,A,B,C,D
2013-01-01,1.24724,-0.298577,-2.409358,-0.026067
2013-01-02,-1.617793,0.243773,-0.133399,-0.035955
2013-01-03,-1.736064,0.098297,0.463387,-0.304373
2013-01-04,-0.628743,1.492278,2.419546,0.186677
2013-01-05,1.187242,-0.847169,0.311317,-0.582514
2013-01-06,1.80409,-0.821284,1.979296,0.413671


##### Creating a DataFrame by passing a dict of objects 

In [8]:
dic = {
    "A": 1.0,
    "B":pd.Timestamp(20130102),
    "C":pd.Series(1,index=list(range(4)),dtype='float32'),
    "D":np.array([3]*4,dtype='int32'),
    "E":pd.Categorical(["test","train","test","train"]),
    "F":"foo"
      }
df2 = pd.DataFrame(dic)
df2.head()

Unnamed: 0,A,B,C,D,E,F
0,1.0,1970-01-01 00:00:00.020130102,1.0,3,test,foo
1,1.0,1970-01-01 00:00:00.020130102,1.0,3,train,foo
2,1.0,1970-01-01 00:00:00.020130102,1.0,3,test,foo
3,1.0,1970-01-01 00:00:00.020130102,1.0,3,train,foo


In [9]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### Viewing Data

In [10]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,1.24724,-0.298577,-2.409358,-0.026067
2013-01-02,-1.617793,0.243773,-0.133399,-0.035955
2013-01-03,-1.736064,0.098297,0.463387,-0.304373
2013-01-04,-0.628743,1.492278,2.419546,0.186677
2013-01-05,1.187242,-0.847169,0.311317,-0.582514


In [12]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.628743,1.492278,2.419546,0.186677
2013-01-05,1.187242,-0.847169,0.311317,-0.582514
2013-01-06,1.80409,-0.821284,1.979296,0.413671


In [13]:
print(df.index)
print(df.columns)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
Index(['A', 'B', 'C', 'D'], dtype='object')


**Numpy arrays have one dtype for the entire array, while pandas DataFrame have one dtype per column**

**Showing quick statistic summary of data**

In [14]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.042662,-0.022114,0.438465,-0.058093
std,1.564319,0.868838,1.717717,0.35204
min,-1.736064,-0.847169,-2.409358,-0.582514
25%,-1.37053,-0.690607,-0.02222,-0.237269
50%,0.27925,-0.10014,0.387352,-0.031011
75%,1.232241,0.207404,1.600319,0.133491
max,1.80409,1.492278,2.419546,0.413671


**Transposing data**

In [15]:
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,1.24724,-1.617793,-1.736064,-0.628743,1.187242,1.80409
B,-0.298577,0.243773,0.098297,1.492278,-0.847169,-0.821284
C,-2.409358,-0.133399,0.463387,2.419546,0.311317,1.979296
D,-0.026067,-0.035955,-0.304373,0.186677,-0.582514,0.413671


**Sorting by axes**

In [16]:
df.sort_index(axis=1,ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.026067,-2.409358,-0.298577,1.24724
2013-01-02,-0.035955,-0.133399,0.243773,-1.617793
2013-01-03,-0.304373,0.463387,0.098297,-1.736064
2013-01-04,0.186677,2.419546,1.492278,-0.628743
2013-01-05,-0.582514,0.311317,-0.847169,1.187242
2013-01-06,0.413671,1.979296,-0.821284,1.80409


**Sorting by values**

In [17]:
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2013-01-05,1.187242,-0.847169,0.311317,-0.582514
2013-01-06,1.80409,-0.821284,1.979296,0.413671
2013-01-01,1.24724,-0.298577,-2.409358,-0.026067
2013-01-03,-1.736064,0.098297,0.463387,-0.304373
2013-01-02,-1.617793,0.243773,-0.133399,-0.035955
2013-01-04,-0.628743,1.492278,2.419546,0.186677


### Selection

Pandas Data selections methods are : .at, .iat, .loc, .iloc

**Selecting a single column, which yields a Series**

In [18]:
df["A"]

2013-01-01    1.247240
2013-01-02   -1.617793
2013-01-03   -1.736064
2013-01-04   -0.628743
2013-01-05    1.187242
2013-01-06    1.804090
Freq: D, Name: A, dtype: float64

**Selecting via [ ]**

In [19]:
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,1.24724,-0.298577,-2.409358,-0.026067
2013-01-02,-1.617793,0.243773,-0.133399,-0.035955
2013-01-03,-1.736064,0.098297,0.463387,-0.304373


In [20]:
df["20130102":"20130104"]

Unnamed: 0,A,B,C,D
2013-01-02,-1.617793,0.243773,-0.133399,-0.035955
2013-01-03,-1.736064,0.098297,0.463387,-0.304373
2013-01-04,-0.628743,1.492278,2.419546,0.186677


#### Selecting by Label

**For getting a cross section using a label**

In [22]:
df.loc[dates[0]] #selecting first data row values

A    1.247240
B   -0.298577
C   -2.409358
D   -0.026067
Name: 2013-01-01 00:00:00, dtype: float64

**Selecting on a multi-axis label**

In [23]:
df.loc[:,["A","B"]]

Unnamed: 0,A,B
2013-01-01,1.24724,-0.298577
2013-01-02,-1.617793,0.243773
2013-01-03,-1.736064,0.098297
2013-01-04,-0.628743,1.492278
2013-01-05,1.187242,-0.847169
2013-01-06,1.80409,-0.821284


**Showing label slicing, both endpoints are included**

In [24]:
df.loc["20130102":"20130104",["A","B"]]

Unnamed: 0,A,B
2013-01-02,-1.617793,0.243773
2013-01-03,-1.736064,0.098297
2013-01-04,-0.628743,1.492278


**Reduction in the dimensions of the returned object**

In [25]:
df.loc["20130102",["A","B"]]

A   -1.617793
B    0.243773
Name: 2013-01-02 00:00:00, dtype: float64

In [27]:
#Getting scalar value
df.loc[dates[0],"A"]

1.2472404724507258

**For getting fast access to a scalar**

In [28]:
df.at[dates[0],"A"]

1.2472404724507258

### Selection by Position

**Select via the position of the passed integers**

In [29]:
df.iloc[3]

A   -0.628743
B    1.492278
C    2.419546
D    0.186677
Name: 2013-01-04 00:00:00, dtype: float64

**By integer slices**

In [30]:
df.iloc[3:5,0:2]

Unnamed: 0,A,B
2013-01-04,-0.628743,1.492278
2013-01-05,1.187242,-0.847169


**By list of integer position locations**

In [31]:
df.iloc[[1,2,4],[0,2]]

Unnamed: 0,A,C
2013-01-02,-1.617793,-0.133399
2013-01-03,-1.736064,0.463387
2013-01-05,1.187242,0.311317


**For getting a value explicitly**

In [32]:
df.iloc[1,2]

-0.1333987025685469

In [34]:
# for getting fast access to a scalar 
df.iat[1,2]

-0.1333987025685469

### Boolean Indexing

**Using a single column's values to select data**

In [35]:
df[df["A"]>0]

Unnamed: 0,A,B,C,D
2013-01-01,1.24724,-0.298577,-2.409358,-0.026067
2013-01-05,1.187242,-0.847169,0.311317,-0.582514
2013-01-06,1.80409,-0.821284,1.979296,0.413671


**Selecting values from a DataFrame where a boolean condition is met**

In [36]:
df[df>0]

Unnamed: 0,A,B,C,D
2013-01-01,1.24724,,,
2013-01-02,,0.243773,,
2013-01-03,,0.098297,0.463387,
2013-01-04,,1.492278,2.419546,0.186677
2013-01-05,1.187242,,0.311317,
2013-01-06,1.80409,,1.979296,0.413671


In [37]:
df2 = df.copy()
df2["E"] = ["one","one","two","three","four","three"]
df2 

Unnamed: 0,A,B,C,D,E
2013-01-01,1.24724,-0.298577,-2.409358,-0.026067,one
2013-01-02,-1.617793,0.243773,-0.133399,-0.035955,one
2013-01-03,-1.736064,0.098297,0.463387,-0.304373,two
2013-01-04,-0.628743,1.492278,2.419546,0.186677,three
2013-01-05,1.187242,-0.847169,0.311317,-0.582514,four
2013-01-06,1.80409,-0.821284,1.979296,0.413671,three


In [39]:
df2[df2["E"].isin(["two","four"])] #.isin() method is best suitable for selecting column values 

Unnamed: 0,A,B,C,D,E
2013-01-03,-1.736064,0.098297,0.463387,-0.304373,two
2013-01-05,1.187242,-0.847169,0.311317,-0.582514,four


### Setting

**Setting a new column automatically aligns the data by the indexes**

In [41]:
s1 = pd.Series([1,2,3,4,5,6],index=pd.date_range("20130102",periods=6))
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

**Setting values by label**

In [43]:
df["F"]=s1
df

Unnamed: 0,A,B,C,D,F
2013-01-01,1.24724,-0.298577,-2.409358,-0.026067,
2013-01-02,-1.617793,0.243773,-0.133399,-0.035955,1.0
2013-01-03,-1.736064,0.098297,0.463387,-0.304373,2.0
2013-01-04,-0.628743,1.492278,2.419546,0.186677,3.0
2013-01-05,1.187242,-0.847169,0.311317,-0.582514,4.0
2013-01-06,1.80409,-0.821284,1.979296,0.413671,5.0


**Setting Values by label**

In [45]:
df.at[dates[0],"A"]=0
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,-0.298577,-2.409358,-0.026067,
2013-01-02,-1.617793,0.243773,-0.133399,-0.035955,1.0
2013-01-03,-1.736064,0.098297,0.463387,-0.304373,2.0
2013-01-04,-0.628743,1.492278,2.419546,0.186677,3.0
2013-01-05,1.187242,-0.847169,0.311317,-0.582514,4.0
2013-01-06,1.80409,-0.821284,1.979296,0.413671,5.0


**Setting values by position**

In [47]:
df.iat[0,4]=0
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,-0.298577,-2.409358,-0.026067,0.0
2013-01-02,-1.617793,0.243773,-0.133399,-0.035955,1.0
2013-01-03,-1.736064,0.098297,0.463387,-0.304373,2.0
2013-01-04,-0.628743,1.492278,2.419546,0.186677,3.0
2013-01-05,1.187242,-0.847169,0.311317,-0.582514,4.0
2013-01-06,1.80409,-0.821284,1.979296,0.413671,5.0


### Handling Missing Data

**Pandas primarily uses *np.nan* to represent missing data.**

In [67]:
df = pd.DataFrame(
    np.random.rand(5,3),
    index = ['a','c','e','f','h'],
    columns= ["One","Two","Three"]
)
df

Unnamed: 0,One,Two,Three
a,0.830831,0.59224,0.348178
c,0.044574,0.478757,0.892334
e,0.094114,0.219222,0.945512
f,0.78002,0.685769,0.275879
h,0.215912,0.780821,0.393482


In [68]:
df['four'] = "bar"
df["five"]= df["One"]>0
df

Unnamed: 0,One,Two,Three,four,five
a,0.830831,0.59224,0.348178,bar,True
c,0.044574,0.478757,0.892334,bar,True
e,0.094114,0.219222,0.945512,bar,True
f,0.78002,0.685769,0.275879,bar,True
h,0.215912,0.780821,0.393482,bar,True


In [69]:
df2 = df.reindex(["a","b","c","d","e","f","g","h"])
df2

Unnamed: 0,One,Two,Three,four,five
a,0.830831,0.59224,0.348178,bar,True
b,,,,,
c,0.044574,0.478757,0.892334,bar,True
d,,,,,
e,0.094114,0.219222,0.945512,bar,True
f,0.78002,0.685769,0.275879,bar,True
g,,,,,
h,0.215912,0.780821,0.393482,bar,True


**To make detecting missing values, pandas provide *isna()* and *notna()* methods**

In [70]:
df2["One"]

a    0.830831
b         NaN
c    0.044574
d         NaN
e    0.094114
f    0.780020
g         NaN
h    0.215912
Name: One, dtype: float64

In [71]:
pd.isna(df2['One'])

a    False
b     True
c    False
d     True
e    False
f    False
g     True
h    False
Name: One, dtype: bool

In [72]:
df2["four"].notna()

a     True
b    False
c     True
d    False
e     True
f     True
g    False
h     True
Name: four, dtype: bool

In [73]:
df2.isna()

Unnamed: 0,One,Two,Three,four,five
a,False,False,False,False,False
b,True,True,True,True,True
c,False,False,False,False,False
d,True,True,True,True,True
e,False,False,False,False,False
f,False,False,False,False,False
g,True,True,True,True,True
h,False,False,False,False,False


### Datetimes

**For datetime types, NaT represents missing values. Pandas objects provide compatibiliy**

In [74]:
df2 = df.copy()

In [75]:
df2["timestamp"]=pd.Timestamp("20120101")
df2

Unnamed: 0,One,Two,Three,four,five,timestamp
a,0.830831,0.59224,0.348178,bar,True,2012-01-01
c,0.044574,0.478757,0.892334,bar,True,2012-01-01
e,0.094114,0.219222,0.945512,bar,True,2012-01-01
f,0.78002,0.685769,0.275879,bar,True,2012-01-01
h,0.215912,0.780821,0.393482,bar,True,2012-01-01


In [77]:
df2.loc[["a","c","h"],["One","timestamp"]]=np.nan
df2

Unnamed: 0,One,Two,Three,four,five,timestamp
a,,0.59224,0.348178,bar,True,NaT
c,,0.478757,0.892334,bar,True,NaT
e,0.094114,0.219222,0.945512,bar,True,2012-01-01
f,0.78002,0.685769,0.275879,bar,True,2012-01-01
h,,0.780821,0.393482,bar,True,NaT


In [78]:
df2.dtypes.value_counts()

float64           3
datetime64[ns]    1
object            1
bool              1
dtype: int64

### Inserting Missing Data

**We can insert missing values by simply assigning to containers. The actual missing value used will be chosen based on dtype**

In [79]:
s = pd.Series([1,2,3])
s.loc[0]=None
s

0    NaN
1    2.0
2    3.0
dtype: float64

In [80]:
s = pd.Series(["a","b","c"])
s.loc[0]=None
s.loc[1]=np.nan
s

0    None
1     NaN
2       c
dtype: object

### Calculations With Missing Data 

In [97]:
a= df2.loc[:,["One","Two"]]
b = df2.loc[:,["One","Two","Three"]]
a

Unnamed: 0,One,Two
a,,0.59224
c,,0.478757
e,0.094114,0.219222
f,0.78002,0.685769
h,,0.780821


In [98]:
b

Unnamed: 0,One,Two,Three
a,,0.59224,0.348178
c,,0.478757,0.892334
e,0.094114,0.219222,0.945512
f,0.78002,0.685769,0.275879
h,,0.780821,0.393482


In [99]:
a+b

Unnamed: 0,One,Three,Two
a,,,1.184481
c,,,0.957515
e,0.188227,,0.438445
f,1.56004,,1.371537
h,,,1.561643


**The descriptive statistics and computation methods are all written to account for missing data.
For Ex:** 
* When summing data, NA(missing) values will be treated as zero.
* If the data are all NA, the result will be 0
* Cumulative methods like cumsum() and cumprod() ignore NA values by default, but preserve them in the resulting arrays.

In [102]:
df2

Unnamed: 0,One,Two,Three,four,five,timestamp
a,,0.59224,0.348178,bar,True,NaT
c,,0.478757,0.892334,bar,True,NaT
e,0.094114,0.219222,0.945512,bar,True,2012-01-01
f,0.78002,0.685769,0.275879,bar,True,2012-01-01
h,,0.780821,0.393482,bar,True,NaT


In [103]:
df2["One"].sum()

0.8741334747001472

In [104]:
df2.mean(1)

  df2.mean(1)


a    0.646806
c    0.790364
e    0.564712
f    0.685417
h    0.724768
dtype: float64

### NA values in GroupBy
NA groups in GroupBy are automatically excluded.This behaviour is consistent with R.

In [106]:
df

Unnamed: 0,One,Two,Three,four,five
a,0.830831,0.59224,0.348178,bar,True
c,0.044574,0.478757,0.892334,bar,True
e,0.094114,0.219222,0.945512,bar,True
f,0.78002,0.685769,0.275879,bar,True
h,0.215912,0.780821,0.393482,bar,True


In [108]:
df.groupby("One").mean()

Unnamed: 0_level_0,Two,Three,five
One,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.044574,0.478757,0.892334,True
0.094114,0.219222,0.945512,True
0.215912,0.780821,0.393482,True
0.78002,0.685769,0.275879,True
0.830831,0.59224,0.348178,True


### GroupBy: Split-apply-combine

By **group by** we are referring to a process involving one or more of the following steps:-
* **splitting** the data into groups based on some criteria
* **Applying** a function to each group independently 
* **Combining** the results into a data structure

In the apply step, we might wish to do one of the following:-
* **Aggregation:** Compute a summary statistics for each group.
 1. Compute group sums or means
 2. compute group sizes or counts
* **Transformation:** Perform some group-specific computations and return a like-indexed object.
 1. Standardized data(zscore) within a group.
 2. Filling NAs with a value derived from each group.
* **Filtration:** Discard some groups, according to a group-wise computation that evaluates.
 1. Discard data that belongs to groups with only a few members.
 2. Filter out data based on the group sum  or mean
 


In [133]:
df = pd.DataFrame(
    [("bird","Falconiformers",389.0),
     ("bird","Psittaciformers",24.0),
     ("mammal","Carnivora",80.2),
     ("mammal","Primates",np.nan),
     ("mammal","Carnivora",58)
    ],
    index = ["falcon","parrot","lion","monkey","leopard"],
    columns= ("class","order","max_speed")
)
df

Unnamed: 0,class,order,max_speed
falcon,bird,Falconiformers,389.0
parrot,bird,Psittaciformers,24.0
lion,mammal,Carnivora,80.2
monkey,mammal,Primates,
leopard,mammal,Carnivora,58.0


In [134]:
grouped = df.groupby("class")
grouped.all()

Unnamed: 0_level_0,order,max_speed
class,Unnamed: 1_level_1,Unnamed: 2_level_1
bird,True,True
mammal,True,True


In [136]:
df = pd.DataFrame(
        {
            "A":["foo","bar","foo","bar","foo","bar","foo","foo"],
            "B":["one","one","two","three","two","two","one","three"],
            "C":np.random.randn(8),
            "D":np.random.randn(8),
        }
)
df.head()

Unnamed: 0,A,B,C,D
0,foo,one,0.635464,-0.580013
1,bar,one,-0.07528,0.547896
2,foo,two,0.3255,-0.757804
3,bar,three,1.570853,0.014844
4,foo,two,1.910471,-0.662375


In [141]:
grouped = df.groupby("A")
grouped.first()

Unnamed: 0_level_0,B,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.07528,0.547896
foo,one,0.635464,-0.580013


### GroupBy sorting 

In [142]:
df2 = pd.DataFrame(
    {
        "X":["B","B","A","A"],
        "Y":[1,2,3,4]
    }
)
df2.groupby(["X"]).sum()

Unnamed: 0_level_0,Y
X,Unnamed: 1_level_1
A,7
B,3


In [143]:
df2.groupby(["X"],sort=False).sum()

Unnamed: 0_level_0,Y
X,Unnamed: 1_level_1
B,3
A,7


In [149]:
df3 = pd.DataFrame(
    {
        "X":["A","B","A","B"],
                   "Y":[1,4,3,2]
    }
  )
print(df3.groupby(["X"]).get_group("A"))
print(df3.groupby(["X"]).get_group("B"))

   X  Y
0  A  1
2  A  3
   X  Y
1  B  4
3  B  2


### GroupBy dropna
By default NA values are excluded from group keys during the **groupby** operation.In case we want to include NA values in group keys, we could pass **dropna = False** to achive it.

In [150]:
df_list = [[1,2,3],[1,None,4],[2,1,3],[1,2,3]]
df_dropna = pd.DataFrame(df_list,columns = ["a","b","c"])
df_dropna

Unnamed: 0,a,b,c
0,1,2.0,3
1,1,,4
2,2,1.0,3
3,1,2.0,3


In [152]:
# Default 'dropna' is set to True, which will exclude nan in keys
df_dropna.groupby(by = ["b"],dropna = True).sum()

Unnamed: 0_level_0,a,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,2,3
2.0,2,6


In [153]:
#In order to allow NaN in keys, set dropna to False
df_dropna.groupby(by=["b"],dropna=False).sum()

Unnamed: 0_level_0,a,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,2,3
2.0,2,6
,1,4


### GroupBy object attributes


In [154]:
df.groupby("A").groups

{'bar': [1, 3, 5], 'foo': [0, 2, 4, 6, 7]}

In [158]:
grouped = df.groupby(["A","B"])
print(grouped.groups)
print(len(grouped))

{('bar', 'one'): [1], ('bar', 'three'): [3], ('bar', 'two'): [5], ('foo', 'one'): [0, 6], ('foo', 'three'): [7], ('foo', 'two'): [2, 4]}
6


In [159]:
df

Unnamed: 0,A,B,C,D
0,foo,one,0.635464,-0.580013
1,bar,one,-0.07528,0.547896
2,foo,two,0.3255,-0.757804
3,bar,three,1.570853,0.014844
4,foo,two,1.910471,-0.662375
5,bar,two,-0.30273,1.419697
6,foo,one,-0.608595,0.889889
7,foo,three,0.802533,-0.438494


### GroupBy with MutliIndex

In [4]:
arrays = [
        ["bar","bar","baz","baz","foo","foo","qux","qux"],
        ["one","two","one","two","one","two","one","two"],
]

In [5]:
index = pd.MultiIndex.from_arrays(arrays,names = ["first","second"])

In [6]:
s=pd.Series(np.random.randn(8),index = index)
s

first  second
bar    one      -0.146054
       two      -0.860446
baz    one      -1.877689
       two       1.605024
foo    one       0.054475
       two       0.560015
qux    one      -1.078320
       two       0.984333
dtype: float64

We can group by one of the levels in S

In [7]:
grouped = s.groupby(level =0)
grouped.sum()

first
bar   -1.006500
baz   -0.272665
foo    0.614490
qux   -0.093987
dtype: float64

In [8]:
grouped = s.groupby(level =1)
grouped.sum()

second
one   -3.047588
two    2.288926
dtype: float64

In [9]:
s

first  second
bar    one      -0.146054
       two      -0.860446
baz    one      -1.877689
       two       1.605024
foo    one       0.054475
       two       0.560015
qux    one      -1.078320
       two       0.984333
dtype: float64

In [10]:
s.groupby(["first"]).sum()

first
bar   -1.006500
baz   -0.272665
foo    0.614490
qux   -0.093987
dtype: float64

 ### Selecting a group