In [2]:
import pandas as pd
from io import StringIO
import copy

data = StringIO("""
"A","Iqweqw","3","1"
"A","am","12","2"
"A","asd","11","3"
"B","I am","11","4"
"B","B","12","5"
"B","you?","1","6"
"C","Hello I am C","4","7"
""")

# load string as stream into dataframe
df_ = pd.read_csv(data,header=0, names=["name","text","month","order"])
df_.sort_values(by='order')

Unnamed: 0,name,text,month,order
0,A,am,12,2
1,A,asd,11,3
2,B,I am,11,4
3,B,B,12,5
4,B,you?,1,6
5,C,Hello I am C,4,7


Let's consider the case we want, for each of the unique values of `name` only one row (the first one) from the dataframe.
We can do so as follows:

In [15]:
%%time
df_.groupby(['name']).apply(lambda s: s.head(1))

CPU times: user 5.18 ms, sys: 601 µs, total: 5.78 ms
Wall time: 6.59 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,name,text,month,order
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,0,A,am,12,2
B,2,B,I am,11,4
C,5,C,Hello I am C,4,7


Another way to do this would be using 'drop_duplicates'

In [14]:
%%time
df_.drop_duplicates(subset=["name"], keep='first')

CPU times: user 2 ms, sys: 908 µs, total: 2.91 ms
Wall time: 2.3 ms


Unnamed: 0,name,text,month,order
0,A,am,12,2
2,B,I am,11,4
5,C,Hello I am C,4,7


Now we want to get as many rows as unique values of the column 'name' and these new rows
will have only 2 columns, `name` and `text`, where `text` joins the `text` strings from the original dataframe:

In [73]:
df_.groupby(['name'], as_index = False).agg({'text': ' '.join})

Unnamed: 0,name,text
0,A,am asd
1,B,I am B you?
2,C,Hello I am C


Maybe, instead of joining the strings, we might want to see a set containing the `text` of the the original rows of the dataframe that happened to have the same `name` value:

In [74]:
df_.groupby(['name'], as_index = False).agg({'text':set})

Unnamed: 0,name,text
0,A,"{asd, am}"
1,B,"{I am, you?, B}"
2,C,{Hello I am C}


Now let us pick the string of the first row that has the first `name` value

In [75]:
df_.groupby(['name'], as_index = False).agg({'text': lambda x: list(x)[0] })

Unnamed: 0,name,text
0,A,am
1,B,I am
2,C,Hello I am C


We can have different aggregation methods that target different columns. 
If we do not specify a dict of the form `{col_1: function_1, col_2:function_2}` then the method will be applied across columns

In [79]:
df_.groupby(['name'], as_index = False).agg(set)

Unnamed: 0,name,text,month,order
0,A,"{asd, am}","{11, 12}","{2, 3}"
1,B,"{I am, you?, B}","{1, 11, 12}","{4, 5, 6}"
2,C,{Hello I am C},{4},{7}


In [77]:
df = copy.deepcopy(df_)

df['text'] = df[['name','text','month']].groupby(['name','month'])['text'].transform(lambda x: ','.join(x))
df.groupby(['name','month'])['text'].apply(lambda x: ','.join(x)).reset_index()
df[['name','text','month']].drop_duplicates()

Unnamed: 0,name,text,month
0,A,am,12
1,A,asd,11
2,B,I am,11
3,B,B,12
4,B,you?,1
5,C,Hello I am C,4


In [78]:
df = copy.deepcopy(df_)

df['text'] = df.groupby(['month'])['text'].transform(lambda x: ','.join(x))
df.groupby(['name','month'])['text'].apply(lambda x: ','.join(x)).reset_index()

Unnamed: 0,name,month,text
0,A,11,"asd,I am"
1,A,12,"am,B"
2,B,1,you?
3,B,11,"asd,I am"
4,B,12,"am,B"
5,C,4,Hello I am C
