# Pandas - Spezielle Funktionen mit DataFrames

In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
# Show the first row of every (x, y) group.
# Three random numeric columns plus two categorical key columns.
df = pd.DataFrame(np.random.rand(6, 3), columns=['a', 'b', 'c']).assign(
    x=['x1', 'x2', 'x1', 'x1', 'x3', 'x2'],
    y=['p', 'q', 'q', 'p', 'p', 'q'],
)
print(df)
# Group sizes per key pair, printed for comparison with the result below.
print(df.groupby(by=['x', 'y']).count())
# first() keeps the first row encountered in each group.
df.groupby(by=['x', 'y']).first()

          a         b         c   x  y
0  0.468666  0.472105  0.702097  x1  p
1  0.258108  0.853807  0.113598  x2  q
2  0.825803  0.368848  0.932599  x1  q
3  0.045474  0.072261  0.993649  x1  p
4  0.603943  0.512288  0.425238  x3  p
5  0.712150  0.751647  0.342579  x2  q
      a  b  c
x  y         
x1 p  2  2  2
   q  1  1  1
x2 q  2  2  2
x3 p  1  1  1


Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
x,y,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
x1,p,0.468666,0.472105,0.702097
x1,q,0.825803,0.368848,0.932599
x2,q,0.258108,0.853807,0.113598
x3,p,0.603943,0.512288,0.425238


In [3]:
# Apply a function along an axis, here once per row.
df = pd.DataFrame(np.random.rand(4, 3), columns=['a', 'b', 'c'])
# axis='columns' hands each row to the lambda as a Series;
# the result is the spread (max - min) of that row.
df['max-min'] = df.apply(lambda row: row.max() - row.min(), axis='columns')
df

Unnamed: 0,a,b,c,max-min
0,0.896794,0.996437,0.633982,0.362455
1,0.414034,0.065443,0.025083,0.38895
2,0.087632,0.166465,0.683547,0.595914
3,0.892705,0.407109,0.88576,0.485596


In [4]:
# Apply a function element-wise.
df = pd.DataFrame(np.random.rand(4, 3), columns=list('abc'))
# DataFrame.applymap is deprecated since pandas 2.1 (renamed to DataFrame.map).
# Mapping each column with Series.map gives the same element-wise result and
# runs without a FutureWarning on both old and new pandas versions.
df_formatted = df.apply(lambda col: col.map(lambda x: '%.2f' % x))
df_formatted

Unnamed: 0,a,b,c
0,0.24,0.9,0.46
1,0.05,0.06,0.02
2,0.18,0.63,0.91
3,0.09,0.18,0.62


In [5]:
# Sort by column labels (descending).
df = pd.DataFrame(np.random.rand(4, 3), columns=['a', 'b', 'c'])
# axis='columns' orders the column labels; the row order is left as is.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,c,b,a
0,0.165208,0.342897,0.485278
1,0.314732,0.944473,0.915802
2,0.347816,0.003814,0.638826
3,0.923833,0.907187,0.390604


In [6]:
# Sort by row labels (descending).
df = pd.DataFrame(np.random.rand(4, 3), columns=['a', 'b', 'c'])
# axis='index' orders the row labels; the column order is left as is.
df.sort_index(axis='index', ascending=False)

Unnamed: 0,a,b,c
3,0.865614,0.065848,0.332154
2,0.3415,0.307442,0.30984
1,0.218455,0.557855,0.893527
0,0.779936,0.657453,0.435188


In [7]:
# Stack DataFrames on top of each other (like R's rbind).
# Three frames of random integers in [0, 10) that share the same columns.
frames = [
    pd.DataFrame(np.random.randint(10, size=(3, 4)), columns=['a', 'b', 'c', 'd'])
    for _ in range(3)
]
df1, df2, df3 = frames
# ignore_index=True renumbers the rows 0..n-1 instead of repeating 0, 1, 2.
pd.concat(frames, ignore_index=True)

Unnamed: 0,a,b,c,d
0,6,8,9,2
1,0,7,4,3
2,8,5,9,3
3,3,1,0,5
4,9,2,8,4
5,2,6,6,0
6,4,0,3,3
7,2,9,3,8
8,6,1,6,9


In [8]:
# Join DataFrames side by side (like R's cbind).
df1 = pd.DataFrame(np.random.randint(10, size=(3, 4)), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.random.randint(10, size=(3, 4)), columns=['a', 'b', 'c', 'd'])
frames = [df1, df2]
# With axis=1, ignore_index=True replaces the duplicated column labels by 0..7;
# they are renamed to distinct letters right afterwards.
df = pd.concat(frames, axis=1, ignore_index=True)
df.columns = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
df

Unnamed: 0,a,b,c,d,e,f,g,h
0,3,1,9,3,5,9,3,1
1,1,6,3,7,5,2,1,3
2,1,7,1,7,6,7,0,4
