# Pandas - Ausgaben und Zuweisungen in DataFrames

In [1]:
import os
import numpy as np
import pandas as pd

### Pandas Indices
Pandas DataFrame haben Zeilen und Spalten Indices. Man kann ueber Integer-Positionen oder uerber Labels auf Zellen, Zeilen oder Spalten zugreifen.

#### Zeilenausgabe mit `iloc` und `loc`
`iloc` gibt Zeilen (oder Spalten) an den Index-Positionen aus (nur Integer)  
`loc` gibt Zeilen (oder Spalten) vom den Index-Labels aus

In [2]:
df = pd.DataFrame(np.random.rand(10,3), columns=list('abc'))
df

Unnamed: 0,a,b,c
0,0.06861,0.298421,0.005939
1,0.931189,0.991086,0.688549
2,0.667031,0.021365,0.704624
3,0.189492,0.662468,0.489104
4,0.770412,0.060801,0.976487
5,0.667918,0.095848,0.919606
6,0.638257,0.450648,0.502755
7,0.223559,0.148856,0.105227
8,0.952569,0.09965,0.58559
9,0.602501,0.757641,0.243319


### Ausgabe einzelner Zellen

In [3]:
# spezifische Zelle ausgeben mit Integer Indices
df.iloc[1,1]
# oder
df.iat[1,1]

0.99108615316913073

In [4]:
# spezifische Zelle ausgeben mit Label ausgeben
idx = df.index[4]
df.loc[idx,'c']
# oder
df.at[idx,'c']

0.97648657406595618

### Zeilen ausgeben

In [5]:
# dritte Zeile ausgeben (Zeile mit Index=2)
df.iloc[2,:]

a    0.667031
b    0.021365
c    0.704624
Name: 2, dtype: float64

In [6]:
# die ersten drei Zeilen ausgeben
df.iloc[:3,:]
# oder 
df[:3]

Unnamed: 0,a,b,c
0,0.06861,0.298421,0.005939
1,0.931189,0.991086,0.688549
2,0.667031,0.021365,0.704624


In [7]:
# Zeilen von 7 bis zur letzten Zeile ausgeben
df.iloc[7:,:]
# oder
df[7:]

Unnamed: 0,a,b,c
7,0.223559,0.148856,0.105227
8,0.952569,0.09965,0.58559
9,0.602501,0.757641,0.243319


In [8]:
# letzten zwei Zeilen ausgeben
df.iloc[-2:,:]
# oder
df[-2:]

Unnamed: 0,a,b,c
8,0.952569,0.09965,0.58559
9,0.602501,0.757641,0.243319


In [9]:
# zwei Zeile (2 und 3) ausgeben (exklusiv Ende)
df.iloc[2:4,:]

Unnamed: 0,a,b,c
2,0.667031,0.021365,0.704624
3,0.189492,0.662468,0.489104


#### Spezifische Zeilen ausgeben (Bedingte Ausgabe)

In [10]:
# spezifische Zeilen ausgeben (nicht iloc verwenden!)
# iloc würde nur bei Integer Indices funktionieren
idx = df.index[df['b']>0.6]
df.loc[idx.values]

Unnamed: 0,a,b,c
1,0.931189,0.991086,0.688549
3,0.189492,0.662468,0.489104
9,0.602501,0.757641,0.243319


### Spalten Ausgabe

In [11]:
# Spalte b ausgeben
df['b']
# oder 
df.loc[:,'b']

0    0.298421
1    0.991086
2    0.021365
3    0.662468
4    0.060801
5    0.095848
6    0.450648
7    0.148856
8    0.099650
9    0.757641
Name: b, dtype: float64

In [12]:
# Spalte a und c ausgeben
df[['a','c']]
# oder
df.loc[:,['a','c']]

Unnamed: 0,a,c
0,0.06861,0.005939
1,0.931189,0.688549
2,0.667031,0.704624
3,0.189492,0.489104
4,0.770412,0.976487
5,0.667918,0.919606
6,0.638257,0.502755
7,0.223559,0.105227
8,0.952569,0.58559
9,0.602501,0.243319


In [13]:
# zweite Spate ausgeben 
df.iloc[:,[1]]

Unnamed: 0,b
0,0.298421
1,0.991086
2,0.021365
3,0.662468
4,0.060801
5,0.095848
6,0.450648
7,0.148856
8,0.09965
9,0.757641


In [14]:
# erste und dritte Spalte ausgeben
df.iloc[:,[0,2]]

Unnamed: 0,a,c
0,0.06861,0.005939
1,0.931189,0.688549
2,0.667031,0.704624
3,0.189492,0.489104
4,0.770412,0.976487
5,0.667918,0.919606
6,0.638257,0.502755
7,0.223559,0.105227
8,0.952569,0.58559
9,0.602501,0.243319


In [15]:
# letzte Spalte ausgeben
df.iloc[:,-1:]

Unnamed: 0,c
0,0.005939
1,0.688549
2,0.704624
3,0.489104
4,0.976487
5,0.919606
6,0.502755
7,0.105227
8,0.58559
9,0.243319


In [16]:
# letzten beiden Spalte ausgeben
df.iloc[:,-2:]

Unnamed: 0,b,c
0,0.298421,0.005939
1,0.991086,0.688549
2,0.021365,0.704624
3,0.662468,0.489104
4,0.060801,0.976487
5,0.095848,0.919606
6,0.450648,0.502755
7,0.148856,0.105227
8,0.09965,0.58559
9,0.757641,0.243319


#### Spezifische Spalten ausgeben (Bedingte Ausgabe)

In [17]:
# Spalten bei denen der Wert in der ersten Zeile groesser als Schwellwert ist
# Bem.: wegen loc muss index verwendet werden, wenn man nur 3 Zeilen ausgeben moechte
idx = df.index[0:3]
clx = df.columns[df.iloc[0,:]>0.6]
df.loc[idx,clx]

0
1
2


### Wert einer Zellen Zuweisungen

In [18]:
# Spezifischer Zelle einen Wert zuweisen
df.at[2,'c'] = 3.0
df.iat[1,1] = 4.0
df.iloc[2,2] = 7
df.loc[df.index[0],'a'] = 8
df[:3]

Unnamed: 0,a,b,c
0,8.0,0.298421,0.005939
1,0.931189,4.0,0.688549
2,0.667031,0.021365,7.0


### Werte mehreren Zellen Zuweisungen

In [19]:
# Wert 0.0 an spezifische Zeilen zuweisen
idx = df.index[df['a']>0.7]
df.loc[idx,['c']] = 0.0
df

Unnamed: 0,a,b,c
0,8.0,0.298421,0.0
1,0.931189,4.0,0.0
2,0.667031,0.021365,7.0
3,0.189492,0.662468,0.489104
4,0.770412,0.060801,0.0
5,0.667918,0.095848,0.919606
6,0.638257,0.450648,0.502755
7,0.223559,0.148856,0.105227
8,0.952569,0.09965,0.0
9,0.602501,0.757641,0.243319


In [20]:
# oder mit einer anderen Bedingung
idx = df.index[(df['a']>0.7) & (df['b']>0.3)]
df.loc[idx,['c']] = 0.0
df

Unnamed: 0,a,b,c
0,8.0,0.298421,0.0
1,0.931189,4.0,0.0
2,0.667031,0.021365,7.0
3,0.189492,0.662468,0.489104
4,0.770412,0.060801,0.0
5,0.667918,0.095848,0.919606
6,0.638257,0.450648,0.502755
7,0.223559,0.148856,0.105227
8,0.952569,0.09965,0.0
9,0.602501,0.757641,0.243319


In [21]:
# Zuweisung an mehrere spezifische Zellen 
idx = np.random.choice(range(9), 5, replace=False)
clx = np.random.randint(3, size=(5))
print(idx)
print(clx)
for q,p in zip(idx,clx):
    df.iloc[q,p] = np.NaN
df

[6 8 4 7 2]
[1 0 2 1 2]


Unnamed: 0,a,b,c
0,8.0,0.298421,0.0
1,0.931189,4.0,0.0
2,0.667031,0.021365,
3,0.189492,0.662468,0.489104
4,0.770412,0.060801,
5,0.667918,0.095848,0.919606
6,0.638257,,0.502755
7,0.223559,,0.105227
8,,0.09965,0.0
9,0.602501,0.757641,0.243319


In [22]:
# Ausgabe mit List Comprehension
[df.iloc[q,p] for q,p in zip(idx,clx)]

[nan, nan, nan, nan, nan]

### np.NaN

In [23]:
# NaN ersetzen 
df.fillna(9.0)

Unnamed: 0,a,b,c
0,8.0,0.298421,0.0
1,0.931189,4.0,0.0
2,0.667031,0.021365,9.0
3,0.189492,0.662468,0.489104
4,0.770412,0.060801,9.0
5,0.667918,0.095848,0.919606
6,0.638257,9.0,0.502755
7,0.223559,9.0,0.105227
8,9.0,0.09965,0.0
9,0.602501,0.757641,0.243319


In [24]:
# Zellen mit NaN bestimmen und dann loeschen
idx = df.index[df['b'].isnull()]
print(idx)
df.drop(idx)

Int64Index([6, 7], dtype='int64')


Unnamed: 0,a,b,c
0,8.0,0.298421,0.0
1,0.931189,4.0,0.0
2,0.667031,0.021365,
3,0.189492,0.662468,0.489104
4,0.770412,0.060801,
5,0.667918,0.095848,0.919606
8,,0.09965,0.0
9,0.602501,0.757641,0.243319
