<a href="https://colab.research.google.com/github/alexontour/snippets/blob/main/snip_pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Author: Alexander Kollmann, 08/2022**

---

**Funktion**

Wichtige Pandas-Funktionen, erklärt anhand des Datensatzes "COVID-19 Schutzimpfungen - Ausgelieferte und bestellte Impfdosen"

---



**Referenzen**

https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html 

https://www.nagarajbhat.com/post/picture-pandas-little-guide/

---



In [1]:
import pandas as pd

In [3]:
# Load the CSV file into a DataFrame.
# Dataset page: https://www.data.gv.at/katalog/dataset/covid-19-schutzimpfungen-ausgelieferte-und-bestellte-impfdosen
url = 'https://info.gesundheitsministerium.gv.at/data/timeline-bbg.csv'
df = pd.read_csv(
    filepath_or_buffer=url,
    delimiter=';',  # fields in this file are separated by semicolons
)

In [4]:
# Preview the DataFrame: show only its first 5 rows.
df.head(n=5)

Unnamed: 0,Datum,BundeslandID,Bevölkerung,Name,Auslieferungen,AuslieferungenPro100,Bestellungen,BestellungenPro100
0,2021-01-09T23:59:59+01:00,0,,Bundesverwaltung,0,,0,
1,2021-01-09T23:59:59+01:00,1,299235.0,Burgenland,355,0.118636,3995,1.335071
2,2021-01-09T23:59:59+01:00,2,566166.0,Kärnten,305,0.053871,6830,1.20636
3,2021-01-09T23:59:59+01:00,3,1708140.0,Niederösterreich,9042,0.529348,20070,1.174962
4,2021-01-09T23:59:59+01:00,4,1512226.0,Oberösterreich,520,0.034386,21395,1.414802


In [5]:
# Summary statistics for the numeric columns.
# Caution: the result is not always meaningful - describe() also aggregates
# BundeslandID, which looks like an identifier rather than a measurement.
df.describe()

Unnamed: 0,BundeslandID,Bevölkerung,Auslieferungen,AuslieferungenPro100,Bestellungen,BestellungenPro100
count,6721.0,6110.0,6721.0,6110.0,6721.0,6110.0
mean,5.0,1805600.0,2421777.0,123.105462,2482004.0,125.811238
std,3.162513,2469485.0,4413478.0,73.055228,4493094.0,72.877072
min,0.0,299235.0,0.0,0.034386,0.0,0.783302
25%,2.0,564293.0,407429.0,60.438637,430783.0,67.139067
50%,5.0,1011691.0,1023272.0,124.669166,1054992.0,130.567237
75%,8.0,1708140.0,2523999.0,186.359114,2554440.0,186.818164
max,10.0,9027999.0,23830640.0,263.963742,25234240.0,279.510919


In [6]:
# Show the data type of every column in the DataFrame.
df.dtypes

Datum                    object
BundeslandID              int64
Bevölkerung             float64
Name                     object
Auslieferungen            int64
AuslieferungenPro100    float64
Bestellungen              int64
BestellungenPro100      float64
dtype: object

In [7]:
# Column filter: keep only the listed columns in a new DataFrame,
# then display it as the cell result.
df_filter = df.filter(items=['Datum', 'Name', 'Auslieferungen'])
df_filter

Unnamed: 0,Datum,Name,Auslieferungen
0,2021-01-09T23:59:59+01:00,Bundesverwaltung,0
1,2021-01-09T23:59:59+01:00,Burgenland,355
2,2021-01-09T23:59:59+01:00,Kärnten,305
3,2021-01-09T23:59:59+01:00,Niederösterreich,9042
4,2021-01-09T23:59:59+01:00,Oberösterreich,520
...,...,...,...
6716,2022-09-11T23:59:59+02:00,Steiermark,2671519
6717,2022-09-11T23:59:59+02:00,Tirol,1683977
6718,2022-09-11T23:59:59+02:00,Vorarlberg,822135
6719,2022-09-11T23:59:59+02:00,Wien,4300731


In [8]:
# Row filter: keep the rows where Name is 'Salzburg' AND Auslieferungen is
# at least 1000. Both conditions are boolean Series combined element-wise.
df_filter.loc[
    df_filter['Name'].eq('Salzburg')
    & df_filter['Auslieferungen'].ge(1000)
]

Unnamed: 0,Datum,Name,Auslieferungen
5,2021-01-09T23:59:59+01:00,Salzburg,1660
16,2021-01-10T23:59:59+01:00,Salzburg,1660
27,2021-01-11T23:59:59+01:00,Salzburg,3650
38,2021-01-12T23:59:59+01:00,Salzburg,4035
49,2021-01-13T23:59:59+01:00,Salzburg,4610
...,...,...,...
6671,2022-09-07T23:59:59+02:00,Salzburg,1248629
6682,2022-09-08T23:59:59+02:00,Salzburg,1248629
6693,2022-09-09T23:59:59+02:00,Salzburg,1249825
6704,2022-09-10T23:59:59+02:00,Salzburg,1250689


In [13]:
# Grouped view: reduce to three columns, group by Name and take the maximum
# of each remaining column per group, order by Name and show up to 15 groups.
# NOTE(review): Datum has object dtype in the dtypes output, so its maximum is
# presumably a string comparison rather than a datetime one - confirm.
(
    df.filter(items=['Datum', 'Name', 'Auslieferungen'])
    .groupby(by='Name')
    .max()
    .sort_values(by='Name', ascending=True)
    .head(n=15)
)

Unnamed: 0_level_0,Datum,Auslieferungen
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Bundesverwaltung,2022-09-11T23:59:59+02:00,4916781
Burgenland,2022-09-11T23:59:59+02:00,615923
Kärnten,2022-09-11T23:59:59+02:00,1177364
Niederösterreich,2022-09-11T23:59:59+02:00,3431530
Oberösterreich,2022-09-11T23:59:59+02:00,2959995
Salzburg,2022-09-11T23:59:59+02:00,1250689
Steiermark,2022-09-11T23:59:59+02:00,2671519
Tirol,2022-09-11T23:59:59+02:00,1683977
Vorarlberg,2022-09-11T23:59:59+02:00,822135
Wien,2022-09-11T23:59:59+02:00,4300731
