<a href="https://colab.research.google.com/github/cstar-industries/python-3-beginner/blob/master/008-Essential-Python-Libraries/Essential%20Python%20Libraries%20-%20Chap%2006%20-%20Pandas%20-%20Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pandas

## Basics

In [0]:
import pandas as pd
import numpy as np

In [0]:
# Build a Series from a plain list of floats; no index is passed explicitly.
values = [6., 5., 6., 3.]
s = pd.Series(values)
print(s)

In [0]:
# A Series can wrap a NumPy array directly: 13 evenly spaced points from 0 to 10.
evenly_spaced = np.linspace(0, 10, 13)
pd.Series(evenly_spaced)

In [0]:
# Daily timestamps from 1 May to 15 May 2020 (both endpoints are given explicitly).
# 'D' is the canonical calendar-day offset alias; the lowercase spelling is
# deprecated in newer pandas releases.
r = pd.date_range('2020-05-01', '2020-05-15', freq='D')
print(r)

In [0]:
# Draw the 10x4 demo values from a seeded generator so that the frame (and every
# cell that prints or summarises it) shows the same numbers on each run.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.random((10, 4)), columns=['Col1', 'Col2', 'Col3', 'Col4'])
print(df)

In [0]:
# Select one column by label, print it, then let the cell's final expression
# report the type of that selection.
first_col = df['Col1']
print(first_col)
type(first_col)

In [0]:
# Sample sin(x) at 10 evenly spaced points over [0, 2*pi] and label the samples
# with 10 evenly spaced timestamps between 18:00 and 23:00.
# NOTE(review): the endpoints carry no date, so presumably pandas fills in the
# day the notebook is run — confirm before relying on absolute dates.
s1 = np.linspace(0, 2*np.pi, 10)
s2 = np.sin(s1)
time_index = pd.date_range('18:00', '23:00', periods=10)
columns = {'time': s1, 'signal': s2}
df2 = pd.DataFrame(columns, index=time_index)
print(df2)

In [0]:
# Preview the leading rows of the frame.
leading_rows = df2.head()
print(leading_rows)

In [0]:
# Preview the trailing rows of the frame.
trailing_rows = df2.tail()
print(trailing_rows)

In [0]:
# The row labels of the frame (the timestamps it was built with).
row_labels = df2.index
print(row_labels)

In [0]:
# The column labels of the frame.
column_labels = df2.columns
print(column_labels)

In [0]:
# Convert the frame's values to a NumPy array; the bare last expression displays it.
as_array = df2.to_numpy()
as_array

In [0]:
# Reorder the rows by the 'signal' column; sort_values returns a new frame and
# df2 itself is not reassigned.
by_signal = df2.sort_values(by='signal')
print(by_signal)

## Indexing

In [0]:
# An integer slice inside [] selects rows by position: here positions 2 and 3.
row_window = slice(2, 4)
print(df2[row_window])

In [0]:
# Slice rows by timestamp label, from 20:00 through 21:00.
# df2's index was built from time-only endpoints, so its date is not fixed in
# the notebook; read the day from the index itself instead of hard-coding one,
# otherwise the slice comes back empty on any other day.
day = df2.index[0].strftime('%Y-%m-%d')
print(df2.loc[f'{day} 20:00':f'{day} 21:00'])

In [0]:
# Boolean indexing in two steps: first build the True/False mask, then use it
# to keep only the rows where the signal is strictly positive.
is_positive = df2.signal > 0
print(is_positive)
print()
print(df2[is_positive])

In [0]:
# Fetch a single row by its label. The key is built as an explicit Timestamp
# rather than a bare time-of-day string, so the lookup does not hinge on how
# .loc interprets a label that has no date.
# NOTE(review): assumes pd.Timestamp fills in the same (current) day that
# date_range used when df2's index was created — confirm.
row_key = pd.Timestamp('19:06:40')
print(df2.loc[row_key])

In [0]:
# Fetch one scalar by (row label, column label). The row key is an explicit
# Timestamp rather than a bare time-of-day string, so the lookup does not hinge
# on how .at interprets a label that has no date.
# NOTE(review): assumes pd.Timestamp fills in the same (current) day that
# date_range used when df2's index was created — confirm.
row_key = pd.Timestamp('19:06:40')
df2.at[row_key, 'time']

In [0]:
# Purely positional selection: the first two rows of the column at position 1.
first_two_rows = slice(None, 2)
df2.iloc[first_two_rows, 1]

In [0]:
# Positional scalar access: the value at row position 0, column position 0.
row_pos, col_pos = 0, 0
df2.iat[row_pos, col_pos]

## Operations

In [0]:
# Show the frame, then its sums with the default axis and with axis=1, each
# separated by a blank line.
default_axis_totals = df.sum()
axis1_totals = df.sum(axis=1)
print(df, default_axis_totals, axis1_totals, sep='\n\n')

In [0]:
# Summary statistics for the frame as produced by describe().
summary = df.describe()
print(summary)

In [0]:
def _round_values(x):
    """Return ``x`` rounded with ``np.round`` (default number of decimals)."""
    return np.round(x)


# apply() hands the frame to the function one slice at a time and reassembles
# the results into a new frame.
print(df.apply(_round_values))

In [0]:
# Twenty voters, each assigned 'red' or 'blue' with equal probability.
# The colours come from a seeded generator so the table, and the merge and
# tally cells that build on it, give the same result on every run.
rng = np.random.default_rng(42)
votes = pd.DataFrame({'voter_id': [f'voter_{i}' for i in range(20)],
                      'color': rng.choice(['red', 'blue'], size=20).tolist()})
print(votes)

In [0]:
# Lookup table pairing each colour with a candidate name, built row by row.
candidate_rows = [('red', 'Alice'), ('blue', 'Bob')]
candidates = pd.DataFrame(candidate_rows, columns=['color', 'name'])
print(candidates)

In [0]:
# Join the two tables on their shared 'color' column, attaching the matching
# candidate's columns to every vote.
df3 = votes.merge(candidates, on='color')
print(df3)

In [0]:
# Group the merged rows by candidate name and count the entries in each group.
by_candidate = df3.groupby('name')
print(by_candidate.count())

## Importing data

In [0]:
import requests

csv_url = 'https://raw.githubusercontent.com/openZH/covid_19/master/COVID19_Fallzahlen_CH_total_v2.csv'
csv_path = 'COVID19_Fallzahlen_CH_total_v2.csv'

# Download before touching the local file: if the request fails, no empty or
# truncated CSV is left behind. The timeout keeps the cell from hanging forever.
res = requests.get(csv_url, timeout=30)
# Fail loudly on an HTTP error instead of saving an error page as the CSV.
res.raise_for_status()

with open(csv_path, 'wb') as f:
    f.write(res.content)

In [0]:
# Load the CSV file from the working directory and list the columns it provides.
csv_file = 'COVID19_Fallzahlen_CH_total_v2.csv'
df = pd.read_csv(csv_file)
print(df.columns)

In [0]:
# Keep all rows but only the three columns used in the rest of the section.
wanted_columns = ['date', 'abbreviation_canton_and_fl', 'current_hosp']
df = df.loc[:, wanted_columns]
print(df.head())

In [0]:
# Discard the rows where 'current_hosp' is missing.
has_hosp_value = df['current_hosp'].notna()
df = df[has_hosp_value]
print(df.head())

In [0]:
# Index the rows by date. pd.to_datetime converts the whole column in a single
# vectorised call rather than being invoked once per element through apply().
# The 'date' column itself is left in place; only the index changes.
date_index = pd.to_datetime(df['date'])
df = df.set_index(date_index)
print(df)

In [0]:
# Narrow the frame down to the canton abbreviation and the hospitalisation count.
kept_columns = ['abbreviation_canton_and_fl', 'current_hosp']
df = df[kept_columns]
print(df)

In [0]:
# Maximum per canton abbreviation, listed from the largest 'current_hosp' down.
per_canton_max = df.groupby('abbreviation_canton_and_fl').max()
ranked = per_canton_max.sort_values('current_hosp', ascending=False)
print(ranked)

In [0]:
# Total 'current_hosp' per date. The aggregation is restricted to that column so
# the canton-abbreviation column is never summed: from pandas 2.0 on, sum() no
# longer drops non-numeric columns silently. The double brackets keep the
# result a DataFrame.
daily_total = df.groupby('date')[['current_hosp']].sum()
print(daily_total)

In [0]:
import matplotlib.pyplot as plt

# Plot the per-date total of 'current_hosp'. Only that column is aggregated so
# the canton-abbreviation column is never summed: from pandas 2.0 on, sum() no
# longer drops non-numeric columns silently.
daily_total = df.groupby('date')[['current_hosp']].sum()
daily_total.plot()
plt.show()

In [0]:
# Open a large figure first, then plot the 'current_hosp' series of every
# canton group with a legend.
figure_size = (32, 16)
plt.figure(figsize=figure_size)
hosp_by_canton = df.groupby(['abbreviation_canton_and_fl'])['current_hosp']
hosp_by_canton.plot(legend=True)
plt.show()