### Pandas package

#### Motivation

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

# Load the DJI data; the first CSV column becomes the index and is parsed as dates.
dow = pd.read_csv("data/dji.csv", index_col=0, parse_dates=True)
close = dow["Close"]

# Explicit figure/axes pair so the pandas plot is drawn on a known Axes.
fig, ax = plt.subplots()
close.plot(ax=ax)
ax.set(xlabel="Date", ylabel="Price", title="DJI")

# savefig() does not create missing directories, so make sure "out/" exists
# first; otherwise a fresh run fails with FileNotFoundError.
out_path = Path("out/dji.pdf")
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, format="pdf")

### Series

In [None]:
# String representation of a Series:
# the left column is the index, the right column holds the values,
# and the last line reports the dtype of the values.

## 0 3
## 1 7
## 2 -8
## 3 4
## 4 26
## dtype: int64

#### Create Series

In [None]:
# Importing Pandas and creating a Series

import numpy as np
import pandas as pd

# Build a Series from a plain list; without an explicit index the entries
# are labelled 0..3.
obj = pd.Series(data=[2, -5, 9, 4])
obj

## 0 2
## 1 -5
## 2 9
## 3 4
## dtype: int64

In [None]:
# Label-based access on a Series compared with positional access on an ndarray.

npobj = np.array([2, -5, 9, 4])
obj2 = pd.Series(data=[2, -5, 9, 4], index=list("abcd"))

obj2
## a 2
## b -5
## c 9
## d 4
## dtype: int64

# The Series is addressed by its label ...
obj2["b"]
## -5

# ... while the NumPy array is addressed by position.
npobj[1]
## -5

In [4]:
# Wrap an existing NumPy array in a Series and attach string labels to it.

npobj = np.array([2, -5, 9, 4])
obj2 = pd.Series(data=npobj, index=list("abcd"))

obj2
## a 2
## b -5
## c 9
## d 4
## dtype: int64

In [None]:
# Build a Series from a dict: the keys become the index, the values the data.

dictdata = {
    "Göttingen": 117665,
    "Northeim": 28920,
    "Hannover": 532163,
    "Berlin": 3574830,
}
obj3 = pd.Series(data=dictdata)

obj3
## Göttingen 117665
## Northeim 28920
## Hannover 532163
## Berlin 3574830
## dtype: int64

In [None]:
# Build a Series from a dict while prescribing the index explicitly.
# A label that is missing from the dict ("Hamburg") shows up as NaN and the
# result is float64; a dict key that is not in the index ("Northeim") is left out.

cities = [
    "Hamburg",
    "Göttingen",
    "Berlin",
    "Hannover",
]
obj4 = pd.Series(data=dictdata, index=cities)

obj4
## Hamburg NaN
## Göttingen 117665.0
## Berlin 3574830.0
## Hannover 532163.0
## dtype: float64

#### Series properties

In [None]:
# Series properties

# .values exposes the data of the Series as a NumPy array.
obj.values
## array([ 2, -5, 9, 4])

# obj was created without an explicit index, so it carries a RangeIndex.
obj.index
## RangeIndex(start=0, stop=4, step=1)

# obj2 was created with string labels, which are stored in an object-dtype Index.
obj2.index
## Index(['a', 'b', 'c', 'd'], dtype='object')

#### Selecting and manipulating values

In [None]:
# Series selection

# Selecting with a list of labels returns the entries in the requested order.
obj2[["c", "d", "a"]]
## c 9
## d 4
## a 2
## dtype: int64

# Selecting with a boolean mask keeps only the entries where the condition holds.
obj2[obj2 < 0]
## b -5
## dtype: int64

In [None]:
# Series functions

# Arithmetic is applied element-wise and the index labels are kept.
obj2 * 2
## a 4
## b -10
## c 18
## d 8
## dtype: int64

# NumPy functions also work element-wise on a Series; note that slicing by
# label includes the end label ("c" is part of the result).
np.exp(obj2)["a":"c"]
## a 7.389056
## b 0.006738
## c 8103.083928
## dtype: float64

# Membership is tested against the index labels, not the values.
"c" in obj2
## True

In [None]:
# Series manipulation (in-place assignment)

# Assign a single value by label; this replaces the NaN that obj4 held for "Hamburg".
obj4["Hamburg"] = 1900000

obj4
## Hamburg 1900000.0
## Göttingen 117665.0
## Berlin 3574830.0
## Hannover 532163.0
## dtype: float64

# Assign several values at once: the list of labels on the left is matched
# position by position with the list of values on the right.
obj4[["Berlin", "Hannover"]] = [3600000, 1100000]

obj4
## Hamburg 1900000.0
## Göttingen 117665.0
## Berlin 3600000.0
## Hannover 1100000.0
## dtype: float64

#### Detect missing data

In [None]:
# NaN detection
# NOTE: "Hamburg" was assigned a value in the preceding manipulation cell, so
# obj4 no longer contains any NaN at this point and every entry reports
# "not missing" below.

# Element-wise mask that is True where a value is missing.
pd.isnull(obj4)
## Hamburg False
## Göttingen False
## Berlin False
## Hannover False
## dtype: bool

# The complementary mask: True where a value is present.
pd.notnull(obj4)
## Hamburg True
## Göttingen True
## Berlin True
## Hannover True
## dtype: bool

#### Align differently indexed data

In [None]:
# Data 1: the first operand of the alignment example (int64 values,
# contains "Northeim" but not "Hamburg").

obj3
## Göttingen 117665
## Northeim 28920
## Hannover 532163
## Berlin 3574830
## dtype: int64

In [None]:
# Data 2: the second operand of the alignment example (float64 values,
# contains "Hamburg" but not "Northeim", and lists the cities in a different order).

obj4
## Hamburg 1900000.0
## Göttingen 117665.0
## Berlin 3600000.0
## Hannover 1100000.0
## dtype: float64

In [None]:
# Align data
# Addition matches entries by index label, not by position. Labels that
# exist in only one operand ("Hamburg", "Northeim") yield NaN, and the
# result index comes back sorted.

obj3 + obj4
## Berlin 7174830.0
## Göttingen 235330.0
## Hamburg NaN
## Hannover 1632163.0
## Northeim NaN
## dtype: float64

#### Naming Series

In [None]:
# Naming
# Both the Series itself and its index can carry a name; the index name is
# printed above the labels and the Series name in the last line of the repr.

obj4.name = "population"
obj4.index.name = "city"

obj4
## city
## Hamburg 1900000.0
## Göttingen 117665.0
## Berlin 3600000.0
## Hannover 1100000.0
## Name: population, dtype: float64

### DataFrame

### Import/Export data

In [2]:
# Reload the watermark extension and print the author and GitHub username.
# NOTE(review): -iv presumably lists versions of imported modules; only `sys`
# appears in the recorded output, so this cell was likely run before the
# other imports — confirm and re-run after a full Restart & Run All.
%reload_ext watermark
%watermark -a "Caique Miranda" -gu "caiquemiranda" -iv

Author: Caique Miranda

Github username: caiquemiranda

sys: 3.10.5 (tags/v3.10.5:f377153, Jun  6 2022, 16:14:13) [MSC v.1929 64 bit (AMD64)]



### END.