In [51]:
import numpy as np
import pandas as pd

# Introducing Pandas


Pandas is an open source library providing high-performance,  
easy-to-use data structures and data analysis tools for the Python programming language.


In [52]:
def series_info(series: pd.Series) -> None:
    """Print a short structural summary of a Series, then the Series itself.

    Writes the ``ndim``, ``shape``, ``size`` and ``dtype`` attributes to
    stdout, one ``label: value`` pair per line, followed by a ``values:``
    line, the Series' text representation and a trailing blank line.

    Args:
        series: The Series to describe.

    Returns:
        None. The summary is only printed.
    """
    report_lines = (
        f"ndim: {series.ndim}",
        f"shape: {series.shape}",
        f"size: {series.size}",
        f"dtype: {series.dtype}",
        # The trailing "\n" leaves an empty line after the Series repr.
        f"values:\n{series}\n",
    )
    print(*report_lines, sep="\n")

In [53]:
def df_info(df: pd.DataFrame) -> None:
    """Print a short structural summary of a DataFrame, then the frame itself.

    Writes ``ndim``, ``shape`` and ``size`` to stdout, then the per-column
    dtypes (``df.dtypes``) under the label ``dtype:``, followed by a
    ``values:`` line, the DataFrame's text representation and a trailing
    blank line.

    Args:
        df: The DataFrame to describe.

    Returns:
        None. The summary is only printed.
    """
    report_lines = (
        f"ndim: {df.ndim}",
        f"shape: {df.shape}",
        f"size: {df.size}",
        # df.dtypes is itself a Series, so this entry spans several lines.
        f"dtype: {df.dtypes}",
        # The trailing "\n" leaves an empty line after the frame repr.
        f"values:\n{df}\n",
    )
    print(*report_lines, sep="\n")

In [54]:
# Four fractions stored as 32-bit floats. No index is passed, so pandas
# supplies its default integer index.
data = pd.Series([0.25, 0.5, 0.75, 1.0], dtype=np.float32)

In [55]:
series_info(data)

ndim: 1
shape: (4,)
size: 4
dtype: float32
values:
0    0.25
1    0.50
2    0.75
3    1.00
dtype: float32



In [56]:
print(type(data.values))
print(data.values)

<class 'numpy.ndarray'>
[0.25 0.5  0.75 1.  ]


In [57]:
print(type(data.index))
print(data.index)

<class 'pandas.core.indexes.range.RangeIndex'>
RangeIndex(start=0, stop=4, step=1)


In [58]:
print(data[1])

0.5


In [59]:
# Positional slice: takes the rows at positions 1 and 2. `.iloc` states that
# intent explicitly; with an integer index, a bare `data[1:3]` slices by
# position while a bare `data[1]` looks up by label, which is easy to misread.
slice_data = data.iloc[1:3]

In [60]:
series_info(slice_data)

ndim: 1
shape: (2,)
size: 2
dtype: float32
values:
1    0.50
2    0.75
dtype: float32



In [61]:
# The slice keeps its original labels (1 and 2), so this is a lookup by
# label, not by position; `.loc` makes that explicit.
slice_data.loc[1]

0.5

## pd.Series

Series is a one-dimensional labeled array capable of holding any data type.  
The axis labels are collectively referred to as the index.

The basic method to create a Series is to call:

```python
s = pd.Series(data, index=index)
```

Here, `data` can be many different things:

-   a Python dict
-   an iterable object (list, np.ndarray)
-   a scalar value (like 5)


In [62]:
# The same four values, this time with explicit string labels as the index.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0], index=list("abcd"))

In [63]:
series_info(data)

ndim: 1
shape: (4,)
size: 4
dtype: float64
values:
a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64



In [64]:
print(data["a"])

0.25


In [65]:
# Colour name -> integer count (becomes the "count" column further down).
colors_dict = dict(Red=12, Green=8, Blue=33)

In [66]:
# Building a Series from a dict: the keys become the index labels and the
# values are stored as 8-bit integers.
colors_s = pd.Series(data=colors_dict, dtype=np.int8)

In [67]:
series_info(colors_s)

ndim: 1
shape: (3,)
size: 3
dtype: int8
values:
Red      12
Green     8
Blue     33
dtype: int8



In [68]:
print(colors_s["Green"])

8


In [69]:
series_info(pd.Series([2, 4, 6]))

ndim: 1
shape: (3,)
size: 3
dtype: int64
values:
0    2
1    4
2    6
dtype: int64



## pd.DataFrame

DataFrame is a 2-dimensional labeled data structure with columns of potentially different types.  
You can think of it like a spreadsheet or SQL table, or a dict of Series objects.  
It is generally the most commonly used pandas object.

Like Series, DataFrame accepts many different kinds of input:

-   Dict of 1D ndarrays, lists, dicts, or Series
-   2-D numpy.ndarray
-   Structured or record ndarray
-   A Series
-   Another DataFrame


In [70]:
# Colour name -> hex colour code (becomes the "code" column further down).
codes_dict = dict(Red="#FF0000", Green="#00FF00", Blue="#0000FF")

In [71]:
# dtype="string" requests pandas' dedicated string dtype instead of the
# generic object dtype.
codes_s = pd.Series(data=codes_dict, dtype="string")

In [72]:
series_info(codes_s)

ndim: 1
shape: (3,)
size: 3
dtype: string
values:
Red      #FF0000
Green    #00FF00
Blue     #0000FF
dtype: string



In [73]:
# Dict of Series -> DataFrame: each key becomes a column name, and the two
# Series (both indexed by colour name) supply the rows.
combined_df = pd.DataFrame(data={"count": colors_s, "code": codes_s})

In [74]:
df_info(combined_df)

ndim: 2
shape: (3, 2)
size: 6
dtype: count              int8
code     string[python]
dtype: object
values:
       count     code
Red       12  #FF0000
Green      8  #00FF00
Blue      33  #0000FF



In [75]:
print(combined_df.index)

Index(['Red', 'Green', 'Blue'], dtype='object')


In [76]:
print(combined_df.columns)

Index(['count', 'code'], dtype='object')


In [77]:
print(type(combined_df["count"]))

<class 'pandas.core.series.Series'>


In [78]:
series_info(combined_df["count"])

ndim: 1
shape: (3,)
size: 3
dtype: int8
values:
Red      12
Green     8
Blue     33
Name: count, dtype: int8

