# Series Deep Dive

### Loading Libraries

In [1]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# PyArrow
import pyarrow as pa

### Loading Data

In [2]:
# Location of the zipped vehicles CSV; the literal is split across two
# adjacent strings only to keep the lines short.
url = (
    'https://github.com/mattharrison/datasets/raw/master/data/'
    'vehicles.csv.zip'
)

In [3]:
# Load the vehicles table straight from the URL.
df = pd.read_csv(
    url,
    engine='pyarrow',          # parse the CSV with the PyArrow reader
    dtype_backend='pyarrow',   # store columns as Arrow-backed dtypes (e.g. int64[pyarrow])
)

In [4]:
# Pull out the city fuel-economy column as a Series.
city_mpg = df['city08']

In [5]:
# Pull out the highway fuel-economy column as a Series.
highway_mpg = df['highway08']

In [6]:
# Echo the city-MPG Series; pandas abbreviates the middle rows of a long Series.
city_mpg

0        19
1         9
2        23
3        10
4        17
         ..
41139    19
41140    20
41141    18
41142    18
41143    16
Name: city08, Length: 41144, dtype: int64[pyarrow]

In [7]:
# Echo the highway-MPG Series for comparison with city_mpg.
highway_mpg

0        25
1        14
2        33
3        12
4        23
         ..
41139    26
41140    28
41141    24
41142    24
41143    21
Name: highway08, Length: 41144, dtype: int64[pyarrow]

### Series Attributes

In [8]:
# Count the names dir() reports for the Series, i.e. how many attributes
# and methods it exposes.
len(dir(city_mpg))

391

# Operators & Dunder Methods

### Dunder Methods

In [9]:
# Plain Python integer addition; the `+` operator is implemented by the
# `__add__` dunder method (equivalent to (2).__add__(4)).
2 + 4

6

In [10]:
# The same operators work element-wise on Series: add the two MPG columns,
# then halve the result to get the average of city and highway MPG per row.
(city_mpg + highway_mpg) / 2

0        22.0
1        11.5
2        28.0
3        11.0
4        20.0
         ... 
41139    22.5
41140    24.0
41141    21.0
41142    21.0
41143    18.5
Length: 41144, dtype: double[pyarrow]

### Index Alignment

In [11]:
# Two small Series whose indexes deliberately contain a repeated label (2)
# and labels that exist on only one side (1 in s1, 4 in s2).
s1 = pd.Series(data=[10, 20, 30], index=[1, 2, 2])
s2 = pd.Series(data=[35, 44, 53], index=[2, 2, 4], name='s2')

In [12]:
# Show s1; note that label 2 appears twice in its index.
s1

1    10
2    20
2    30
dtype: int64

In [13]:
# Show s2; label 2 is duplicated here too, and label 4 does not exist in s1.
s2

2    35
2    44
4    53
Name: s2, dtype: int64

In [14]:
# Arithmetic aligns on index labels rather than position. Each of the two
# `2` rows in s1 is paired with each of the two `2` rows in s2 (four rows),
# while labels 1 and 4 exist on only one side and therefore produce NaN,
# which is also why the result is float64.
s1 + s2

1     NaN
2    55.0
2    64.0
2    65.0
2    74.0
4     NaN
dtype: float64

### Broadcasting

In [15]:
# A scalar operand is broadcast to every element; the index and the
# Series name are carried over unchanged.
s2 + 5

2    40
2    49
4    58
Name: s2, dtype: int64

### Operator Methods

In [16]:
# Operator form again, as a baseline for the .add() method calls below.
s1 + s2

1     NaN
2    55.0
2    64.0
2    65.0
2    74.0
4     NaN
dtype: float64

In [17]:
# Method form of `s1 + s2`; it produces the same aligned result.
s1.add(s2)

1     NaN
2    55.0
2    64.0
2    65.0
2    74.0
4     NaN
dtype: float64

In [18]:
# fill_value=0 substitutes 0 for a label that is missing from one side, so
# label 1 yields 10.0 and label 4 yields 53.0 instead of NaN.
s1.add(s2, fill_value=0)

1    10.0
2    55.0
2    64.0
2    65.0
2    74.0
4    53.0
dtype: float64

### Chaining

In [19]:
# Operator version of the average. The outer parentheses let the expression
# span several lines without backslash continuations.
((city_mpg +
  highway_mpg)
 / 2
)

0        22.0
1        11.5
2        28.0
3        11.0
4        20.0
         ... 
41139    22.5
41140    24.0
41141    21.0
41142    21.0
41143    18.5
Length: 41144, dtype: double[pyarrow]

In [20]:
# Same calculation written as a method chain, one operation per line:
# .add() stands in for `+` and .div() for `/`.
(city_mpg
 .add(highway_mpg)
 .div(2)
)

0        22.0
1        11.5
2        28.0
3        11.0
4        20.0
         ... 
41139    22.5
41140    24.0
41141    21.0
41142    21.0
41143    18.5
Length: 41144, dtype: double[pyarrow]

# Aggregate Methods

### Aggregations

In [21]:
# Arithmetic mean of city MPG; an aggregation collapses the Series to a single scalar.
city_mpg.mean()

18.369045304297103

In [23]:
# Property, not a method call: True only when no value is repeated.
city_mpg.is_unique

False

In [24]:
# Property: True only when the values never decrease from one row to the next.
city_mpg.is_monotonic_increasing

False

In [25]:
# With no argument, quantile() uses q=0.5, i.e. the median.
city_mpg.quantile()

17.0

In [26]:
# Value at the 90th percentile of city MPG.
city_mpg.quantile(.9)

24.0

In [27]:
# Passing a list of quantiles returns a Series indexed by the requested quantile levels.
city_mpg.quantile([.1, .5, .9])

0.1    13.0
0.5    17.0
0.9    24.0
Name: city08, dtype: double[pyarrow]

### Count & Mean of Attribute

In [30]:
# Count the rows rated above 20 city MPG: .gt(20) produces a boolean
# Series and .sum() counts its True values.
(city_mpg
 .gt(20)
 .sum()
)

10272

In [33]:
# NOTE(review): disabled variant kept for reference. Presumably it fails
# because .mul() is not supported on the bool[pyarrow] result of .gt();
# the next cell inserts an integer cast before multiplying. Confirm, and
# delete this cell if it is no longer needed.
# (city_mpg
#  .gt(20)
#  .mul(100)
#  .mean()
# )

In [34]:
# Percentage of rows with city MPG above 20: each row contributes 0 or 100,
# so the mean of that column is the percentage.
(city_mpg
    .gt(20)
    .astype("int64[pyarrow]")   # cast booleans to Arrow integers (True/False -> 1/0)
    .mul(100)
    .mean()
)

24.965973167412017

In [None]:
### `.agg` & Aggregation Strings