# Series Deep Dive

### Loading Libraries

In [1]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# PyArrow
import pyarrow as pa

### Loading Data

In [2]:
# Location of the zipped vehicles CSV on GitHub.
url = (
    'https://github.com/mattharrison/datasets/raw/master/data/'
    'vehicles.csv.zip'
)

In [3]:
# Parse the CSV with the pyarrow engine and request pyarrow-backed dtypes
# for the resulting columns.
df = pd.read_csv(
    url,
    engine='pyarrow',
    dtype_backend='pyarrow',
)

In [4]:
# Pull out the city08 column as a standalone Series.
city_mpg = df['city08']

In [5]:
# Pull out the highway08 column as a standalone Series.
highway_mpg = df['highway08']

In [6]:
city_mpg

0        19
1         9
2        23
3        10
4        17
         ..
41139    19
41140    20
41141    18
41142    18
41143    16
Name: city08, Length: 41144, dtype: int64[pyarrow]

In [7]:
highway_mpg

0        25
1        14
2        33
3        12
4        23
         ..
41139    26
41140    28
41141    24
41142    24
41143    21
Name: highway08, Length: 41144, dtype: int64[pyarrow]

### Series Attributes

In [8]:
# Count the names (attributes and methods) that dir() reports for the Series
len(dir(city_mpg))

391

# Operators & Dunder Methods

### Dunder Methods

In [9]:
2 + 4

6

In [10]:
(city_mpg + highway_mpg) / 2

0        22.0
1        11.5
2        28.0
3        11.0
4        20.0
         ... 
41139    22.5
41140    24.0
41141    21.0
41142    21.0
41143    18.5
Length: 41144, dtype: double[pyarrow]

### Index Alignment

In [11]:
# Two small Series whose indexes overlap only partly and repeat the label 2.
s1 = pd.Series(data=[10, 20, 30], index=[1, 2, 2])
s2 = pd.Series(data=[35, 44, 53], index=[2, 2, 4], name='s2')

In [12]:
s1

1    10
2    20
2    30
dtype: int64

In [13]:
s2

2    35
2    44
4    53
Name: s2, dtype: int64

In [14]:
# Addition aligns on index labels: every pairing of the duplicated label 2
# gets its own row, and labels present in only one Series (1 and 4) become NaN.
s1 + s2

1     NaN
2    55.0
2    64.0
2    65.0
2    74.0
4     NaN
dtype: float64

### Broadcasting

In [15]:
s2 + 5

2    40
2    49
4    58
Name: s2, dtype: int64

### Operator Methods

In [16]:
s1 + s2

1     NaN
2    55.0
2    64.0
2    65.0
2    74.0
4     NaN
dtype: float64

In [17]:
# Method form of s1 + s2; it yields the same aligned result as the operator.
s1.add(s2)

1     NaN
2    55.0
2    64.0
2    65.0
2    74.0
4     NaN
dtype: float64

In [18]:
# fill_value=0 stands in for a label missing from one side, so labels 1 and 4
# keep their own values instead of turning into NaN.
s1.add(s2, fill_value=0)

1    10.0
2    55.0
2    64.0
2    65.0
2    74.0
4    53.0
dtype: float64

### Chaining

In [19]:
((city_mpg +
  highway_mpg)
 / 2
)

0        22.0
1        11.5
2        28.0
3        11.0
4        20.0
         ... 
41139    22.5
41140    24.0
41141    21.0
41142    21.0
41143    18.5
Length: 41144, dtype: double[pyarrow]

In [20]:
(city_mpg
 .add(highway_mpg)
 .div(2)
)

0        22.0
1        11.5
2        28.0
3        11.0
4        20.0
         ... 
41139    22.5
41140    24.0
41141    21.0
41142    21.0
41143    18.5
Length: 41144, dtype: double[pyarrow]

# Aggregate Methods

### Aggregations

In [21]:
city_mpg.mean()

18.369045304297103

In [22]:
city_mpg.is_unique

False

In [23]:
city_mpg.is_monotonic_increasing

False

In [24]:
city_mpg.quantile()

17.0

In [25]:
city_mpg.quantile(.9)

24.0

In [26]:
city_mpg.quantile([.1, .5, .9])

0.1    13.0
0.5    17.0
0.9    24.0
Name: city08, dtype: double[pyarrow]

### Count & Mean of Attribute

In [27]:
# Summing the boolean result of .gt(20) counts the entries greater than 20.
(city_mpg
 .gt(20)
 .sum()
)

10272

In [28]:
# Disabled variant of the percentage chain, without the integer cast.
# NOTE(review): presumably commented out because multiplying the boolean
# Arrow result of .gt(20) by 100 is not supported - confirm before re-enabling.
# (city_mpg
#  .gt(20)
#  .mul(100)
#  .mean()
# )

In [29]:
# Share of entries greater than 20, expressed as a percentage.
(city_mpg
 .gt(20)
 .astype("int64[pyarrow]")   # cast booleans to Arrow integers: True/False -> 1/0
 .mul(100)
 .mean()
)

24.965973167412017

### `.agg` & Aggregation Strings

In [30]:
city_mpg.agg('mean')

18.369045304297103

In [31]:
def second_to_last(s):
    """Return the element one position before the end of ``s``.

    ``s`` is indexed positionally through ``.iloc``, so the lookup ignores
    the index labels.
    """
    penultimate = s.iloc[-2]
    return penultimate

In [32]:
# Built-in reductions are requested by name so pandas always runs its own
# Series.var / Series.max. Handing np.var or the builtin max to .agg is
# deprecated since pandas 2.1 and is treated differently in later releases.
# Custom reductions such as second_to_last are still passed as callables.
city_mpg.agg(['mean', 'var', 'max', second_to_last])

mean               18.369045
var                62.501517
max               150.000000
second_to_last     18.000000
Name: city08, dtype: float64

# Conversion Methods

### Type Conversion

In [33]:
city_mpg.astype('int16[pyarrow]')

0        19
1         9
2        23
3        10
4        17
         ..
41139    19
41140    20
41141    18
41142    18
41143    16
Name: city08, Length: 41144, dtype: int16[pyarrow]

In [34]:
# int8 only covers -128..127 and the column holds larger values, so this cast
# is expected to fail. Catch the error and print it so the demonstration
# stays visible while the notebook still survives Restart & Run All.
try:
    city_mpg.astype('int8[pyarrow]')
except pa.ArrowInvalid as err:
    print(f'{type(err).__name__}: {err}')

ArrowInvalid: Integer value 132 not in range: -128 to 127

In [35]:
np.iinfo('int64')

iinfo(min=-9223372036854775808, max=9223372036854775807, dtype=int64)

In [36]:
np.iinfo('int8')

iinfo(min=-128, max=127, dtype=int8)

In [40]:
# Left commented out: np.finfo describes floating-point types only, so an
# integer type such as int32 is rejected; use np.iinfo for integers.
# np.finfo('int32')

### Memory Usage

In [42]:
city_mpg.nbytes

329152

In [43]:
# 'Int16' (capital I) is pandas' own nullable integer dtype rather than an
# Arrow type; the size reported here works out to 3 bytes per row for the
# 41144 rows.
city_mpg.astype('Int16').nbytes

123432

In [44]:
# Pull out the make column as a standalone Series.
make = df['make']

In [45]:
make.nbytes

425635

In [46]:
make.memory_usage()

425767

In [47]:
make.memory_usage(deep=True)

425767

In [49]:
make.astype(str).memory_usage()

590343

In [50]:
make.astype(str).memory_usage(deep=True)

590343

### String & Category Types

In [51]:
(make
 .astype('category')
 .memory_usage(deep=True)
)

84533