In [8]:
import pandas as pd
import numpy as np

# Zipped CSV of vehicle fuel-economy records; pandas reads the URL directly.
url = 'https://github.com/mattharrison/datasets/raw/master/data/vehicles.csv.zip'

# low_memory=False parses the file in one pass, so each column's dtype is
# inferred from all of its rows rather than chunk by chunk.
df = pd.read_csv(url, low_memory=False)

# Pull out the two mileage columns as standalone Series.
city_mpg, highway_mpg = df['city08'], df['highway08']

## Conversion Methods

In [3]:
# Let pandas choose a dtype that supports pd.NA; the int64 column comes back
# as the nullable Int64 extension type (capital "I").
city_mpg.convert_dtypes()

0        19
1         9
2        23
3        10
4        17
         ..
41139    19
41140    20
41141    18
41142    18
41143    16
Name: city08, Length: 41144, dtype: Int64

In [5]:
# To request a specific dtype for a Series, use the .astype method.
# 'Int16' (capital "I") is pandas' nullable 16-bit integer type.
city_mpg.astype('Int16')

0        19
1         9
2        23
3        10
4        17
         ..
41139    19
41140    20
41141    18
41142    18
41143    16
Name: city08, Length: 41144, dtype: Int16

In [9]:
# np.iinfo reports the smallest and largest values an integer dtype can hold.
np.iinfo('int64')

iinfo(min=-9223372036854775808, max=9223372036854775807, dtype=int64)

In [11]:
# An unsigned 8-bit integer only covers 0 through 255.
np.iinfo('uint8')

iinfo(min=0, max=255, dtype=uint8)

In [12]:
# np.finfo is the floating-point counterpart of np.iinfo: it reports the
# range and the resolution of a float dtype.
np.finfo('float16')

finfo(resolution=0.001, min=-6.55040e+04, max=6.55040e+04, dtype=float16)

In [13]:
# float64 offers a far wider range and finer resolution than float16.
np.finfo('float64')

finfo(resolution=1e-15, min=-1.7976931348623157e+308, max=1.7976931348623157e+308, dtype=float64)

## Memory Usage
+ To calculate memory usage of the Series, you can use the .nbytes property or the .memory_usage method

In [14]:
# .nbytes is the size of the values only: 41,144 rows x 8 bytes (int64).
city_mpg.nbytes

329152

In [15]:
# Nullable Int16 needs 3 bytes per row (a 2-byte value plus a 1-byte
# missing-value mask), which is still well below the 8 bytes of int64.
city_mpg.astype('Int16').nbytes

123432

In [17]:
# For this string column .nbytes comes out at 8 bytes per row -- the same as
# the int64 column -- so it is not measuring the text itself.
make = df['make']
make.nbytes

329152

In [18]:
# .memory_usage() also counts the index (128 bytes more than .nbytes here),
# but by default it still does not measure the string contents.
make.memory_usage()

329280

In [19]:
# deep=True inspects the Python objects the column refers to, so the strings
# themselves are included in the total.
make.memory_usage(deep=True)

2606395

In [20]:
# Casting the makes to a categorical cuts the deep memory footprint sharply
# compared with the plain string column measured above.
make.astype('category').memory_usage(deep=True)

95888

## String and Category Types

In [21]:
# The .astype method can convert a numeric Series to strings; the result is
# stored with object dtype.
city_mpg.astype(str)

0        19
1         9
2        23
3        10
4        17
         ..
41139    19
41140    20
41141    18
41142    18
41143    16
Name: city08, Length: 41144, dtype: object

In [22]:
# To convert into a categorical type; the categories are inferred from the
# distinct values and are unordered by default.
city_mpg.astype('category')

0        19
1         9
2        23
3        10
4        17
         ..
41139    19
41140    20
41141    18
41142    18
41143    16
Name: city08, Length: 41144, dtype: category
Categories (105, int64): [6, 7, 8, 9, ..., 137, 138, 140, 150]

In [24]:
# Ordered categories: list the distinct mileage values in ascending order and
# declare that order on the dtype, so the categories compare as 6 < 7 < 8 ...
values = pd.Series(sorted(city_mpg.unique()))
city_type = pd.CategoricalDtype(
    categories=values,
    ordered=True,
)
city_mpg.astype(city_type)

0        19
1         9
2        23
3        10
4        17
         ..
41139    19
41140    20
41141    18
41142    18
41143    16
Name: city08, Length: 41144, dtype: category
Categories (105, int64): [6 < 7 < 8 < 9 ... 137 < 138 < 140 < 150]

In [25]:
## Converting to other types
# Converts to a Python list. The method must be *called* -- a bare
# `city_mpg.to_list` only displays the bound method object, not the data.
# Slice to the first five items so the cell does not print all 41,144 values.
city_mpg.to_list()[:5]

[19, 9, 23, 10, 17]

In [26]:
# Converts the Series to a one-column DataFrame; the column takes the
# Series name (city08).
city_mpg.to_frame()

Unnamed: 0,city08
0,19
1,9
2,23
3,10
4,17
5,21
6,22
7,23
8,23
9,23
