# UnitsExtensionArray

In [2]:
import numpy as np
import pandas as pd
import pandas_units_extension as pi
import astropy.units as u

## Dtype

In [3]:
# Build a units-aware pandas dtype from an astropy unit (metres).
# The bare name on the last line displays the dtype's repr.
m_type = pi.UnitsDtype(u.m)
m_type

UnitsDtype("m")

In [4]:
# String name of the dtype ('unit[m]'); strings of the same "unit[...]" form
# are passed to astype() / dtype= later in this notebook.
m_type.name

'unit[m]'

In [5]:
# The unit object carried by the dtype.
m_type.unit

Unit("m")

## UnitsExtensionArray simple example

In [6]:
# Load the example table of peaks; at this point `height` holds plain
# integers with no unit attached.
peaks = pd.read_csv("peaks.csv")
peaks

Unnamed: 0,name,height
0,Mount Elbert,14440
1,Mount Massive,14428
2,Mount Harvard,14421
3,Blanca Peak,14351
4,La Plata Peak,14343
5,Uncompahgre Peak,14321
6,Crestone Peak,14300
7,Mount Lincoln,14293
8,Castle Peak,14279
9,Grays Peak,14278


In [7]:
# Attach a unit to the numeric column: casting to "unit[ft]" tags the raw
# numbers as feet (values are then displayed as e.g. "14440.0 ft").
peaks["height"] = peaks["height"].astype("unit[ft]")
peaks

Unnamed: 0,name,height
0,Mount Elbert,14440.0 ft
1,Mount Massive,14428.0 ft
2,Mount Harvard,14421.0 ft
3,Blanca Peak,14351.0 ft
4,La Plata Peak,14343.0 ft
5,Uncompahgre Peak,14321.0 ft
6,Crestone Peak,14300.0 ft
7,Mount Lincoln,14293.0 ft
8,Castle Peak,14279.0 ft
9,Grays Peak,14278.0 ft


In [8]:
# Convert the heights to SI via the `.units` Series accessor: feet become
# metres and the resulting Series has dtype unit[m].
# Values such as 4365.040800000001 are ordinary floating-point round-off
# from the conversion, not an error in the data.
peaks["height"].units.to_si()

0             4401.312 m
1            4397.6544 m
2            4395.5208 m
3            4374.1848 m
4            4371.7464 m
5    4365.040800000001 m
6              4358.64 m
7            4356.5064 m
8            4352.2392 m
9            4351.9344 m
10           4351.3248 m
11           4349.8008 m
12           4346.1432 m
13           4344.0096 m
14           4329.3792 m
15             4328.16 m
16           4316.8824 m
17           4315.3584 m
18   4308.957600000001 m
19            4302.252 m
Name: height, dtype: unit[m]

In [9]:
# The same accessor is available on the whole DataFrame: the unit column is
# converted to SI while the other columns (e.g. `name`) are shown unchanged.
peaks.units.to_si()

Unnamed: 0,name,height
0,Mount Elbert,4401.312 m
1,Mount Massive,4397.6544 m
2,Mount Harvard,4395.5208 m
3,Blanca Peak,4374.1848 m
4,La Plata Peak,4371.7464 m
5,Uncompahgre Peak,4365.040800000001 m
6,Crestone Peak,4358.64 m
7,Mount Lincoln,4356.5064 m
8,Castle Peak,4352.2392 m
9,Grays Peak,4351.9344 m


In [10]:
# Reference height expressed in metres, although the column is in feet.
dent_blanche = 4357 * pi.Unit("m")
# The comparison is unit-aware: 14300 ft (4358.64 m) is flagged True and
# 14293 ft (4356.5064 m) False, so feet are compared against metres correctly.
peaks["tall"] = peaks.height > dent_blanche
peaks

Unnamed: 0,name,height,tall
0,Mount Elbert,14440.0 ft,True
1,Mount Massive,14428.0 ft,True
2,Mount Harvard,14421.0 ft,True
3,Blanca Peak,14351.0 ft,True
4,La Plata Peak,14343.0 ft,True
5,Uncompahgre Peak,14321.0 ft,True
6,Crestone Peak,14300.0 ft,True
7,Mount Lincoln,14293.0 ft,False
8,Castle Peak,14279.0 ft,False
9,Grays Peak,14278.0 ft,False


## Another example: temperature conversions

In [27]:
# Temperatures created directly as a units-aware Series (degrees Celsius).
# NOTE(review): this cell carries execution count 27, out of sequence with
# its neighbours, and `temps` is rebuilt from scratch by the following cell.
celsius = pd.Series([20, 22, 31, 16], dtype="unit[deg_C]")
temps = pd.DataFrame(
    {
        "city": ["Prague", "Kathmandu", "Catania", "Boston"],
        "temperature": celsius,
    }
)
# Convert the Celsius column to Fahrenheit for display.
temps["temperature"].units.to("deg_F")

0                68.0 deg_F
1                71.6 deg_F
2   87.80000000000001 deg_F
3                60.8 deg_F
Name: temperature, dtype: unit[deg_F]

In [11]:
# Sample city temperatures as plain integers (no unit attached yet),
# built column by column.
temps = pd.DataFrame(
    {
        "city": ["Prague", "Ostrava", "Nairobi", "Cairo", "Johannesburg"],
        "temp": [23, 27, 31, 47, 13],
    }
)
temps

Unnamed: 0,city,temp
0,Prague,23
1,Ostrava,27
2,Nairobi,31
3,Cairo,47
4,Johannesburg,13


In [12]:
# Tag the integer temperatures as degrees Celsius; the column is overwritten
# in place with its units-aware version.
temps["temp"] = temps["temp"].astype("unit[deg_C]")
temps

Unnamed: 0,city,temp
0,Prague,23.0 deg_C
1,Ostrava,27.0 deg_C
2,Nairobi,31.0 deg_C
3,Cairo,47.0 deg_C
4,Johannesburg,13.0 deg_C


In [13]:
# factorize() works on the unit column, but note the result: the uniques come
# back as an object-dtype Index of bare floats, i.e. without the unit.
temps["temp"].factorize()

(array([0, 1, 2, 3, 4], dtype=int64),
 Index([23.0, 27.0, 31.0, 47.0, 13.0], dtype='object'))

In [14]:
# The unit column reports its extension dtype (unit[deg_C]) next to the
# ordinary object column.
temps.dtypes

city         object
temp    unit[deg_C]
dtype: object

In [15]:
# Derive Fahrenheit and Kelvin columns from the Celsius one. The output shows
# offset-aware conversions (23 deg_C -> 73.4 deg_F -> 296.15 K), not plain
# scaling.
temps["temp_f"] = temps["temp"].units.to("deg_F")
temps["temp_K"] = temps["temp"].units.to("K")
temps

Unnamed: 0,city,temp,temp_f,temp_K
0,Prague,23.0 deg_C,73.4 deg_F,296.15 K
1,Ostrava,27.0 deg_C,80.6 deg_F,300.15 K
2,Nairobi,31.0 deg_C,87.80000000000001 deg_F,304.15 K
3,Cairo,47.0 deg_C,116.60000000000001 deg_F,320.15 K
4,Johannesburg,13.0 deg_C,55.400000000000006 deg_F,286.15 K


## Advanced computations ("running diary")

In [16]:
# Running diary: distance and duration of each run, with units attached
# through the "unit[...]" dtype strings.
week = ["Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday"]
df = (
    pd.DataFrame({
        "days": ["Monday", "Tuesday", "Thursday", "Saturday"],
        "distance": pd.Series([10, 12, 22, 18], dtype="unit[km]"),
        "time": pd.Series([50, 60, 120, 108], dtype="unit[min]"),
    })
    .set_index("days")
    # Expand to the full week; days without a run become NaN rows.
    .reindex(week)
)
df

Unnamed: 0_level_0,distance,time
days,Unnamed: 1_level_1,Unnamed: 2_level_1
Monday,10.0 km,50.0 min
Tuesday,12.0 km,60.0 min
Wednesday,nan km,nan min
Thursday,22.0 km,120.0 min
Friday,nan km,nan min
Saturday,18.0 km,108.0 min
Sunday,nan km,nan min


In [17]:
# Average speed: distance / time gives km per minute, converted to km/h
# through the .units accessor.
df["speed"] = (df["distance"] / df["time"]).units.to("km/h")
# The same speed expressed in miles per hour.
df["speed[imperial]"] = df["speed"].units.to("mi / h")
# Pace = time per distance; multiplying by 1 km cancels the length unit, and
# the remaining time quantity is cast to a pandas timedelta (shown as hh:mm:ss).
df["pace[min/km]"] = (df["time"] / df["distance"] * u.Quantity("1 km")).astype("timedelta64[ns]")
# Unit-aware comparison against 11 km/h; rest days (NaN speed) come out False,
# and exactly 11 km/h is not counted as fast (strict >).
df["fast"] = df["speed"] > u.Quantity("11 km/h")
df

  result = super().__array_ufunc__(function, method, *arrays, **kwargs)


Unnamed: 0_level_0,distance,time,speed,speed[imperial],pace[min/km],fast
days,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Monday,10.0 km,50.0 min,12.0 km / h,7.456454306848009 mi / h,00:05:00,True
Tuesday,12.0 km,60.0 min,12.0 km / h,7.456454306848009 mi / h,00:05:00,True
Wednesday,nan km,nan min,nan km / h,nan mi / h,NaT,False
Thursday,22.0 km,120.0 min,11.0 km / h,6.835083114610675 mi / h,00:05:27.272727,False
Friday,nan km,nan min,nan km / h,nan mi / h,NaT,False
Saturday,18.0 km,108.0 min,10.0 km / h,6.21371192237334 mi / h,00:06:00,False
Sunday,nan km,nan min,nan km / h,nan mi / h,NaT,False
