In [2]:
import numpy as np
import pandas as pd
import itertools

## Basics

In [2]:
s = pd.Series([1,2,3,np.nan,4,5])

In [3]:
s

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [18]:
dates = pd.date_range(start="11/01/2024", periods=6)

In [19]:
dates

DatetimeIndex(['2024-11-01', '2024-11-02', '2024-11-03', '2024-11-04',
               '2024-11-05', '2024-11-06'],
              dtype='datetime64[ns]', freq='D')

In [20]:
df = pd.DataFrame(np.arange(24).reshape(6,4), index=dates, columns=['A', 'B', 'C', 'D']) 

In [21]:
df

Unnamed: 0,A,B,C,D
2024-11-01,0,1,2,3
2024-11-02,4,5,6,7
2024-11-03,8,9,10,11
2024-11-04,12,13,14,15
2024-11-05,16,17,18,19
2024-11-06,20,21,22,23


In [22]:
df.index

DatetimeIndex(['2024-11-01', '2024-11-02', '2024-11-03', '2024-11-04',
               '2024-11-05', '2024-11-06'],
              dtype='datetime64[ns]', freq='D')

In [23]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [24]:
A = df.to_numpy()

In [25]:
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [26]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,10.0,11.0,12.0,13.0
std,7.483315,7.483315,7.483315,7.483315
min,0.0,1.0,2.0,3.0
25%,5.0,6.0,7.0,8.0
50%,10.0,11.0,12.0,13.0
75%,15.0,16.0,17.0,18.0
max,20.0,21.0,22.0,23.0


In [27]:
df.T

Unnamed: 0,2024-11-01,2024-11-02,2024-11-03,2024-11-04,2024-11-05,2024-11-06
A,0,4,8,12,16,20
B,1,5,9,13,17,21
C,2,6,10,14,18,22
D,3,7,11,15,19,23


In [32]:
df.sort_index(axis=0, ascending=False)

Unnamed: 0,A,B,C,D
2024-11-06,20,21,22,23
2024-11-05,16,17,18,19
2024-11-04,12,13,14,15
2024-11-03,8,9,10,11
2024-11-02,4,5,6,7
2024-11-01,0,1,2,3


In [34]:
df.sort_values(by="A", ascending=False)

Unnamed: 0,A,B,C,D
2024-11-06,20,21,22,23
2024-11-05,16,17,18,19
2024-11-04,12,13,14,15
2024-11-03,8,9,10,11
2024-11-02,4,5,6,7
2024-11-01,0,1,2,3


In [35]:
df["A"]

2024-11-01     0
2024-11-02     4
2024-11-03     8
2024-11-04    12
2024-11-05    16
2024-11-06    20
Freq: D, Name: A, dtype: int64

In [39]:
df["20241102":"20241104"]

Unnamed: 0,A,B,C,D
2024-11-02,4,5,6,7
2024-11-03,8,9,10,11
2024-11-04,12,13,14,15


## Selection

In [43]:
df.loc[dates[0]].to_numpy()

array([0, 1, 2, 3])

In [49]:
df.loc[dates[0]:dates[3],['A','D']]

Unnamed: 0,A,D
2024-11-01,0,3
2024-11-02,4,7
2024-11-03,8,11
2024-11-04,12,15


In [50]:
df.iloc[3]

A    12
B    13
C    14
D    15
Name: 2024-11-04 00:00:00, dtype: int64

In [54]:
df.iloc[2:3,1:3]

Unnamed: 0,B,C
2024-11-03,9,10


In [55]:
df.head()

Unnamed: 0,A,B,C,D
2024-11-01,0,1,2,3
2024-11-02,4,5,6,7
2024-11-03,8,9,10,11
2024-11-04,12,13,14,15
2024-11-05,16,17,18,19


In [57]:
df[df['A'] < 10] 

Unnamed: 0,A,B,C,D
2024-11-01,0,1,2,3
2024-11-02,4,5,6,7
2024-11-03,8,9,10,11


In [58]:
df[df < 10]

Unnamed: 0,A,B,C,D
2024-11-01,0.0,1.0,2.0,3.0
2024-11-02,4.0,5.0,6.0,7.0
2024-11-03,8.0,9.0,,
2024-11-04,,,,
2024-11-05,,,,
2024-11-06,,,,


In [69]:
df2 = df.copy()
df2["E"] = pd.Series(np.exp(np.arange(6)), index=dates)


In [83]:
A=list(itertools.chain.from_iterable([["one"] * 3, ["two"] * 3]))
df2["F"] = pd.Categorical(A)

In [84]:
df2

Unnamed: 0,A,B,C,D,E,F
2024-11-01,0,1,2,3,1.0,one
2024-11-02,4,5,6,7,2.718282,one
2024-11-03,8,9,10,11,7.389056,one
2024-11-04,12,13,14,15,20.085537,two
2024-11-05,16,17,18,19,54.59815,two
2024-11-06,20,21,22,23,148.413159,two


In [87]:
df2[df2["F"].isin(["one"])]

Unnamed: 0,A,B,C,D,E,F
2024-11-01,0,1,2,3,1.0,one
2024-11-02,4,5,6,7,2.718282,one
2024-11-03,8,9,10,11,7.389056,one


In [88]:
s1 = pd.Series(np.arange(6)+1, index=pd.date_range("20130102", periods=6))


In [89]:
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [90]:
df["G"]=s1

In [112]:
df.loc[:,df.columns[0:2]] = np.exp(np.random.rand(6,2)).astype(int)

In [113]:
df

Unnamed: 0,A,B,C,D,G,0
2024-11-01,1.0,1.0,2,3,,0.0
2024-11-02,1.0,1.0,6,7,,
2024-11-03,2.0,1.0,10,11,,
2024-11-04,1.0,1.0,14,15,,
2024-11-05,2.0,1.0,18,19,,
2024-11-06,1.0,2.0,22,23,,


## Operations

In [16]:
dates = pd.date_range(start="11/01/2024", periods=6)
df = pd.DataFrame(np.arange(24).reshape(6,4), index=dates, columns=['A', 'B', 'C', 'D']) 

In [17]:
df

Unnamed: 0,A,B,C,D
2024-11-01,0,1,2,3
2024-11-02,4,5,6,7
2024-11-03,8,9,10,11
2024-11-04,12,13,14,15
2024-11-05,16,17,18,19
2024-11-06,20,21,22,23


In [18]:
df.mean(axis=1)

2024-11-01     1.5
2024-11-02     5.5
2024-11-03     9.5
2024-11-04    13.5
2024-11-05    17.5
2024-11-06    21.5
Freq: D, dtype: float64

In [19]:
s = pd.Series([1,2,3,4,np.nan,6], index=dates).shift(1)

In [20]:
s

2024-11-01    NaN
2024-11-02    1.0
2024-11-03    2.0
2024-11-04    3.0
2024-11-05    4.0
2024-11-06    NaN
Freq: D, dtype: float64

In [21]:
df.sub(s, axis = 'index')

Unnamed: 0,A,B,C,D
2024-11-01,,,,
2024-11-02,3.0,4.0,5.0,6.0
2024-11-03,6.0,7.0,8.0,9.0
2024-11-04,9.0,10.0,11.0,12.0
2024-11-05,12.0,13.0,14.0,15.0
2024-11-06,,,,


In [23]:
df.agg(lambda x: np.mean(x) * -1)

A   -10.0
B   -11.0
C   -12.0
D   -13.0
dtype: float64

In [24]:
df.transform(lambda x: x**2)

Unnamed: 0,A,B,C,D
2024-11-01,0,1,4,9
2024-11-02,16,25,36,49
2024-11-03,64,81,100,121
2024-11-04,144,169,196,225
2024-11-05,256,289,324,361
2024-11-06,400,441,484,529


In [9]:
idx = pd.Index(["A", 4, 3])
pd.DataFrame({"sensor_readings":[11,12,13]}, index=idx)

Unnamed: 0,sensor_readings
A,11
4,12
3,13
