In [None]:
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD'))
print(df.head())


# query 

In [172]:
df.query('A > B')

Unnamed: 0,A,B,C,D
0,-0.46885,-0.575355,-1.755448,-0.721407
1,0.62683,0.142729,0.959875,0.695038
2,-0.32552,-1.051279,0.077001,-0.976853
3,0.620873,0.115614,0.203067,-1.259401
5,0.424593,0.006147,-0.542266,0.060362
6,0.326262,-0.133568,-0.246196,-1.293947
8,0.508724,0.491179,0.511179,-0.430321
9,-0.132347,-0.218552,-0.705951,1.288782


In [173]:
df.query('A > B & B > C')

Unnamed: 0,A,B,C,D
0,-0.46885,-0.575355,-1.755448,-0.721407
5,0.424593,0.006147,-0.542266,0.060362
6,0.326262,-0.133568,-0.246196,-1.293947
9,-0.132347,-0.218552,-0.705951,1.288782


In [174]:
df.query('A > B | B > C')

Unnamed: 0,A,B,C,D
0,-0.46885,-0.575355,-1.755448,-0.721407
1,0.62683,0.142729,0.959875,0.695038
2,-0.32552,-1.051279,0.077001,-0.976853
3,0.620873,0.115614,0.203067,-1.259401
4,-0.628626,0.230832,-1.168348,0.427251
5,0.424593,0.006147,-0.542266,0.060362
6,0.326262,-0.133568,-0.246196,-1.293947
8,0.508724,0.491179,0.511179,-0.430321
9,-0.132347,-0.218552,-0.705951,1.288782


In [175]:
df.query('A > B & B > C | C > D').groupby('A').sum()

Unnamed: 0_level_0,B,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-1.358186,-0.768316,-0.553,-0.814419
-0.46885,-0.575355,-1.755448,-0.721407
-0.32552,-1.051279,0.077001,-0.976853
-0.132347,-0.218552,-0.705951,1.288782
0.326262,-0.133568,-0.246196,-1.293947
0.424593,0.006147,-0.542266,0.060362
0.508724,0.491179,0.511179,-0.430321
0.620873,0.115614,0.203067,-1.259401
0.62683,0.142729,0.959875,0.695038


In [176]:
l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
df = pd.DataFrame(l, columns=["a", "b", "c"])
print(df.head())
df.groupby('b').sum()



   a    b  c
0  1  2.0  3
1  1  NaN  4
2  2  1.0  3
3  1  2.0  2


Unnamed: 0_level_0,a,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,2,3
2.0,2,5


In [177]:
df.groupby(by=["b"], dropna=False).sum()



Unnamed: 0_level_0,a,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,2,3
2.0,2,5
,1,4


In [178]:
index = pd.MultiIndex.from_product([["A", "B"], ["a", "b"]])
df = pd.DataFrame(np.random.randn(4), index=index, columns=["col"])
print(df.head())

idx = pd.Index([1, 2, 3])
df = pd.DataFrame(np.random.randn(3, 3), index=idx)
print(df.head())


          col
A a  1.142155
  b -0.562651
B a  0.082494
  b  3.012826
          0         1         2
1 -0.184636  0.947877 -0.079786
2 -1.282304 -0.824027  0.244820
3  0.491465  0.629511 -0.447944


# select

## select columns

In [179]:
df = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD'))
selected_columns = df[['A', 'B']]
selected_columns.head()

Unnamed: 0,A,B
0,0.769845,-0.168352
1,-1.96118,0.716396
2,-0.816087,-1.468212
3,-1.144079,0.490981
4,-0.614567,0.007935


## select rows

In [180]:
selected_rows = df.iloc[2:5]
selected_rows.head()

Unnamed: 0,A,B,C,D
2,-0.816087,-1.468212,1.133745,-0.656113
3,-1.144079,0.490981,-0.007495,-0.221374
4,-0.614567,0.007935,-0.438814,1.171975


## Conditional Selection

In [181]:
selected_rows = df[df['A'] > 0.5]
selected_rows.head()

Unnamed: 0,A,B,C,D
0,0.769845,-0.168352,0.63547,-0.020493
6,1.286383,-1.099055,-0.122614,1.241066


# Where

In [182]:
df = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD'))
print(df.head())
df = df.where(df > 0)
df.head()

          A         B         C         D
0 -1.111456 -0.738958  0.282170 -0.754432
1  0.363839 -0.298873  0.080403  1.113625
2  0.922855 -0.349329  1.476782  1.421746
3 -0.475355 -0.729138  0.266818  0.430500
4 -1.188233 -1.170551  2.790584 -0.483027


Unnamed: 0,A,B,C,D
0,,,0.28217,
1,0.363839,,0.080403,1.113625
2,0.922855,,1.476782,1.421746
3,,,0.266818,0.4305
4,,,2.790584,


# Group By

In [183]:
df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                         'foo', 'bar', 'foo', 'foo'],
                   'B': ['one', 'one', 'two', 'three',
                         'two', 'two', 'one', 'three'],
                   'C': np.random.randn(8),
                   'D': np.random.randn(8)})
print(df.head())
df.groupby('A').sum()

     A      B         C         D
0  foo    one -1.761914  0.751698
1  bar    one -0.479532  0.573398
2  foo    two -1.360444  1.297496
3  bar  three -0.866444  1.176017
4  foo    two -0.936136 -1.097631


Unnamed: 0_level_0,B,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,onethreetwo,-1.269738,1.358862
foo,onetwotwoonethree,-5.280005,1.541418


In [None]:
import numpy as np

# 原始的 Python 列表
list_of_lists = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
arr = np.array(list_of_lists)
print(arr)