In [12]:
# Importer les librairies 
import numpy as np
import pandas as pd

https://pandas.pydata.org/docs/user_guide/10min.html 

In [13]:
# Create objet pandas : series
a = pd.Series([1,3,np.nan,45])
a.head()
# type(a)

0     1.0
1     3.0
2     NaN
3    45.0
dtype: float64

In [14]:
# Creating a DataFrame by passing a NumPy array, with a datetime index using date_range() and labeled columns:
date_list = pd.date_range("20220101", periods=6)
print(date_list)

DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05', '2022-01-06'],
              dtype='datetime64[ns]', freq='D')


In [15]:
# Dataframe
df_1 = pd.DataFrame(np.random.randn(6,4), index=date_list, columns=list("ABCD"))
df_1.head(3)

Unnamed: 0,A,B,C,D
2022-01-01,-0.71107,0.183323,-0.705714,-0.869502
2022-01-02,0.697737,-0.797052,-0.922949,-0.558329
2022-01-03,-0.582123,-0.30626,0.818146,-0.578101


In [16]:
# Creating a DataFrame by passing a dictionary of objects that can be converted into a series-like structure:
df_2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)
df_2.head()


Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [18]:
# print(df_1.dtypes)
print(df_2.dtypes)

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object


In [19]:
df_2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [20]:
# DataFrame.to_numpy() gives a NumPy representation of the underlying data
df_2.to_numpy

<bound method DataFrame.to_numpy of      A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo>

In [21]:
df_1.to_numpy

<bound method DataFrame.to_numpy of                    A         B         C         D
2022-01-01 -0.711070  0.183323 -0.705714 -0.869502
2022-01-02  0.697737 -0.797052 -0.922949 -0.558329
2022-01-03 -0.582123 -0.306260  0.818146 -0.578101
2022-01-04 -0.402761 -0.334531  1.481635  0.686044
2022-01-05  0.825768 -2.372057 -0.281603  1.040783
2022-01-06  0.647383  0.443263  0.881230 -0.685768>

In [22]:
df_1.describe() # Satistic 

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.079156,-0.530552,0.211791,-0.160812
std,0.715115,1.000882,0.979919,0.808822
min,-0.71107,-2.372057,-0.922949,-0.869502
25%,-0.537282,-0.681422,-0.599686,-0.658852
50%,0.122311,-0.320395,0.268272,-0.568215
75%,0.685148,0.060927,0.865459,0.374951
max,0.825768,0.443263,1.481635,1.040783


In [23]:
df_1.T # Transpose

Unnamed: 0,2022-01-01,2022-01-02,2022-01-03,2022-01-04,2022-01-05,2022-01-06
A,-0.71107,0.697737,-0.582123,-0.402761,0.825768,0.647383
B,0.183323,-0.797052,-0.30626,-0.334531,-2.372057,0.443263
C,-0.705714,-0.922949,0.818146,1.481635,-0.281603,0.88123
D,-0.869502,-0.558329,-0.578101,0.686044,1.040783,-0.685768


In [24]:
# DataFrame.sort_index() sorts by an axis:
df_1.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2022-01-01,-0.869502,-0.705714,0.183323,-0.71107
2022-01-02,-0.558329,-0.922949,-0.797052,0.697737
2022-01-03,-0.578101,0.818146,-0.30626,-0.582123
2022-01-04,0.686044,1.481635,-0.334531,-0.402761
2022-01-05,1.040783,-0.281603,-2.372057,0.825768
2022-01-06,-0.685768,0.88123,0.443263,0.647383


In [25]:
df_1.sort_values(by="B")

Unnamed: 0,A,B,C,D
2022-01-05,0.825768,-2.372057,-0.281603,1.040783
2022-01-02,0.697737,-0.797052,-0.922949,-0.558329
2022-01-04,-0.402761,-0.334531,1.481635,0.686044
2022-01-03,-0.582123,-0.30626,0.818146,-0.578101
2022-01-01,-0.71107,0.183323,-0.705714,-0.869502
2022-01-06,0.647383,0.443263,0.88123,-0.685768


While standard Python / NumPy expressions for selecting and setting are intuitive and come in handy for interactive work, for production code, we recommend the optimized pandas data access methods, DataFrame.at(), DataFrame.iat(), DataFrame.loc() and DataFrame.iloc().

In [28]:
df_1[0:3] # select row 0 --> 3

Unnamed: 0,A,B,C,D
2022-01-01,-0.71107,0.183323,-0.705714,-0.869502
2022-01-02,0.697737,-0.797052,-0.922949,-0.558329
2022-01-03,-0.582123,-0.30626,0.818146,-0.578101


In [31]:
df_1["20220101":"20220102"] # index

Unnamed: 0,A,B,C,D
2022-01-01,-0.71107,0.183323,-0.705714,-0.869502
2022-01-02,0.697737,-0.797052,-0.922949,-0.558329


In [33]:
# See more in Selection by Label using DataFrame.loc() or DataFrame.at().
df_1.loc[date_list[0]] 

A   -0.711070
B    0.183323
C   -0.705714
D   -0.869502
Name: 2022-01-01 00:00:00, dtype: float64

In [34]:
df_1.loc[:, ["A", "B"]]

Unnamed: 0,A,B
2022-01-01,-0.71107,0.183323
2022-01-02,0.697737,-0.797052
2022-01-03,-0.582123,-0.30626
2022-01-04,-0.402761,-0.334531
2022-01-05,0.825768,-2.372057
2022-01-06,0.647383,0.443263


In [1]:
L = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i']

In [4]:
print(L[-1:])

['i']
