In [2]:
# Import the required packages
import numpy as np
import pandas as pd

In [6]:
# Create a Series from a list of values; the NaN entry makes the dtype float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 7])

In [7]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    7.0
dtype: float64

In [8]:
# Build a daily datetime index of six dates starting 2013-01-01; it is used as
# the row index of the DataFrame created from a NumPy array.
dates = pd.date_range(start="20130101", periods=6, freq="D")

In [9]:
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Build a 6x4 DataFrame of random normal samples, indexed by `dates`, with columns A-D.
# NOTE(review): no random seed is set in the visible cells, so the values change on
# every fresh run and will not match the outputs recorded in this notebook.
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list("ABCD"))

In [11]:
df

Unnamed: 0,A,B,C,D
2013-01-01,0.92429,0.440417,0.446689,0.446566
2013-01-02,-0.734814,-0.791235,-0.56875,0.006092
2013-01-03,0.424207,1.092671,0.09397,1.276337
2013-01-04,0.864998,-0.291513,-1.294887,-0.847007
2013-01-05,1.139238,0.336692,-0.708126,-0.853847
2013-01-06,-1.345384,-1.275939,-0.536902,-0.125534


In [12]:
# Create a DataFrame from a dict whose values can be converted to Series-like columns.
# Scalar values ("A", "B", "F") are repeated for every row; the array-like
# values ("C", "D", "E") each supply four entries.
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)

In [13]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [14]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [15]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [16]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [17]:
# Convert the frame's values to a NumPy array (index and column labels are not included).
df.to_numpy()

array([[ 0.92428978,  0.44041706,  0.44668885,  0.44656603],
       [-0.7348144 , -0.79123497, -0.56874982,  0.00609173],
       [ 0.42420731,  1.09267117,  0.09397013,  1.27633697],
       [ 0.86499833, -0.29151261, -1.29488658, -0.84700738],
       [ 1.139238  ,  0.3366924 , -0.70812582, -0.85384667],
       [-1.34538432, -1.27593892, -0.53690197, -0.12553385]])

In [18]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.212089,-0.081484,-0.428001,-0.016232
std,1.01592,0.871834,0.616306,0.811067
min,-1.345384,-1.275939,-1.294887,-0.853847
25%,-0.445059,-0.666304,-0.673282,-0.666639
50%,0.644603,0.02259,-0.552826,-0.059721
75%,0.909467,0.414486,-0.063748,0.336447
max,1.139238,1.092671,0.446689,1.276337


In [19]:
# Transpose: columns become rows and the dates become column labels.
df.transpose()

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.92429,-0.734814,0.424207,0.864998,1.139238,-1.345384
B,0.440417,-0.791235,1.092671,-0.291513,0.336692,-1.275939
C,0.446689,-0.56875,0.09397,-1.294887,-0.708126,-0.536902
D,0.446566,0.006092,1.276337,-0.847007,-0.853847,-0.125534


In [20]:
# Order the columns by label, descending (D, C, B, A).
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.446566,0.446689,0.440417,0.92429
2013-01-02,0.006092,-0.56875,-0.791235,-0.734814
2013-01-03,1.276337,0.09397,1.092671,0.424207
2013-01-04,-0.847007,-1.294887,-0.291513,0.864998
2013-01-05,-0.853847,-0.708126,0.336692,1.139238
2013-01-06,-0.125534,-0.536902,-1.275939,-1.345384


In [21]:
# Order the columns by label, ascending (A, B, C, D).
df.sort_index(axis="columns")

Unnamed: 0,A,B,C,D
2013-01-01,0.92429,0.440417,0.446689,0.446566
2013-01-02,-0.734814,-0.791235,-0.56875,0.006092
2013-01-03,0.424207,1.092671,0.09397,1.276337
2013-01-04,0.864998,-0.291513,-1.294887,-0.847007
2013-01-05,1.139238,0.336692,-0.708126,-0.853847
2013-01-06,-1.345384,-1.275939,-0.536902,-0.125534


In [22]:
# Order the rows by the values in column "B", smallest first.
df.sort_values("B")

Unnamed: 0,A,B,C,D
2013-01-06,-1.345384,-1.275939,-0.536902,-0.125534
2013-01-02,-0.734814,-0.791235,-0.56875,0.006092
2013-01-04,0.864998,-0.291513,-1.294887,-0.847007
2013-01-05,1.139238,0.336692,-0.708126,-0.853847
2013-01-01,0.92429,0.440417,0.446689,0.446566
2013-01-03,0.424207,1.092671,0.09397,1.276337


In [23]:
# Order the rows by the values in column "D", smallest first.
df.sort_values("D")

Unnamed: 0,A,B,C,D
2013-01-05,1.139238,0.336692,-0.708126,-0.853847
2013-01-04,0.864998,-0.291513,-1.294887,-0.847007
2013-01-06,-1.345384,-1.275939,-0.536902,-0.125534
2013-01-02,-0.734814,-0.791235,-0.56875,0.006092
2013-01-01,0.92429,0.440417,0.446689,0.446566
2013-01-03,0.424207,1.092671,0.09397,1.276337


In [24]:
# Select the single column "A"; the result is a Series.
df.loc[:, "A"]

2013-01-01    0.924290
2013-01-02   -0.734814
2013-01-03    0.424207
2013-01-04    0.864998
2013-01-05    1.139238
2013-01-06   -1.345384
Freq: D, Name: A, dtype: float64

In [25]:
# Positional row slice: the first three rows.
df.iloc[:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.92429,0.440417,0.446689,0.446566
2013-01-02,-0.734814,-0.791235,-0.56875,0.006092
2013-01-03,0.424207,1.092671,0.09397,1.276337


In [26]:
# Positional row slice: rows at positions 1 and 2.
df.iloc[1:3]

Unnamed: 0,A,B,C,D
2013-01-02,-0.734814,-0.791235,-0.56875,0.006092
2013-01-03,0.424207,1.092671,0.09397,1.276337


In [27]:
# Positional row slice: the first four rows.
df.iloc[:4]

Unnamed: 0,A,B,C,D
2013-01-01,0.92429,0.440417,0.446689,0.446566
2013-01-02,-0.734814,-0.791235,-0.56875,0.006092
2013-01-03,0.424207,1.092671,0.09397,1.276337
2013-01-04,0.864998,-0.291513,-1.294887,-0.847007


In [28]:
# Positional row slice: rows at positions 2 and 3.
df.iloc[2:4]

Unnamed: 0,A,B,C,D
2013-01-03,0.424207,1.092671,0.09397,1.276337
2013-01-04,0.864998,-0.291513,-1.294887,-0.847007


In [29]:
# Label-based selection: the row whose index label is dates[2], as a Series.
df.loc[dates[2], :]

A    0.424207
B    1.092671
C    0.093970
D    1.276337
Name: 2013-01-03 00:00:00, dtype: float64

In [30]:
# Label-based selection: columns "A" and "B" of the row labelled 2013-01-02.
# The date is given as a string and matched against the datetime index.
df.loc["20130102", ["A", "B"]]

A   -0.734814
B   -0.791235
Name: 2013-01-02 00:00:00, dtype: float64

In [31]:
# Position-based selection: the row at position 3, as a Series.
df.iloc[3, :]

A    0.864998
B   -0.291513
C   -1.294887
D   -0.847007
Name: 2013-01-04 00:00:00, dtype: float64

In [32]:
# Position-based selection: rows 3-4 and the first two columns.
df.iloc[3:5, :2]

Unnamed: 0,A,B
2013-01-04,0.864998,-0.291513
2013-01-05,1.139238,0.336692


In [33]:
# Position-based selection with explicit lists: rows 1, 2, 4 and columns 0, 2 ("A", "C").
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2013-01-02,-0.734814,-0.56875
2013-01-03,0.424207,0.09397
2013-01-05,1.139238,-0.708126


In [34]:
# Position-based row slice (rows 1-2), keeping every column.
df.iloc[1:3]

Unnamed: 0,A,B,C,D
2013-01-02,-0.734814,-0.791235,-0.56875,0.006092
2013-01-03,0.424207,1.092671,0.09397,1.276337


In [35]:
# Position-based column selection (columns 1-2, i.e. "B" and "C"), keeping every row.
df.iloc[:, [1, 2]]

Unnamed: 0,B,C
2013-01-01,0.440417,0.446689
2013-01-02,-0.791235,-0.56875
2013-01-03,1.092671,0.09397
2013-01-04,-0.291513,-1.294887
2013-01-05,0.336692,-0.708126
2013-01-06,-1.275939,-0.536902


In [36]:
# Read a single value by position: row 3, column 2.
df.iat[3, 2]

-1.2948865760084214

In [37]:
# Read a single value by position: row 1, column 1.
df.iloc[1, 1]

-0.7912349721184718

In [38]:
# Boolean row filter: keep only the rows where column "A" is positive.
df.loc[df["A"] > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.92429,0.440417,0.446689,0.446566
2013-01-03,0.424207,1.092671,0.09397,1.276337
2013-01-04,0.864998,-0.291513,-1.294887,-0.847007
2013-01-05,1.139238,0.336692,-0.708126,-0.853847


In [39]:
# Element-wise mask: keep positive values, every other cell becomes NaN.
df.where(df > 0)

Unnamed: 0,A,B,C,D
2013-01-01,0.92429,0.440417,0.446689,0.446566
2013-01-02,,,,0.006092
2013-01-03,0.424207,1.092671,0.09397,1.276337
2013-01-04,0.864998,,,
2013-01-05,1.139238,0.336692,,
2013-01-06,,,,


In [40]:
# Work on an independent copy so that `df` itself is not modified.
df2 = df.copy(deep=True)

In [41]:
# Add a string column "E" to the copy, one label per row.
df2["E"] = [
    "one",
    "one",
    "two",
    "three",
    "four",
    "three",
]

In [42]:
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,0.92429,0.440417,0.446689,0.446566,one
2013-01-02,-0.734814,-0.791235,-0.56875,0.006092,one
2013-01-03,0.424207,1.092671,0.09397,1.276337,two
2013-01-04,0.864998,-0.291513,-1.294887,-0.847007,three
2013-01-05,1.139238,0.336692,-0.708126,-0.853847,four
2013-01-06,-1.345384,-1.275939,-0.536902,-0.125534,three


In [43]:
# Keep only the rows whose "E" value is one of the listed labels.
df2.loc[df2["E"].isin(["two", "four"])]

Unnamed: 0,A,B,C,D,E
2013-01-03,0.424207,1.092671,0.09397,1.276337,two
2013-01-05,1.139238,0.336692,-0.708126,-0.853847,four


In [63]:
# Series of 1..6 indexed by six daily dates starting 2013-01-02
# (one day later than the index of `df`).
s1 = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=pd.date_range(start="20130102", periods=6),
)

In [64]:
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [65]:
# Add s1 as column "F". Values are aligned on the index: 2013-01-01 has no entry
# in s1 and becomes NaN, and s1's 2013-01-07 value has no matching row in df.
df["F"] = s1

In [66]:
df

Unnamed: 0,A,B,C,D,F
2013-01-01,2.0,0.0,0.446689,5,
2013-01-02,-0.734814,-0.791235,-0.56875,5,1.0
2013-01-03,0.424207,1.092671,0.09397,5,2.0
2013-01-04,0.864998,-0.291513,-1.294887,5,3.0
2013-01-05,1.139238,0.336692,-0.708126,5,4.0
2013-01-06,-1.345384,-1.275939,-0.536902,5,5.0


In [67]:
# Set a single value by label: column "A" of the first date becomes 2.
df.loc[dates[0], "A"] = 2

In [68]:
df

Unnamed: 0,A,B,C,D,F
2013-01-01,2.0,0.0,0.446689,5,
2013-01-02,-0.734814,-0.791235,-0.56875,5,1.0
2013-01-03,0.424207,1.092671,0.09397,5,2.0
2013-01-04,0.864998,-0.291513,-1.294887,5,3.0
2013-01-05,1.139238,0.336692,-0.708126,5,4.0
2013-01-06,-1.345384,-1.275939,-0.536902,5,5.0


In [69]:
# Set single values by position in the first row.
# NOTE(review): the recorded outputs show B == 0.0 in the first row, but no
# remaining cell performs that assignment (it appears to have been edited away).
# It is restored here so a fresh "Restart & Run All" reproduces the recorded state.
df.iat[0, 1] = 0  # column position 1 -> "B"
df.iat[0, 3] = 0  # column position 3 -> "D"

In [70]:
df

Unnamed: 0,A,B,C,D,F
2013-01-01,2.0,0.0,0.446689,0,
2013-01-02,-0.734814,-0.791235,-0.56875,5,1.0
2013-01-03,0.424207,1.092671,0.09397,5,2.0
2013-01-04,0.864998,-0.291513,-1.294887,5,3.0
2013-01-05,1.139238,0.336692,-0.708126,5,4.0
2013-01-06,-1.345384,-1.275939,-0.536902,5,5.0


In [71]:
# Number of rows in the frame.
df.shape[0]

6

In [72]:
# Overwrite every value in column "D" with 5, using a NumPy array of matching length.
df.loc[:, "D"] = np.full(len(df), 5)

In [73]:
df

Unnamed: 0,A,B,C,D,F
2013-01-01,2.0,0.0,0.446689,5,
2013-01-02,-0.734814,-0.791235,-0.56875,5,1.0
2013-01-03,0.424207,1.092671,0.09397,5,2.0
2013-01-04,0.864998,-0.291513,-1.294887,5,3.0
2013-01-05,1.139238,0.336692,-0.708126,5,4.0
2013-01-06,-1.345384,-1.275939,-0.536902,5,5.0


In [75]:
# Take a fresh independent copy of the modified frame.
df2 = df.copy(deep=True)

In [76]:
df2

Unnamed: 0,A,B,C,D,F
2013-01-01,2.0,0.0,0.446689,5,
2013-01-02,-0.734814,-0.791235,-0.56875,5,1.0
2013-01-03,0.424207,1.092671,0.09397,5,2.0
2013-01-04,0.864998,-0.291513,-1.294887,5,3.0
2013-01-05,1.139238,0.336692,-0.708126,5,4.0
2013-01-06,-1.345384,-1.275939,-0.536902,5,5.0


In [None]:
# Missing data