Creating a **Series** by passing a list of values

In [4]:
import pandas as pd
import numpy as np

# A Series built from a plain list gets a default integer index;
# np.nan stands in for a missing value.
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
)

In [5]:
# The integers are shown as floats: the np.nan entry makes the Series float64.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

Creating a **DataFrame** from a NumPy array, with a datetime index and labeled columns

In [6]:
# Six consecutive calendar days starting on 2021-01-06; used below as the row index.
dates = pd.date_range(start="20210106", periods=6, freq="D")

In [7]:
# Inspect the generated index: six daily timestamps.
dates

DatetimeIndex(['2021-01-06', '2021-01-07', '2021-01-08', '2021-01-09',
               '2021-01-10', '2021-01-11'],
              dtype='datetime64[ns]', freq='D')

In [9]:
# Build a 6x4 frame of standard-normal samples, with `dates` as the row index
# and the letters A-D as column labels.
# NOTE(review): no random seed is set, so these values change on every kernel
# restart and will not match the outputs saved below — consider seeding first.
df = pd.DataFrame(np.random.randn(6,4), index = dates, columns = list("ABCD"))

In [10]:
# Rows are labelled by the dates, columns by A-D.
df

Unnamed: 0,A,B,C,D
2021-01-06,1.067907,-0.30837,-2.556374,-0.479453
2021-01-07,-0.406669,1.527887,-0.530647,-1.747194
2021-01-08,-0.448772,2.024578,-1.654048,0.723015
2021-01-09,-0.348636,-1.013858,-0.022972,-0.691032
2021-01-10,-0.649714,0.099438,0.250337,-0.258712
2021-01-11,-0.354186,0.564028,-0.491385,-0.266737


Creating a **DataFrame** from a dict, where each key becomes a column and scalar values are repeated for every row

In [13]:
# Each dict key becomes a column. Scalars (A, B, F) are repeated for every row;
# the array-like entries (C, D, E) each supply four values.
df2 = pd.DataFrame({
    "A": 1.0,                                                    # float scalar
    "B": pd.Timestamp("20210305"),                               # timestamp scalar
    "C": pd.Series(1, index=[0, 1, 2, 3], dtype="float32"),      # float32 Series
    "D": np.array([3, 3, 3, 3], dtype="int32"),                  # int32 array
    "E": pd.Categorical(["test", "train", "test", "train"]),     # categorical
    "F": "foo",                                                  # string scalar
})

In [17]:
# The scalar entries (A, B, F) appear in every one of the four rows.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2021-03-05,1.0,3,test,foo
1,1.0,2021-03-05,1.0,3,train,foo
2,1.0,2021-03-05,1.0,3,test,foo
3,1.0,2021-03-05,1.0,3,train,foo


In [18]:
# Unlike a NumPy array, a DataFrame can hold a different dtype in each column.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### Viewing Data

In [19]:
# First rows of the frame; with no argument, head() shows five rows.
df.head()

Unnamed: 0,A,B,C,D
2021-01-06,1.067907,-0.30837,-2.556374,-0.479453
2021-01-07,-0.406669,1.527887,-0.530647,-1.747194
2021-01-08,-0.448772,2.024578,-1.654048,0.723015
2021-01-09,-0.348636,-1.013858,-0.022972,-0.691032
2021-01-10,-0.649714,0.099438,0.250337,-0.258712


In [20]:
# Last three rows of the frame.
df.tail(3)

Unnamed: 0,A,B,C,D
2021-01-09,-0.348636,-1.013858,-0.022972,-0.691032
2021-01-10,-0.649714,0.099438,0.250337,-0.258712
2021-01-11,-0.354186,0.564028,-0.491385,-0.266737


In [21]:
# Row labels: the DatetimeIndex that was passed in as `dates`.
df.index

DatetimeIndex(['2021-01-06', '2021-01-07', '2021-01-08', '2021-01-09',
               '2021-01-10', '2021-01-11'],
              dtype='datetime64[ns]', freq='D')

In [22]:
# Column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [26]:
# Return the underlying values as a NumPy array; the index and column labels
# are not part of the result.
df.to_numpy()

array([[ 1.06790673, -0.30837038, -2.55637425, -0.47945282],
       [-0.40666925,  1.52788708, -0.53064658, -1.74719353],
       [-0.44877157,  2.02457801, -1.65404805,  0.72301475],
       [-0.34863644, -1.0138577 , -0.02297185, -0.69103236],
       [-0.64971447,  0.09943791,  0.25033708, -0.25871192],
       [-0.35418648,  0.56402774, -0.49138533, -0.26673704]])

In [25]:
# Quick statistical summary of each column: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.190012,0.482284,-0.834181,-0.453352
std,0.626051,1.139191,1.066022,0.798467
min,-0.649714,-1.013858,-2.556374,-1.747194
25%,-0.438246,-0.206418,-1.373198,-0.638137
50%,-0.380428,0.331733,-0.511016,-0.373095
75%,-0.350024,1.286922,-0.140075,-0.260718
max,1.067907,2.024578,0.250337,0.723015


In [28]:
# Transpose the frame: columns A-D become rows and the dates become columns.
df.T

Unnamed: 0,2021-01-06,2021-01-07,2021-01-08,2021-01-09,2021-01-10,2021-01-11
A,1.067907,-0.406669,-0.448772,-0.348636,-0.649714,-0.354186
B,-0.30837,1.527887,2.024578,-1.013858,0.099438,0.564028
C,-2.556374,-0.530647,-1.654048,-0.022972,0.250337,-0.491385
D,-0.479453,-1.747194,0.723015,-0.691032,-0.258712,-0.266737


In [31]:
# Sort along axis 1, i.e. by column label, in descending order (D, C, B, A).
# The rows keep their original order.
df.sort_index(axis=1,ascending=False)

Unnamed: 0,D,C,B,A
2021-01-06,-0.479453,-2.556374,-0.30837,1.067907
2021-01-07,-1.747194,-0.530647,1.527887,-0.406669
2021-01-08,0.723015,-1.654048,2.024578,-0.448772
2021-01-09,-0.691032,-0.022972,-1.013858,-0.348636
2021-01-10,-0.258712,0.250337,0.099438,-0.649714
2021-01-11,-0.266737,-0.491385,0.564028,-0.354186


In [32]:
# Reorder the rows by the values in column C, smallest first.
df.sort_values(by="C")

Unnamed: 0,A,B,C,D
2021-01-06,1.067907,-0.30837,-2.556374,-0.479453
2021-01-08,-0.448772,2.024578,-1.654048,0.723015
2021-01-07,-0.406669,1.527887,-0.530647,-1.747194
2021-01-11,-0.354186,0.564028,-0.491385,-0.266737
2021-01-09,-0.348636,-1.013858,-0.022972,-0.691032
2021-01-10,-0.649714,0.099438,0.250337,-0.258712


#### Getting Data

In [33]:
# Select a single column; the result is a Series that keeps the date index.
df["A"]

2021-01-06    1.067907
2021-01-07   -0.406669
2021-01-08   -0.448772
2021-01-09   -0.348636
2021-01-10   -0.649714
2021-01-11   -0.354186
Freq: D, Name: A, dtype: float64

In [34]:
# An integer slice inside [] selects rows by position: here the first three.
df[0:3]

Unnamed: 0,A,B,C,D
2021-01-06,1.067907,-0.30837,-2.556374,-0.479453
2021-01-07,-0.406669,1.527887,-0.530647,-1.747194
2021-01-08,-0.448772,2.024578,-1.654048,0.723015


In [35]:
# A slice of date strings selects rows by label: every row from 2021-01-08
# onwards, with the start label itself included.
df["20210108":]

Unnamed: 0,A,B,C,D
2021-01-08,-0.448772,2.024578,-1.654048,0.723015
2021-01-09,-0.348636,-1.013858,-0.022972,-0.691032
2021-01-10,-0.649714,0.099438,0.250337,-0.258712
2021-01-11,-0.354186,0.564028,-0.491385,-0.266737


#### Selection by label

In [37]:
# Select one row by its index label; the result is a Series keyed by column name.
df.loc[dates[2]]

A   -0.448772
B    2.024578
C   -1.654048
D    0.723015
Name: 2021-01-08 00:00:00, dtype: float64

In [39]:
# Passing both a row label and a column label returns a single scalar value.
df.loc[dates[1],"B"]

1.5278870814867447

#### Selection by Position

* Selecting rows and columns by integer position

In [40]:
# Third row by integer position — the same row that df.loc[dates[2]] returns.
df.iloc[2]

A   -0.448772
B    2.024578
C   -1.654048
D    0.723015
Name: 2021-01-08 00:00:00, dtype: float64

In [42]:
# All rows, columns at positions 1 and 2 (B and C); the stop position 3 is excluded.
df.iloc[:,1:3]

Unnamed: 0,B,C
2021-01-06,-0.30837,-2.556374
2021-01-07,1.527887,-0.530647
2021-01-08,2.024578,-1.654048
2021-01-09,-1.013858,-0.022972
2021-01-10,0.099438,0.250337
2021-01-11,0.564028,-0.491385


#### Boolean Indexing

In [43]:
# Use a boolean Series as a row filter: keep only rows where column A is positive.
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2021-01-06,1.067907,-0.30837,-2.556374,-0.479453


In [44]:
# Element-wise mask: the shape is preserved and every entry that fails the
# condition comes back as a missing value.
df[df > 0]

Unnamed: 0,A,B,C,D
2021-01-06,1.067907,,,
2021-01-07,,1.527887,,
2021-01-08,,2.024578,,0.723015
2021-01-09,,,,
2021-01-10,,0.099438,0.250337,
2021-01-11,,0.564028,,


In [45]:
# Work on a copy so that adding a column below leaves `df` untouched.
# NOTE(review): this rebinds `df2`, which previously held the dict-built frame;
# a distinct name would avoid confusion when cells are re-run out of order.
df2 = df.copy()

In [46]:
# Add a string column E, one label per row.
df2["E"] = [
    "one",
    "two",
    "three",
    "four",
    "five",
    "six",
]

In [47]:
# The copy now carries the extra string column E.
df2

Unnamed: 0,A,B,C,D,E
2021-01-06,1.067907,-0.30837,-2.556374,-0.479453,one
2021-01-07,-0.406669,1.527887,-0.530647,-1.747194,two
2021-01-08,-0.448772,2.024578,-1.654048,0.723015,three
2021-01-09,-0.348636,-1.013858,-0.022972,-0.691032,four
2021-01-10,-0.649714,0.099438,0.250337,-0.258712,five
2021-01-11,-0.354186,0.564028,-0.491385,-0.266737,six


In [48]:
# isin() yields a boolean mask that is True where E is one of the listed values;
# indexing with it keeps only those rows.
df2[df2["E"].isin(["three","five"])]

Unnamed: 0,A,B,C,D,E
2021-01-08,-0.448772,2.024578,-1.654048,0.723015,three
2021-01-10,-0.649714,0.099438,0.250337,-0.258712,five


#### Setting

In [51]:
# Integer Series indexed by six consecutive days starting 2021-01-06.
s1 = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=pd.date_range(start="20210106", periods=6),
)

In [52]:
# s1 covers the same six dates as the `dates` index defined earlier.
s1

2021-01-06    1
2021-01-07    2
2021-01-08    3
2021-01-09    4
2021-01-10    5
2021-01-11    6
Freq: D, dtype: int64

In [54]:
# Add s1 as a new column F; the values are matched to rows by index label.
# NOTE(review): from here on `df` is modified in place, so the outputs of the
# earlier cells no longer reflect its current contents.
df["F"] = s1

In [55]:
# Set a single value by row label and column label.
df.at[dates[0],"A"] = 2

In [57]:
# Set a single value by integer position: row 0, column 1 (column B).
df.iat[0,1] = 0

In [59]:
# Overwrite column D with a NumPy array of fives, one per row.
# Assigning through df["D"] replaces the whole column, so D takes the array's
# integer dtype on every pandas version. A full-column `df.loc[:, "D"] = ...`
# writes into the existing float column on pandas >= 2.0, which would leave D
# as 5.0 rather than the integer 5 shown in the output below.
df["D"] = np.array([5] * len(df))

In [60]:
# Result of the four assignments above: new column F, A and B changed in the
# first row, and D overwritten with fives.
df

Unnamed: 0,A,B,C,D,F
2021-01-06,2.0,0.0,-2.556374,5,1
2021-01-07,-0.406669,1.527887,-0.530647,5,2
2021-01-08,-0.448772,2.024578,-1.654048,5,3
2021-01-09,-0.348636,-1.013858,-0.022972,5,4
2021-01-10,-0.649714,0.099438,0.250337,5,5
2021-01-11,-0.354186,0.564028,-0.491385,5,6


In [61]:
# Take a fresh copy of the updated frame so the masked assignment below does not
# alter `df`.
# NOTE(review): `df2` is rebound again here and no longer has the E column.
df2 = df.copy()

In [62]:
# Masked assignment: every positive entry is replaced by its negation, so no
# value in df2 is above zero afterwards. Other entries are unchanged.
df2[df2 > 0] = -df2

In [63]:
# Every value is now zero or negative.
df2

Unnamed: 0,A,B,C,D,F
2021-01-06,-2.0,0.0,-2.556374,-5,-1
2021-01-07,-0.406669,-1.527887,-0.530647,-5,-2
2021-01-08,-0.448772,-2.024578,-1.654048,-5,-3
2021-01-09,-0.348636,-1.013858,-0.022972,-5,-4
2021-01-10,-0.649714,-0.099438,-0.250337,-5,-5
2021-01-11,-0.354186,-0.564028,-0.491385,-5,-6
