# Pandas - Python Library

In [1]:
# Installing libraries (run once in a terminal; inside a notebook use `%pip install pandas numpy`)
# pip3 install pandas
# pip3 install numpy

### Importing libraries

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Object creation: a Series is a one-dimensional labelled array, i.e. a single column.
# The np.nan entry marks a missing value.
s = pd.Series(data=[1, 3, np.nan, 5, 7, 8, 9])
s

0    1.0
1    3.0
2    NaN
3    5.0
4    7.0
5    8.0
6    9.0
dtype: float64

In [4]:
# Six consecutive calendar days starting on 2023-01-01 (daily frequency is the default).
dates = pd.date_range(start='20230101', periods=6, freq='D')
dates

DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
               '2023-01-05', '2023-01-06'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# Build a 6x4 frame of standard-normal samples: one row per entry in `dates`,
# columns labelled A-D.
# The generator is seeded so the values are identical on every Restart & Run All.
# The numbers therefore differ from any output produced by the earlier unseeded
# np.random.randn call.
df = pd.DataFrame(
    np.random.default_rng(42).standard_normal((6, 4)),
    index=dates,
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
2023-01-01,-0.687125,-0.289322,0.136143,1.293307
2023-01-02,0.681133,-0.517116,0.851075,0.599484
2023-01-03,-0.63304,-0.762864,0.592309,-1.152378
2023-01-04,-0.830034,0.326773,-0.270945,-1.001267
2023-01-05,0.145065,0.967164,0.782808,0.910107
2023-01-06,0.919067,-1.449038,0.775989,-1.062123


In [6]:
# Create a DataFrame from a mapping of column name -> column values.
# Scalar entries (A, B, F) are repeated for every row; the row labels 0..3
# come from the index given to the Series in column C.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp('20230723'),
        C=pd.Series(1, index=list(range(4)), dtype=float),
        D=np.array([3] * 4, dtype="int32"),
        E=pd.Categorical(['girl', 'woman', 'girl', 'woman']),
        F='female',
    )
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2023-07-23,1.0,3,girl,female
1,1.0,2023-07-23,1.0,3,woman,female
2,1.0,2023-07-23,1.0,3,girl,female
3,1.0,2023-07-23,1.0,3,woman,female


In [7]:
# Each column of a DataFrame carries its own dtype; list them per column.
df2.dtypes

A           float64
B    datetime64[ns]
C           float64
D             int32
E          category
F            object
dtype: object

In [9]:
# Peek at the first 3 rows.
df.head(3)

Unnamed: 0,A,B,C,D
2023-01-01,-0.687125,-0.289322,0.136143,1.293307
2023-01-02,0.681133,-0.517116,0.851075,0.599484
2023-01-03,-0.63304,-0.762864,0.592309,-1.152378


In [10]:
# Peek at the last 2 rows.
df.tail(2)

Unnamed: 0,A,B,C,D
2023-01-05,0.145065,0.967164,0.782808,0.910107
2023-01-06,0.919067,-1.449038,0.775989,-1.062123


In [11]:
df.index  # row labels (the index) of df -- here the DatetimeIndex built from `dates`

DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
               '2023-01-05', '2023-01-06'],
              dtype='datetime64[ns]', freq='D')

In [12]:
# Row labels of df2: plain integer labels 0..3 rather than dates.
df2.index

Index([0, 1, 2, 3], dtype='int64')

In [14]:
df.to_numpy()  # convert the values to a NumPy array (index and column labels are dropped)

array([[-0.68712505, -0.28932163,  0.1361426 ,  1.29330688],
       [ 0.68113275, -0.51711636,  0.8510748 ,  0.5994837 ],
       [-0.63303966, -0.76286444,  0.59230927, -1.15237761],
       [-0.83003412,  0.32677334, -0.27094468, -1.00126693],
       [ 0.14506501,  0.96716372,  0.78280781,  0.91010654],
       [ 0.91906662, -1.44903819,  0.77598908, -1.06212317]])

In [15]:
# df2 mixes floats, timestamps, ints and strings, so the only NumPy dtype that
# can hold every column is `object`.
df2.to_numpy()

array([[1.0, Timestamp('2023-07-23 00:00:00'), 1.0, 3, 'girl', 'female'],
       [1.0, Timestamp('2023-07-23 00:00:00'), 1.0, 3, 'woman', 'female'],
       [1.0, Timestamp('2023-07-23 00:00:00'), 1.0, 3, 'girl', 'female'],
       [1.0, Timestamp('2023-07-23 00:00:00'), 1.0, 3, 'woman', 'female']],
      dtype=object)

In [16]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.067489,-0.287401,0.477896,-0.068812
std,0.756859,0.846095,0.449831,1.121653
min,-0.830034,-1.449038,-0.270945,-1.152378
25%,-0.673604,-0.701427,0.250184,-1.046909
50%,-0.243987,-0.403219,0.684149,-0.200892
75%,0.547116,0.17275,0.781103,0.832451
max,0.919067,0.967164,0.851075,1.293307


In [17]:
df.T  # transpose: rows become columns and columns become rows

Unnamed: 0,2023-01-01,2023-01-02,2023-01-03,2023-01-04,2023-01-05,2023-01-06
A,-0.687125,0.681133,-0.63304,-0.830034,0.145065,0.919067
B,-0.289322,-0.517116,-0.762864,0.326773,0.967164,-1.449038
C,0.136143,0.851075,0.592309,-0.270945,0.782808,0.775989
D,1.293307,0.599484,-1.152378,-1.001267,0.910107,-1.062123


In [18]:
# Sorting
# axis=1 sorts by the column labels (not by the values); ascending=False gives D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2023-01-01,1.293307,0.136143,-0.289322,-0.687125
2023-01-02,0.599484,0.851075,-0.517116,0.681133
2023-01-03,-1.152378,0.592309,-0.762864,-0.63304
2023-01-04,-1.001267,-0.270945,0.326773,-0.830034
2023-01-05,0.910107,0.782808,0.967164,0.145065
2023-01-06,-1.062123,0.775989,-1.449038,0.919067


In [21]:
# Same sort on the column labels, now in ascending order: A, B, C, D.
df.sort_index(axis=1, ascending=True)

Unnamed: 0,A,B,C,D
2023-01-01,-0.687125,-0.289322,0.136143,1.293307
2023-01-02,0.681133,-0.517116,0.851075,0.599484
2023-01-03,-0.63304,-0.762864,0.592309,-1.152378
2023-01-04,-0.830034,0.326773,-0.270945,-1.001267
2023-01-05,0.145065,0.967164,0.782808,0.910107
2023-01-06,0.919067,-1.449038,0.775989,-1.062123


In [24]:
# Sort the rows by the values in column "B" (ascending is the default order).
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2023-01-06,0.919067,-1.449038,0.775989,-1.062123
2023-01-03,-0.63304,-0.762864,0.592309,-1.152378
2023-01-02,0.681133,-0.517116,0.851075,0.599484
2023-01-01,-0.687125,-0.289322,0.136143,1.293307
2023-01-04,-0.830034,0.326773,-0.270945,-1.001267
2023-01-05,0.145065,0.967164,0.782808,0.910107


In [25]:
# ascending=True is already the default for sort_values, so this returns the same
# ordering as df.sort_values(by="B"); pass ascending=False for largest-first.
df.sort_values(by="B", ascending=True)

Unnamed: 0,A,B,C,D
2023-01-06,0.919067,-1.449038,0.775989,-1.062123
2023-01-03,-0.63304,-0.762864,0.592309,-1.152378
2023-01-02,0.681133,-0.517116,0.851075,0.599484
2023-01-01,-0.687125,-0.289322,0.136143,1.293307
2023-01-04,-0.830034,0.326773,-0.270945,-1.001267
2023-01-05,0.145065,0.967164,0.782808,0.910107


In [26]:
# Filtering data based on values
# First select column "B" on its own; a single column comes back as a Series
# that keeps the DataFrame's index.
df["B"]

2023-01-01   -0.289322
2023-01-02   -0.517116
2023-01-03   -0.762864
2023-01-04    0.326773
2023-01-05    0.967164
2023-01-06   -1.449038
Freq: D, Name: B, dtype: float64