# Pandas

Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
built on top of the Python programming language.
Before Pandas, Python was used mainly for data munging and preparation and contributed very little to data analysis itself. Pandas closed this gap. With Pandas, we can carry out the five typical steps of data processing and analysis — load, prepare, manipulate, model, and analyze — regardless of where the data comes from.

Python with Pandas is used in a wide range of academic and commercial fields, including finance, economics, statistics, and analytics.

In [1]:
# importing libraries
import pandas as pd
import numpy as np

In [11]:
# Build a small demo table: 5 labelled rows x 4 labelled columns holding 0..19
df = pd.DataFrame(
    data=np.arange(20).reshape(5, 4),
    index=[f'Row{i}' for i in range(1, 6)],
    columns=[f'Column{j}' for j in range(1, 5)],
)

In [7]:
# Preview the top of the table; the frame has only 5 rows, so all of them are shown
df.head()

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [12]:
# .loc selects by label; .iloc selects by integer position.
# Selecting a single row label returns that row as a Series
# whose index is the column names.
df.loc['Row1']

Column1    0
Column2    1
Column3    2
Column4    3
Name: Row1, dtype: int32

In [13]:
# A single row pulled out of a DataFrame is a pandas Series
type(df.loc['Row1'])

# DataFrame: the two-dimensional table (df itself)
# Series: a single labelled row or column taken from it

pandas.core.series.Series

In [14]:
# Full slices on both axes select every row and every column
df.iloc[:,:]

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [16]:
# Positional slicing: all rows, columns at positions 1 and 2 (the end index 3 is excluded)
df.iloc[:,1:3]

Unnamed: 0,Column2,Column3
Row1,1,2
Row2,5,6
Row3,9,10
Row4,13,14
Row5,17,18


In [17]:
# Extract the two selected columns as a plain NumPy array.
# .to_numpy() is the accessor pandas recommends over the older .values attribute;
# for this all-integer selection it yields the same array.
df.iloc[:, 1:3].to_numpy()

array([[ 1,  2],
       [ 5,  6],
       [ 9, 10],
       [13, 14],
       [17, 18]])

In [21]:
# Display the whole (5-row) frame again before counting values
df

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [24]:
# Count how many times each distinct value occurs in Column1
# (every value is unique here, so each count is 1)
df['Column1'].value_counts()

0     1
8     1
4     1
16    1
12    1
Name: Column1, dtype: int64

In [41]:
# Load original_trial.csv (relative path, resolved against the current working directory).
# NOTE: this rebinds `df`, replacing the 5x4 demo table built earlier.
# NOTE(review): df.info() reports the third column as ' (snr)^0.5' with a leading
# space — confirm against the file header before selecting that column by name.
df= pd.read_csv('original_trial.csv')

In [42]:
# Peek at the first five rows of the loaded data
df.head()

Unnamed: 0,rp,snr,(snr)^0.5,ber,2*snr,2*ber,snr+ber,(snr+ber)^2,s parameter,mean,label
0,-53.301,87.699,9.364774,0.00013,175.398,0.00026,87.69913,7691.137406,-9.301,887.632967,1
1,-53.307,87.693,9.364454,0.00013,175.386,0.00026,87.69313,7690.085056,-9.307,887.512003,1
2,-53.31,87.69,9.364294,0.00013,175.38,0.00026,87.69013,7689.558908,-9.31,887.451525,1
3,-53.316,87.684,9.363974,0.00013,175.368,0.00026,87.68413,7688.506664,-9.316,887.330573,1
4,-53.319,87.681,9.363813,0.00013,175.362,0.00026,87.68113,7687.98057,-9.319,887.2701,1


In [44]:
# Column names, non-null counts and dtypes; every column is fully populated
# (736 non-null values for 736 entries)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 736 entries, 0 to 735
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   rp           736 non-null    float64
 1   snr          736 non-null    float64
 2    (snr)^0.5   736 non-null    float64
 3   ber          736 non-null    float64
 4   2*snr        736 non-null    float64
 5   2*ber        736 non-null    float64
 6   snr+ber      736 non-null    float64
 7   (snr+ber)^2  736 non-null    float64
 8   s parameter  736 non-null    float64
 9   mean         736 non-null    float64
 10  label        736 non-null    int64  
dtypes: float64(10), int64(1)
memory usage: 63.4 KB


In [45]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column
df.describe()

Unnamed: 0,rp,snr,(snr)^0.5,ber,2*snr,2*ber,snr+ber,(snr+ber)^2,s parameter,mean,label
count,736.0,736.0,736.0,736.0,736.0,736.0,736.0,736.0,736.0,736.0,736.0
mean,-51.302576,89.697424,9.462209,0.000127,179.394848,0.000254,89.697551,8105.931828,-7.302576,935.064343,0.5
std,7.769397,7.769397,0.405268,2.1e-05,15.538794,4.2e-05,7.769376,1440.716672,7.769397,165.293778,0.50034
min,-64.202,76.798,8.763447,8.2e-05,153.596,0.000164,76.79817,5897.958846,-20.202,681.056775,0.0
25%,-56.401,84.599,9.197752,0.000113,169.198,0.000227,84.59914,7157.134443,-12.401,826.214084,0.0
50%,-51.501,89.499,9.460058,0.000125,178.998,0.00025,89.499125,8012.346355,-7.501,924.533435,0.5
75%,-47.10525,93.89475,9.68993,0.00014,187.7895,0.000279,93.894863,8816.24538,-3.10525,1016.811584,1.0
max,-30.651,110.349,10.504713,0.00017,220.698,0.000339,110.349082,12176.91992,13.349,1401.279885,1.0


In [46]:
# Frequency of each distinct 'ber' value (648 distinct values across 736 rows).
# NOTE: several rows print the same number (e.g. 0.000082) with different counts —
# presumably distinct floats that differ beyond the displayed precision.
df['ber'].value_counts()

0.000082    15
0.000082    14
0.000082    12
0.000082     5
0.000114     2
            ..
0.000116     1
0.000131     1
0.000111     1
0.000129     1
0.000142     1
Name: ber, Length: 648, dtype: int64

In [49]:
# Boolean-mask filter: keep only the rows whose 'ber' exceeds 10.
# No row qualifies (the largest 'ber' reported by describe() is 0.00017),
# so the result is an empty frame with the same columns.
df.loc[df['ber'] > 10]

Unnamed: 0,rp,snr,(snr)^0.5,ber,2*snr,2*ber,snr+ber,(snr+ber)^2,s parameter,mean,label


In [50]:
# Correlation: how strongly two columns vary together.
# df.corr() returns the pairwise correlation matrix of the columns.
# Many entries are exactly 1.0 — presumably because those columns are linear
# transformations of one another (e.g. '2*snr' vs 'snr'); verify against the data source.
df.corr()

Unnamed: 0,rp,snr,(snr)^0.5,ber,2*snr,2*ber,snr+ber,(snr+ber)^2,s parameter,mean,label
rp,1.0,1.0,0.999498,-0.98328,1.0,-0.983279,1.0,0.997952,1.0,0.99808,0.006655
snr,1.0,1.0,0.999498,-0.98328,1.0,-0.983279,1.0,0.997952,1.0,0.99808,0.006655
(snr)^0.5,0.999498,0.999498,1.0,-0.98854,0.999498,-0.988539,0.999498,0.995426,0.999498,0.995618,0.006754
ber,-0.98328,-0.98328,-0.98854,1.0,-0.98328,1.0,-0.98328,-0.969699,-0.98328,-0.970191,-0.007151
2*snr,1.0,1.0,0.999498,-0.98328,1.0,-0.983279,1.0,0.997952,1.0,0.99808,0.006655
2*ber,-0.983279,-0.983279,-0.988539,1.0,-0.983279,1.0,-0.983279,-0.969697,-0.983279,-0.970189,-0.007147
snr+ber,1.0,1.0,0.999498,-0.98328,1.0,-0.983279,1.0,0.997952,1.0,0.99808,0.006655
(snr+ber)^2,0.997952,0.997952,0.995426,-0.969699,0.997952,-0.969697,0.997952,1.0,0.997952,0.999998,0.006438
s parameter,1.0,1.0,0.999498,-0.98328,1.0,-0.983279,1.0,0.997952,1.0,0.99808,0.006655
mean,0.99808,0.99808,0.995618,-0.970191,0.99808,-0.970189,0.99808,0.999998,0.99808,1.0,0.006446


In [51]:
import numpy as np

In [53]:
# Toy rows with progressively more missing values (NaN), used for the dropna demos
lst_data = [
    [1, 2, 3],
    [3, 4, np.nan],
    [5, 6, np.nan],
    [np.nan] * 3,
]

In [54]:
# Turn the nested list into a frame with default integer row/column labels.
# NOTE: this rebinds `df`, discarding the CSV data loaded earlier.
df = pd.DataFrame(data=lst_data)

In [55]:
# Show the small frame, including its missing values
df.head()

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,3.0,4.0,
2,5.0,6.0,
3,,,


In [71]:
# Top-left block by position: first three rows, first two columns (no missing values there)
df.iloc[:3, :2]

Unnamed: 0,0,1
0,1.0,2.0
1,3.0,4.0
2,5.0,6.0


In [64]:
# Dropping NaN values row-wise: any row containing a missing value is removed,
# leaving only row 0. Returns a new frame; `df` itself is not modified.
df.dropna(axis='index')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0


In [61]:
# Dropping NaN values column-wise: every column contains a missing value
# (row 3 is entirely NaN), so no columns survive and only the row index remains.
df.dropna(axis='columns')

0
1
2
3
