# Pandas Library
The Pandas library is built on NumPy and provides easy-to-use
data structures and data analysis tools for the Python
programming language.

  The conventional way of importing the pandas library

In [135]:
import pandas as pd

## Pandas Data Structures
### Series
A one-dimensional labeled array
capable of holding any data type.
index | list
------|------
a | 3
b | -5
c | 7
d | 4

In [136]:
# Four values, each paired with an explicit string label instead of the default 0..3 positions
s = pd.Series(data=[3, -5, 7, 4], index=["a", "b", "c", "d"])

### DataFrames
A two-dimensional labeled
data structure with columns
of potentially different types.


In [137]:
# Column-oriented input: each key is a column name, each list holds that
# column's values (one entry per row).
details = {
    "names": ["Miguel", "Armin", "Rossum"],
    "percent": [90, 96, 95],
    "average": [70.8, 87.67, 98.12],
}

# `columns` pins the column order explicitly.
df = pd.DataFrame(data=details, columns=["names", "percent", "average"])
df

Unnamed: 0,names,percent,average
0,Miguel,90,70.8
1,Armin,96,87.67
2,Rossum,95,98.12


## I/O
### Reading and Writing CSV, Excel and Other Formats

In [138]:
## to write to csv
# The file is written here (rather than assumed to exist) so that this cell
# works on a fresh kernel / clean checkout. Only the three original columns
# are written, so re-running the cell always produces the same file.
# By default to_csv() also writes the row index as a leading, unnamed column;
# read_csv() loads it back as "Unnamed: 0" (pass index=False to to_csv() to
# leave it out).
df[["names", "percent", "average"]].to_csv("dataframe.csv")
## to read from csv
df = pd.read_csv("dataframe.csv")



# similarly read_excel() and to_excel()

## Selection
`Also Check Numpy Arrays`

In [139]:
## GETTING

# 1. Slice rows: everything from the second row onwards
rows_after_first = df[1:]
print("df[1:] =\n", rows_after_first, "\n")

# 2. Select one row by integer position; passing a list keeps the result a DataFrame
second_row = df.iloc[[1]]
print("df.iloc[[1]] =\n", second_row, "\n")

# 3. Select by row & column labels; with list selectors the result is a
#    1x1 DataFrame rather than a bare scalar
first_percent = df.loc[[0], ['percent']]
print("df.loc[[0], ['percent']] =\n", first_percent, "\n")

## SETTING

# 1. Add a new column by assigning a list with one entry per row
df["skills"] = ["Python", "Rust, Python", "C, Python"]

# Display the frame without the "Unnamed: 0" column. drop() returns a new
# DataFrame and the result is not assigned, so `df` itself keeps the column.
df.drop(columns="Unnamed: 0")

df[1:] =
    Unnamed: 0   names  percent  average
1           1   Armin       96    87.67
2           2  Rossum       95    98.12 

df.iloc[[1]] =
    Unnamed: 0  names  percent  average
1           1  Armin       96    87.67 

df.loc[[0], ['percent']] =
    percent
0       90 



Unnamed: 0,names,percent,average,skills
0,Miguel,90,70.8,Python
1,Armin,96,87.67,"Rust, Python"
2,Rossum,95,98.12,"C, Python"


### Axis parameter in Pandas
The `axis` parameter appears in many pandas functions; it selects whether an operation acts on rows or on columns.

parameter | description
---------|-------------
`axis = 0`|refers to rows (the index)
`axis = 1` |refers to columns

## Dropping Rows and Columns

In [140]:
# Build a fresh frame from `details`, then remove one row and one column.
# Each drop() returns a new DataFrame, so the calls can be chained.
data = (
    pd.DataFrame(details, columns=["names", "percent", "average"])
    .drop(index=0)               # drops the row labelled 0
    .drop(columns=["average"])   # drops the "average" column
)
data

Unnamed: 0,names,percent
1,Armin,96
2,Rossum,95


## Filtering

In [141]:
# filter() keeps only the requested labels along the given axis.
# Note: "skills" is not a column of this frame; the label is simply ignored,
# so only "names" survives the column filter.
dat = (
    pd.DataFrame(details, columns=["names", "percent", "average"])
    .filter(items=["names", "skills"], axis="columns")  # filter by column
    .filter(items=[0, 2], axis="index")                 # filter by row label
)
dat

Unnamed: 0,names
0,Miguel
2,Rossum


## Basic Information about a dataset

In [142]:
# Reload from disk so the report describes the CSV contents rather than the
# `df` left behind by earlier cells.
df = pd.read_csv("dataframe.csv")

# Dimensions as (rows, columns)
print(f"Shape = {df.shape}\n")

# Column labels
print(f"Columns = {df.columns}\n")

# Non-null count per column
print(f"Count Info =\n{df.count()}\n")

# info() prints its report itself, so it is called directly instead of being
# wrapped in print().
print("Full info = ")
df.info()

# Descriptive statistics for the numeric columns
print("\nSummary(statistical details) = ")
summary_stats = df.describe()
print(summary_stats)

Shape = (3, 4)

Columns = Index(['Unnamed: 0', 'names', 'percent', 'average'], dtype='object')

Count Info =
Unnamed: 0    3
names         3
percent       3
average       3
dtype: int64

Full info = 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  3 non-null      int64  
 1   names       3 non-null      object 
 2   percent     3 non-null      int64  
 3   average     3 non-null      float64
dtypes: float64(1), int64(2), object(1)
memory usage: 224.0+ bytes

Summary(statistical details) = 
       Unnamed: 0    percent    average
count         3.0   3.000000   3.000000
mean          1.0  93.666667  85.530000
std           1.0   3.214550  13.785148
min           0.0  90.000000  70.800000
25%           0.5  92.500000  79.235000
50%           1.0  95.000000  87.670000
75%           1.5  95.500000  92.895000
max           2.0  96.000000  98.1