<a href="https://colab.research.google.com/github/aviraltyagi/MLBasics/blob/main/Basic_Pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [149]:
import pandas as pd
import numpy as np

## Basic DataFrame Operations

In [150]:
# 4x4 integer matrix used as the raw data for the first DataFrame.
# Each row is the previous row shifted up by one.
a = np.array(
    [
        [1, 2, 3, 4],
        [2, 3, 4, 5],
        [3, 4, 5, 6],
        [4, 5, 6, 7],
    ]
)

In [151]:
# Wrap the array in a DataFrame, labelling its four columns A-D.
df = pd.DataFrame(a, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
0,1,2,3,4
1,2,3,4,5
2,3,4,5,6
3,4,5,6,7


In [152]:
# Preview the top rows of df (the frame has only 4 rows, so all of them are shown).
df.head()

Unnamed: 0,A,B,C,D
0,1,2,3,4
1,2,3,4,5
2,3,4,5,6
3,4,5,6,7


In [153]:
# Print a summary of df: index range, per-column non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       4 non-null      int64
 1   B       4 non-null      int64
 2   C       4 non-null      int64
 3   D       4 non-null      int64
dtypes: int64(4)
memory usage: 256.0 bytes


In [154]:
# Column-wise iterator that yields (column label, Series) pairs.
# DataFrame.iteritems() was deprecated in pandas 1.5 and removed in pandas 2.0;
# DataFrame.items() is the equivalent call and works on old and new versions.
it_items = df.items()

In [155]:
# Pull the first pair from the column iterator: the label 'A' and that column as a Series.
next(it_items)

('A', 0    1
 1    2
 2    3
 3    4
 Name: A, dtype: int64)

In [156]:
# Advancing the same iterator again yields the next column, 'B'.
next(it_items)

('B', 0    2
 1    3
 2    4
 3    5
 Name: B, dtype: int64)

In [157]:
# Row-wise iterator: yields (index label, row as a Series) pairs.
it_rows = df.iterrows()

In [158]:
# First row: index label 0 together with the values of columns A-D as a Series.
next(it_rows)

(0, A    1
 B    2
 C    3
 D    4
 Name: 0, dtype: int64)

In [159]:
# Reshape from wide to long: every cell of columns A-D becomes its own row,
# with the source column name under "variable" and the cell under "Value".
df.melt(value_vars=["A", "B", "C", "D"], value_name="Value")

Unnamed: 0,variable,Value
0,A,1
1,A,2
2,A,3
3,A,4
4,B,2
5,B,3
6,B,4
7,B,5
8,C,3
9,C,4


## Advanced Indexing

In [160]:
# loc selects rows and columns by label; it also accepts boolean masks.
# Here every row is kept (":") and only the columns that contain at least
# one value greater than 5 are selected.
df.loc[:, df.gt(5).any(axis=0)]

Unnamed: 0,C,D
0,3,4
1,4,5
2,5,6
3,6,7


In [161]:
# Combine a row mask (C greater than 4) with a column mask (any value greater than 5).
df.loc[df["C"].gt(4), df.gt(5).any(axis=0)]

Unnamed: 0,C,D
2,5,6
3,6,7


In [162]:
# iloc selects rows and columns by integer position rather than by label.
# The slices below pick the first two rows and the first two columns.
df.iloc[0:2, 0:2]

Unnamed: 0,A,B
0,1,2
1,2,3


In [163]:
# Show df with column A promoted to the index. The result is only displayed,
# not assigned, so df itself keeps column A for the cells that follow.
df.set_index(keys="A")

Unnamed: 0_level_0,B,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2,3,4
2,3,4,5
3,4,5,6
4,5,6,7


## DataFrame Analysis

In [164]:
# Distinct values found in column A, returned as a NumPy array.
df["A"].unique()

array([1, 2, 3, 4])

In [165]:
# Flag rows that repeat an earlier row; every row of df is distinct,
# so the resulting mask is all False.
df.duplicated(keep="first")

0    False
1    False
2    False
3    False
dtype: bool

In [166]:
# Small frame written row by row so the duplicate is easy to spot:
# rows 0 and 1 carry identical values.
df1 = pd.DataFrame(
    [
        [1, 2, 3, 4],
        [1, 2, 3, 4],
        [3, 3, 4, 4],
        [4, 4, 5, 5],
    ],
    columns=["A", "B", "C", "D"],
)

In [167]:
# Display df1; rows 0 and 1 hold identical values.
df1

Unnamed: 0,A,B,C,D
0,1,2,3,4
1,1,2,3,4
2,3,3,4,4
3,4,4,5,5


In [168]:
# The first occurrence of a row is not flagged; only later repeats are,
# so row 1 (a copy of row 0) is the only True.
df1.duplicated(keep="first")

0    False
1     True
2    False
3    False
dtype: bool

In [169]:
# Drop repeated rows, comparing across all columns and keeping the first
# occurrence. The original index labels (0, 2, 3) are preserved.
df1.drop_duplicates(subset=None, keep="first")

Unnamed: 0,A,B,C,D
0,1,2,3,4
2,3,3,4,4
3,4,4,5,5


In [170]:
# Aggregation example: one (animal, count) record per row.
df2 = pd.DataFrame(
    [("Dog", 3), ("Dog", 4), ("Cat", 3), ("Cat", 2)],
    columns=["Animal", "Count"],
)
df2

Unnamed: 0,Animal,Count
0,Dog,3
1,Dog,4
2,Cat,3
3,Cat,2


In [171]:
# Total count per animal; the group keys become the index of the result.
df2.groupby("Animal").sum()

Unnamed: 0_level_0,Count
Animal,Unnamed: 1_level_1
Cat,5
Dog,7


## Combining Data

In [172]:
# Two small frames that share the key column "A".
# Keys "a" and "b" appear in both; "c" is only in df1 and "d" is only in df2.
# Note: these rebind df1 and df2, replacing the frames defined earlier.
df1 = pd.DataFrame(dict(A=["a", "b", "c"], B=[3, 5, 7]))
df2 = pd.DataFrame(dict(A=["a", "b", "d"], D=[2, 4, 6]))

In [173]:
# Left join on A: keep every row of df1; D is missing where df2 has no matching key ("c").
df1.merge(df2, how="left", on="A")

Unnamed: 0,A,B,D
0,a,3,2.0
1,b,5,4.0
2,c,7,


In [174]:
# Right join on A: keep every row of df2; B is missing where df1 has no matching key ("d").
df1.merge(df2, how="right", on="A")

Unnamed: 0,A,B,D
0,a,3.0,2
1,b,5.0,4
2,d,,6


In [175]:
# Inner join on A: keep only the keys present in both frames ("a" and "b").
df1.merge(df2, how="inner", on="A")

Unnamed: 0,A,B,D
0,a,3,2
1,b,5,4


In [176]:
# Outer join on A: keep the keys from either frame, leaving gaps where one side has no match.
df1.merge(df2, how="outer", on="A")

Unnamed: 0,A,B,D
0,a,3.0,2.0
1,b,5.0,4.0
2,c,7.0,
3,d,,6.0


In [177]:
# Place the two frames side by side, aligning rows on the index rather than on column A.
# Both frames are indexed 0-2, so the inner join keeps every row, and the result
# carries two columns named "A" (one from each frame).
pd.concat(objs=[df1, df2], axis="columns", join="inner")

Unnamed: 0,A,B,A.1,D
0,a,3,a,2
1,b,5,b,4
2,c,7,d,6
