In [None]:
print("Hello Pandas operations!")

Hello Pandas operations!


In [None]:
import numpy as np
import pandas as pd

In [None]:
# Small demo frame: two integer columns and one string column.
df = pd.DataFrame(
    {
        "col1": [1, 2, 3, 4],
        "col2": [444, 555, 666, 444],
        "col3": ["abc", "def", "ghi", "xyz"],
    }
)
df.head()

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


# Operations in a DataFrame

## Finding Unique Values

In [None]:
df['col2'].unique()

array([444, 555, 666])

In [None]:
len(df['col2'].unique())

3

In [None]:
df['col2'].nunique()

3

## Counting Values

In [None]:
df['col2'].value_counts()

444    2
555    1
666    1
Name: col2, dtype: int64

## Conditional Selections

In [None]:
df

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [None]:
# Return the rows of the DataFrame where 'col1' is greater than two
df[df['col1']>2]

Unnamed: 0,col1,col2,col3
2,3,666,ghi
3,4,444,xyz


In [None]:
# Combining conditions: wrap each one in parentheses and join them with & (element-wise AND)
df[(df['col1']>2) & (df['col2']==444)]

Unnamed: 0,col1,col2,col3
3,4,444,xyz


## Apply Method
One of the most powerful methods in our tool belt when using Pandas.

In [None]:
# We can grab a column and call a built-in method on it
df['col2'].sum()

2109

In [None]:
# We can also apply a function of our own:
def times2(x):
    """Return ``x`` multiplied by 2."""
    doubled = x * 2
    return doubled

In [None]:
# apply() calls our function on each element of the column and returns the results as a new Series:
df['col2'].apply(times2)

0     888
1    1110
2    1332
3     888
Name: col2, dtype: int64

In [None]:
# Let's do the same thing with a lambda expression instead of a named function.
# This is probably the most powerful feature in Pandas: the ability to apply our own custom lambda expressions!
df['col2'].apply(lambda x:x*2)

0     888
1    1110
2    1332
3     888
Name: col2, dtype: int64

## Removing Columns

In [None]:
df

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [None]:
# drop() returns a new DataFrame and leaves df itself unchanged.
# If you want the removal to occur in place, you have to specify 'inplace=True' (or reassign the result).
df.drop('col1', axis=1)

Unnamed: 0,col2,col3
0,444,abc
1,555,def
2,666,ghi
3,444,xyz


## Returning The Columns Name & Index Attributes

In [None]:
df.columns

Index(['col1', 'col2', 'col3'], dtype='object')

In [None]:
df.index

RangeIndex(start=0, stop=4, step=1)

## Sorting & Ordering a DataFrame

In [None]:
# The DataFrame before sorting; to sort it, just pass in the column we want to sort by:
df

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [None]:
df.sort_values('col2')

Unnamed: 0,col1,col2,col3
0,1,444,abc
3,4,444,xyz
1,2,555,def
2,3,666,ghi


In [None]:
df.sort_values(by='col2')

Unnamed: 0,col1,col2,col3
0,1,444,abc
3,4,444,xyz
1,2,555,def
2,3,666,ghi


## Booleans

In [None]:
df.isnull()

Unnamed: 0,col1,col2,col3
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False


# Pivot Tables
The pivot table takes simple column-wise data as input and groups the entries into a two-dimensional table that provides a **multidimensional summarization of the data**

As we build up the pivot table, I think it’s easiest to take it one step at a time. Add items and check each step to verify you are getting the results you expect. Don’t be afraid to play with the order and the variables to see what presentation makes the most sense for your needs.

In [None]:
# Sample data for the pivot-table walkthrough:
# three categorical columns (A, B, C) and one numeric column (D).
data = pd.DataFrame(
    {
        "A": ["foo", "foo", "foo", "bar", "bar", "bar"],
        "B": ["one", "one", "two", "two", "one", "one"],
        "C": ["x", "y", "x", "y", "x", "y"],
        "D": [1, 3, 2, 5, 4, 1],
    }
)

# NOTE: this rebinds `df`; from here on it refers to the A/B/C/D frame.
df = pd.DataFrame(data)

In [None]:
df

Unnamed: 0,A,B,C,D
0,foo,one,x,1
1,foo,one,y,3
2,foo,two,x,2
3,bar,two,y,5
4,bar,one,x,4
5,bar,one,y,1


In [None]:
# pivot_table is called here with three arguments: values, index, and columns.
# No aggfunc is given, so pandas uses its default aggregation (mean), which is why
# the cells are floats; (index, column) combinations absent from the data show as NaN.
df.pivot_table(values='D', index=['A','B'], columns='C')

Unnamed: 0_level_0,C,x,y
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,4.0,1.0
bar,two,,5.0
foo,one,1.0,3.0
foo,two,2.0,


In [None]:
# https://medium.com/jungletronics/pandas-operations-4b8f7a4b4139
print("Thanks everyone! See you in the Next Pandas Lecture o/")

Thanks everyone! See you in the Next Pandas Lecture o/
