# Pandas

In [1]:
# Series    (1-D) -> pandas.Series
# DataFrame (2-D) -> pandas.DataFrame
# Panel     (3-D) -> pandas.Panel (deprecated in pandas 0.20 and removed in 0.25; use a MultiIndex DataFrame instead)

# Series


In [2]:
import pandas as pd

# Build a labelled one-dimensional Series: the values 1..5 keyed by 'row1'..'row5'.
row_labels = ['row1', 'row2', 'row3', 'row4', 'row5']
my_series = pd.Series([1, 2, 3, 4, 5], index=row_labels)
my_series

row1    1
row2    2
row3    3
row4    4
row5    5
dtype: int64

In [3]:
# Show values: the Series data as a NumPy array, without the index labels
my_series.values


array([1, 2, 3, 4, 5], dtype=int64)

In [4]:
# Show index: the row labels of the Series
my_series.index

Index(['row1', 'row2', 'row3', 'row4', 'row5'], dtype='object')

In [5]:
# Select a value by its index label via attribute access.
# NOTE: per the pandas docs this shortcut only works for labels that are valid Python
# identifiers and do not collide with an existing Series attribute or method.
my_series.row2

2

In [6]:
# Select a value by its index label via bracket indexing
my_series['row2']

2

# Boolean indexing

In [7]:
# The comparison builds a boolean mask; indexing with it keeps only the values greater than 3
my_series[my_series>3]

row4    4
row5    5
dtype: int64

In [8]:
# Example: replace the row labels with the letters 'A' through 'E'
my_series.index = list('ABCDE')
my_series


A    1
B    2
C    3
D    4
E    5
dtype: int64

# DataFrame

In [9]:
# A DataFrame is a two-dimensional, size-mutable, potentially heterogeneous
# tabular data structure with labeled axes (rows and columns).

In [10]:
# Create DataFrame with Array

In [12]:
import numpy as np

# 4x4 integer matrix; each inner list becomes one row of the frame.
my_array = np.array([
    [1, 5, 9, 13],
    [2, 6, 10, 14],
    [3, 7, 11, 15],
    [4, 8, 12, 16],
])
row_names = ['row1', 'row2', 'row3', 'row4']
col_names = ['col1', 'col2', 'col3', 'col4']
# Wrap the array in a DataFrame, attaching explicit row and column labels.
my_df = pd.DataFrame(my_array, index=row_names, columns=col_names)
my_df

Unnamed: 0,col1,col2,col3,col4
row1,1,5,9,13
row2,2,6,10,14
row3,3,7,11,15
row4,4,8,12,16


In [13]:
#Create DataFrame with Dictionary

In [14]:
# Each dictionary key becomes a column label and its list supplies that column's values.
my_dict = {
    'col1': [1, 2, 3, 4],
    'col2': [5, 6, 7, 8],
    'col3': [9, 10, 11, 12],
    'col4': [13, 14, 15, 19],
}
row_names = ['row1', 'row2', 'row3', 'row4']
my_df = pd.DataFrame(my_dict, index=row_names)
my_df

Unnamed: 0,col1,col2,col3,col4
row1,1,5,9,13
row2,2,6,10,14
row3,3,7,11,15
row4,4,8,12,19


In [16]:
# Show index: the row labels of the DataFrame
my_df.index



Index(['row1', 'row2', 'row3', 'row4'], dtype='object')

In [17]:

# Show columns: the column labels of the DataFrame
my_df.columns

Index(['col1', 'col2', 'col3', 'col4'], dtype='object')

In [18]:
# Show values: the DataFrame contents as a 2-D NumPy array, without row or column labels
my_df.values

array([[ 1,  5,  9, 13],
       [ 2,  6, 10, 14],
       [ 3,  7, 11, 15],
       [ 4,  8, 12, 19]], dtype=int64)

# Selecting


In [20]:
# Display the frame that the selection examples below operate on
my_df


Unnamed: 0,col1,col2,col3,col4
row1,1,5,9,13
row2,2,6,10,14
row3,3,7,11,15
row4,4,8,12,19


In [22]:
# Select the whole 'row1' row by label. A single .loc[row, columns] call replaces
# the chained .loc[...][:] lookup and returns the same Series.
my_df.loc['row1', :]

col1     1
col2     5
col3     9
col4    13
Name: row1, dtype: int64

In [23]:
# Select one cell by row label and column label in a single .loc call
# (one lookup instead of chained indexing through an intermediate Series).
my_df.loc['row1', 'col2']

5

In [24]:
# Select the first row by integer position; .iloc[row, columns] avoids the
# redundant chained [:] slice and returns the same Series.
my_df.iloc[0, :]

col1     1
col2     5
col3     9
col4    13
Name: row1, dtype: int64

In [25]:
# Select one cell by integer position (second row, fourth column).
# .iloc[1][3] would look up the integer key 3 on a label-indexed Series, a positional
# fallback that pandas 2.1 deprecated; .iloc[row, col] stays purely positional.
my_df.iloc[1, 3]

14

# Edit a DataFrame

In [26]:
# Add a column: assigning to a new column label appends 'col5' to the frame
my_df['col5'] = list(range(20, 24))
my_df

Unnamed: 0,col1,col2,col3,col4,col5
row1,1,5,9,13,20
row2,2,6,10,14,21
row3,3,7,11,15,22
row4,4,8,12,19,23


In [27]:
# Change values inside the DataFrame: set col1 to 0 for the listed rows
rows_to_zero = ['row1', 'row2']
my_df.loc[rows_to_zero, 'col1'] = 0
my_df

Unnamed: 0,col1,col2,col3,col4,col5
row1,0,5,9,13,20
row2,0,6,10,14,21
row3,3,7,11,15,22
row4,4,8,12,19,23


In [None]:
# Reset index

In [28]:
# Replace the row labels with the default 0..n-1 integer index. drop=True discards the
# old labels instead of inserting them as a column. The result is only displayed, not
# assigned, so my_df itself keeps its row labels.
my_df.reset_index(drop=True)

Unnamed: 0,col1,col2,col3,col4,col5
0,0,5,9,13,20
1,0,6,10,14,21
2,3,7,11,15,22
3,4,8,12,19,23


In [29]:
# Deleting a column: columns='col5' is the same as passing 'col5' with axis=1.
# The result is a new frame; my_df itself still has col5.
my_df.drop(columns='col5')

Unnamed: 0,col1,col2,col3,col4
row1,0,5,9,13
row2,0,6,10,14
row3,3,7,11,15
row4,4,8,12,19


In [30]:
# Renaming: map old column labels to new ones; returns a new frame and leaves my_df unchanged
new_names = {'col4': 'col_four'}
my_df.rename(columns=new_names)


Unnamed: 0,col1,col2,col3,col_four,col5
row1,0,5,9,13,20
row2,0,6,10,14,21
row3,3,7,11,15,22
row4,4,8,12,19,23


In [31]:
# Replacing: every cell equal to 0 becomes 1 (returns a new frame; my_df is unchanged).
# regex=True was dropped: regular-expression matching only applies to string data,
# so it had no role in this integer-to-integer replacement.
my_df.replace({0: 1})



Unnamed: 0,col1,col2,col3,col4,col5
row1,1,5,9,13,20
row2,1,6,10,14,21
row3,3,7,11,15,22
row4,4,8,12,19,23


# Apply a function to a column

In [32]:
# Change col1 to float.
# astype(float) yields a real float64 column. Formatting each value with '{:3.2f}'
# would store strings (dtype object) instead, which sort and compare as text and
# make the cell fail when it is run a second time.
my_df['col1'] = my_df['col1'].astype(float)
my_df

Unnamed: 0,col1,col2,col3,col4,col5
row1,0.0,5,9,13,20
row2,0.0,6,10,14,21
row3,3.0,7,11,15,22
row4,4.0,8,12,19,23


In [33]:
# Apply a function to every value of col2: float() converts each entry to a float.
# Applying a '{0:3.2f}'.format lambda would turn the numbers into strings instead.
# (my_df['col2'].astype(float) is the vectorised equivalent.)
my_df['col2'] = my_df['col2'].apply(float)
my_df

Unnamed: 0,col1,col2,col3,col4,col5
row1,0.0,5.0,9,13,20
row2,0.0,6.0,10,14,21
row3,3.0,7.0,11,15,22
row4,4.0,8.0,12,19,23


# Sorting

In [34]:
# axis='columns' (same as axis=1) sorts by column label; ascending=False gives col5 ... col1
my_df.sort_index(axis='columns', ascending=False)


Unnamed: 0,col5,col4,col3,col2,col1
row1,20,13,9,5.0,0.0
row2,21,14,10,6.0,0.0
row3,22,15,11,7.0,3.0
row4,23,19,12,8.0,4.0


In [35]:
# axis='index' (same as axis=0) sorts by row label; ascending=False gives row4 ... row1
my_df.sort_index(axis='index', ascending=False)

Unnamed: 0,col1,col2,col3,col4,col5
row4,4.0,8.0,12,19,23
row3,3.0,7.0,11,15,22
row2,0.0,6.0,10,14,21
row1,0.0,5.0,9,13,20


In [36]:
# Sort rows by the values in col1, largest first
my_df.sort_values('col1', ascending=False)

Unnamed: 0,col1,col2,col3,col4,col5
row4,4.0,8.0,12,19,23
row3,3.0,7.0,11,15,22
row1,0.0,5.0,9,13,20
row2,0.0,6.0,10,14,21


In [40]:
# Sort rows by the values in col1, smallest first
my_df.sort_values('col1', ascending=True)

Unnamed: 0,col1,col2,col3,col4,col5
row1,0.0,5.0,9,13,20
row2,0.0,6.0,10,14,21
row3,3.0,7.0,11,15,22
row4,4.0,8.0,12,19,23


# Methods

In [42]:
# Show the first rows; head() returns up to 5 by default, and this frame has only 4
my_df.head()

Unnamed: 0,col1,col2,col3,col4,col5
row1,0.0,5.0,9,13,20
row2,0.0,6.0,10,14,21
row3,3.0,7.0,11,15,22
row4,4.0,8.0,12,19,23


In [43]:
# Show only the first 2 rows
my_df.head(2)

Unnamed: 0,col1,col2,col3,col4,col5
row1,0.0,5.0,9,13,20
row2,0.0,6.0,10,14,21


In [44]:
# Show only the last 2 rows
my_df.tail(2)

Unnamed: 0,col1,col2,col3,col4,col5
row3,3.0,7.0,11,15,22
row4,4.0,8.0,12,19,23


In [46]:
# Import data (replace the placeholder paths with real file locations)

# data = pd.read_csv('path/to/file.csv')
# data = pd.read_excel('path/to/file.xlsx')
# data
