In [39]:
import pandas as pd
import numpy as np

# # Empty series

In [2]:
s = pd.Series()
s

Series([], dtype: float64)

In [3]:
s = pd.Series([1,2,3,4]) # default index start from 0
s

0    1
1    2
2    3
3    4
dtype: int64

In [4]:
s = pd.Series([1,2,3,4], index=['a','b','c','d']) # change the index
s

a    1
b    2
c    3
d    4
dtype: int64

In [5]:
dict = pd.Series({'a':0.,'b':1.,'c':2.}) # create series from dictionary

In [6]:
dict

a    0.0
b    1.0
c    2.0
dtype: float64

In [8]:
dict = pd.Series({'a':0.,'b':1.,'c':2.}, dtype='int') # set datatype integer default was float

In [9]:
dict

a    0
b    1
c    2
dtype: int32

In [13]:
# change the index if we pass any other index like x,y,z it will show NaN
dict = pd.Series({'a':0.,'b':1.,'c':2.}, index=['b','a','c']) 
dict

b    1.0
a    0.0
c    2.0
dtype: float64

In [19]:
# create series of scalar
scal = pd.Series(5,index=[0,1,2,3]) # match index and repeat the value equal to index 
scal

0    5
1    5
2    5
3    5
dtype: int64

In [28]:
s = pd.Series([1,2,3,4,5,6])
s[:4] # start from 0 and end 4

2    3
3    4
4    5
5    6
dtype: int64

In [29]:
s[-3:] # last 3 elements

3    4
4    5
5    6
dtype: int64

## DataFrame

In [31]:
# create DataFrame using list default column name is 0 and index start from 0,1,2.....
df = pd.DataFrame([4,5,7,8,0])
df

Unnamed: 0,0
0,4
1,5
2,7
3,8
4,0


In [34]:
# change the index of DataFrame
df = pd.DataFrame([4,5,7,8,0], index=['a','b','c','d','e'], columns=['A'])
df

Unnamed: 0,A
a,4
b,5
c,7
d,8
e,0


In [38]:
data = [['Alex',20],['John',23],['Jony',24]]
df = pd.DataFrame(data, index=['A','B','C'], columns=['Name', 'Age'], dtype='float') # set data type float
df

Unnamed: 0,Name,Age
A,Alex,20.0
B,John,23.0
C,Jony,24.0


In [41]:
# Create DataFrame using dictionary
dict = {
            'Name': ['John','Jony','Gray','Alex'],
            'Age' : [23,20,24,25],
       }
df = pd.DataFrame(dict)
df

Unnamed: 0,Name,Age
0,John,23
1,Jony,20
2,Gray,24
3,Alex,25


In [43]:
# create dataframe using list of dictionary

lst = [{'a': 1, 'b': 2},{'a':3, 'b': 5, 'c': 4}] # if key value not found return NaN
df = pd.DataFrame(lst)
df

Unnamed: 0,a,b,c
0,1,2,
1,3,5,4.0


In [60]:
# create dictionary from series
s = {'Name': pd.Series(['G','A','F'], index=['a','b','c']), 'Age': pd.Series([20,30,40,50], index=['a','b','c','d'])}
df = pd.DataFrame(s)
df

Unnamed: 0,Name,Age
a,G,20
b,A,30
c,F,40
d,,50


In [65]:
# Select element by label, .loc used for that
df.loc['a']

Name     G
Age     20
Name: a, dtype: object

In [66]:
df.loc['d']

Name    NaN
Age      50
Name: d, dtype: object

In [67]:
# selection by integer location
df.iloc[1] # select second record index start from 0

Name     A
Age     30
Name: b, dtype: object

In [71]:
df[1:3]

Unnamed: 0,Name,Age
b,A,30
c,F,40


In [81]:
# add row(s)
# df1 = {'Demo': pd.Series([1,2,3])}
# df = df.append(df1)
# df

In [119]:
# Transpose Rows will column and column will be row
df.T

Unnamed: 0,a,b,c,d
Name,G,A,F,
Age,20,30,40,50.0


# Panel

In [105]:
# create Panel

data = np.random.rand(2,4,5)
p = pd.Panel(data)
p

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  exec(code_obj, self.user_global_ns, self.user_ns)


<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 4 (major_axis) x 5 (minor_axis)
Items axis: 0 to 1
Major_axis axis: 0 to 3
Minor_axis axis: 0 to 4

## Pandas - Basic Functionality

In [109]:
s = pd.Series(np.random.randn(4))
s

0    0.700556
1    0.140597
2   -0.972902
3   -0.742629
dtype: float64

In [110]:
print(s.axes)

[RangeIndex(start=0, stop=4, step=1)]


In [112]:
s.empty # check empty or not return blooean value 

False

In [113]:
s.ndim # array dimension

1

In [114]:
s.size # total values count

4

In [116]:
s.values

array([ 0.70055642,  0.14059723, -0.97290194, -0.74262929])

In [117]:
s.head(2)

0    0.700556
1    0.140597
dtype: float64

In [118]:
s.tail(2)

2   -0.972902
3   -0.742629
dtype: float64

In [121]:
s.shape

(4,)

In [123]:
s.sum()

-0.8743775769162201

In [125]:
s.mean()

-0.21859439422905502

In [126]:
s.std()

0.7783421033781422

In [133]:
tb = pd.DataFrame(np.random.randn(5,3), columns=['col1','col2','col3'])
tb

Unnamed: 0,col1,col2,col3
0,-0.935241,-0.028642,-0.153422
1,0.187142,-0.077272,0.924268
2,-0.814245,-0.795544,-0.411433
3,-0.206274,0.969879,-0.050408
4,0.2499,1.200652,-1.26345


In [140]:
def adder(ele1,ele2):
    return ele1 + ele2

tb = pd.DataFrame(np.random.randn(5,3), columns=['col1','col2','col3'])
# print(tb)
tb.pipe(adder,2) # add 2 
tb.apply(np.mean)

col1    0.187049
col2    0.106682
col3    0.420035
dtype: float64

In [152]:
print(tb)
print(tb['col1'].map(lambda x: x*100))
tb.apply(np.mean)

       col1      col2      col3
0 -0.109319 -0.105102 -1.064994
1  2.279106  1.438149  1.232220
2 -1.092363 -0.519390  2.335351
3 -0.563887  1.289662 -1.296113
4  0.421708 -1.569907  0.893710
0    -10.931874
1    227.910579
2   -109.236347
3    -56.388709
4     42.170791
Name: col1, dtype: float64


col1    0.187049
col2    0.106682
col3    0.420035
dtype: float64

In [155]:
s = pd.Series(['Tom', 'William Rick', '@John'])
print(s.str.islower())

0    False
1    False
2    False
dtype: bool


In [156]:
print(s.str.isupper())

0    False
1    False
2    False
dtype: bool


In [157]:
print(s.str.upper())

0             TOM
1    WILLIAM RICK
2           @JOHN
dtype: object


In [162]:
print(s.str.strip(' '))

0             Tom
1    William Rick
2           @John
dtype: object


In [163]:
print(s.str.len())

0     3
1    12
2     5
dtype: int64


In [164]:
print(s.str.startswith('T'))

0     True
1    False
2    False
dtype: bool


In [165]:
print(s.str.endswith('s'))

0    False
1    False
2    False
dtype: bool


In [167]:
print(s.str.count('o'))

0    1
1    0
2    1
dtype: int64


In [168]:
print(s.str.cat(sep='-'))

Tom-William Rick-@John


In [170]:
print(s.str.repeat(2))

0                      TomTom
1    William RickWilliam Rick
2                  @John@John
dtype: object


In [171]:
print(s.str.replace('@','$'))

0             Tom
1    William Rick
2           $John
dtype: object


## Statistic function

In [173]:
s = pd.Series([1,2,3,4,5])
s.pct_change()

0         NaN
1    1.000000
2    0.500000
3    0.333333
4    0.250000
dtype: float64

In [176]:
s = pd.DataFrame(np.random.randn(5,2))
# print(s)
s.pct_change()

          0         1
0 -1.718268  1.523625
1 -0.317645 -0.955524
2 -1.442142  0.663745
3 -0.520561  0.623530
4  0.190984  0.280370


Unnamed: 0,0,1
0,,
1,-0.815136,-1.627139
2,3.540103,-1.69464
3,-0.639036,-0.060588
4,-1.36688,-0.55035


## Missing Data

In [198]:
df = pd.DataFrame(np.random.randn(5,3), index=['a', 'c', 'd', 'e', 'f'], columns=['One', 'Two', 'Three'])
df = df.reindex(['a', 'b', 'c', 'd', 'e', 'f','h'])
df

Unnamed: 0,One,Two,Three
a,0.765075,0.924214,-1.862509
b,,,
c,-2.406153,-1.373755,0.733296
d,0.019783,0.130526,-0.661303
e,-0.339707,0.869465,-1.249547
f,-0.582244,-1.058891,2.133865
h,,,


In [199]:
df.isnull()

Unnamed: 0,One,Two,Three
a,False,False,False
b,True,True,True
c,False,False,False
d,False,False,False
e,False,False,False
f,False,False,False
h,True,True,True


In [200]:
df['One'].isnull()

a    False
b     True
c    False
d    False
e    False
f    False
h     True
Name: One, dtype: bool

In [201]:
df.fillna(0)

Unnamed: 0,One,Two,Three
a,0.765075,0.924214,-1.862509
b,0.0,0.0,0.0
c,-2.406153,-1.373755,0.733296
d,0.019783,0.130526,-0.661303
e,-0.339707,0.869465,-1.249547
f,-0.582244,-1.058891,2.133865
h,0.0,0.0,0.0


In [202]:
df.fillna(method='pad') # add scalar value

Unnamed: 0,One,Two,Three
a,0.765075,0.924214,-1.862509
b,0.765075,0.924214,-1.862509
c,-2.406153,-1.373755,0.733296
d,0.019783,0.130526,-0.661303
e,-0.339707,0.869465,-1.249547
f,-0.582244,-1.058891,2.133865
h,-0.582244,-1.058891,2.133865


In [204]:
df.fillna(method='backfill') 

Unnamed: 0,One,Two,Three
a,0.765075,0.924214,-1.862509
b,-2.406153,-1.373755,0.733296
c,-2.406153,-1.373755,0.733296
d,0.019783,0.130526,-0.661303
e,-0.339707,0.869465,-1.249547
f,-0.582244,-1.058891,2.133865
h,,,


In [208]:
# replace missing or generic value
df = pd.DataFrame({'one' : [10,20,30,40,50,2000], 'two': [1000,20,30,14,15,20]})
print(df)
df.replace({1000:10, 2000:60})

    one   two
0    10  1000
1    20    20
2    30    30
3    40    14
4    50    15
5  2000    20


Unnamed: 0,one,two
0,10,10
1,20,20
2,30,30
3,40,14
4,50,15
5,60,20
