In [5]:
import pandas as pd
import numpy as np

# Pandas

### [Data Structures]
### 1.Series (1 dimensional homogeneous where size is immutable)
### 2.DataFrame (2 dimensional heterogeneous mutable)
### 3.Panels (Multi-dimensional heterogeneous; removed from pandas in version 0.25 - use a MultiIndex DataFrame instead)

# |Series| 

In [4]:
# Creating an empty Series.
# The dtype is passed explicitly: calling pd.Series() with no dtype emits a
# DeprecationWarning on pandas 1.x and defaults to `object` on pandas 2.x,
# whereas this example is meant to show an empty float64 Series.

s = pd.Series(dtype='float64')
print(s)

Series([], dtype: float64)


  s = pd.Series()


In [6]:
# Example 1: Series from a NumPy array. No index is given, so pandas
# assigns the default positional labels 0..n-1.

data = np.array(list('abcd'))
s = pd.Series(data=data)
print(s)


0    a
1    b
2    c
3    d
dtype: object


In [7]:
# Example 2: Series from a NumPy array with caller-supplied integer
# labels (100..103) instead of the default 0..n-1 index.

data = np.array(list('abcd'))
s = pd.Series(data=data, index=list(range(100, 104)))
print(s)

100    a
101    b
102    c
103    d
dtype: object


In [8]:
# Creating a Series from a dictionary.
# Example 1: the dict keys become the index labels and the dict values
# become the data.

data = dict(a=0., b=1., c=2.)
s = pd.Series(data=data)
print(s)

a    0.0
b    1.0
c    2.0
dtype: float64


In [9]:
# Example 2: an explicit index selects and orders the dict entries;
# a label that is not a key of the dict ('d') is filled with NaN.

data = dict(a=0., b=1., c=2.)
s = pd.Series(data=data, index=list('bcda'))
print(s)

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64


In [11]:
# Creating a Series from a scalar.
# Example 1: the scalar is repeated once for every label in the index.

s = pd.Series(data=5, index=list(range(4)))
print(s)

0    5
1    5
2    5
3    5
dtype: int64


In [13]:
# Example 2: positional slicing of a Series that carries string labels.

s = pd.Series(data=[1, 2, 3, 4, 5], index=list('abcde'))
print(s.iloc[:3])     # first three elements
print(s.iloc[-3:])    # last three elements

a    1
b    2
c    3
dtype: int64
c    3
d    4
e    5
dtype: int64


In [14]:
# Look up a single element by its index label.

s = pd.Series(data=[1, 2, 3, 4, 5], index=list('abcde'))
print(s.loc['a'])

1


In [16]:
# Look up several elements at once by passing a list of index labels.

s = pd.Series(data=[1, 2, 3, 4, 5], index=list('abcde'))
print(s.loc[['a', 'c', 'd']])

a    1
c    3
d    4
dtype: int64


In [17]:
# If a label is not contained in the index, a KeyError is raised.
# The exception is the point of this example, so it is caught and displayed
# rather than left to abort the cell (and any "Run All" of the notebook).

s = pd.Series([1,2,3,4,5], index = ['a','b','c','d','e'])
try:
    print(s['f'])
except KeyError as err:
    # str() of a KeyError is the repr of the missing key, so this prints: KeyError: 'f'
    print(f"KeyError: {err}")

KeyError: 'f'

# |DATAFRAME|

In [19]:
# A DataFrame constructed without any data has no rows and no columns.

df = pd.DataFrame(data=None)
print(df)

Empty DataFrame
Columns: []
Index: []


In [21]:
# Creating a DataFrame from a list of lists.

# Example 1: every inner list is one row; `columns` names the fields.
data = [
    ['Alex', 10],
    ['Bob', 12],
    ['Clarke', 13],
]
df = pd.DataFrame(data=data, columns=['Name', 'Age'])
print(df)

     Name  Age
0    Alex   10
1     Bob   12
2  Clarke   13


In [22]:
# Example 2: the same table, with the 'Age' column stored as float.
# Passing dtype=float to the constructor asks pandas to cast every column,
# including the string 'Name' column; older pandas only warned and fell back,
# newer pandas raises. Casting just the numeric column with astype is explicit
# and works on both.
# `data` is redefined so this cell also runs on its own.
data = [['Alex',10],['Bob',12],['Clarke',13]]
df = pd.DataFrame(data, columns=['Name','Age']).astype({'Age': float})
print(df)

     Name   Age
0    Alex  10.0
1     Bob  12.0
2  Clarke  13.0


In [24]:
# Example: a DataFrame built from a dict of equal-length lists, with
# custom row labels supplied through `index`.

data = dict(
    Name=['Tom', 'Jack', 'Steve', 'Ricky'],
    Age=[28, 34, 29, 42],
)
df = pd.DataFrame(data=data, index=['rank1', 'rank2', 'rank3', 'rank4'])
print(df)

        Name  Age
rank1    Tom   28
rank2   Jack   34
rank3  Steve   29
rank4  Ricky   42


In [27]:
# Creating a DataFrame from a list of dicts: each dict is one row.

data = [dict(a=1, b=2), dict(a=5, b=10, c=20)]

# Both requested columns are keys of the dicts, so their values carry over.
df1 = pd.DataFrame(data=data, index=['first', 'second'], columns=['a', 'b'])
# 'b1' is not a key of any dict, so that column is filled with NaN.
df2 = pd.DataFrame(data=data, index=['first', 'second'], columns=['a', 'b1'])

print('#df1 output', df1, sep='\n')
print('#df2 output', df2, sep='\n')

#df1 output
        a   b
first   1   2
second  5  10
#df2 output
        a  b1
first   1 NaN
second  5 NaN


In [31]:
# Creating a DataFrame from a dict of Series.

# Example 1: the row index is the union of the Series indexes; 'one' has
# no entry for label 'd', so that cell is NaN.
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)

df = pd.DataFrame(data=d)
print("One ", df['one'], sep='\n')
print("Two ", df['two'], sep='\n')

One 
a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64
Two 
a    1
b    2
c    3
d    4
Name: two, dtype: int64


In [32]:
# Example 2: adding columns, two ways.
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)

df = pd.DataFrame(data=d)

# Way 1: assign a Series; it is aligned on the row labels, so 'd' gets NaN.
print("Adding a new column by passing as Series:")
df['three'] = pd.Series([10, 20, 30], index=list('abc'))
print(df)

# Way 2: derive the new column from columns already in the frame.
print("Adding a new column using the existing columns in DataFrame: ")
df['four'] = df['one'].add(df['three'])
print(df)

Adding a new column by passing as Series:
   one  two  three
a  1.0    1   10.0
b  2.0    2   20.0
c  3.0    3   30.0
d  NaN    4    NaN
Adding a new column using the existing columns in DataFrame: 
   one  two  three  four
a  1.0    1   10.0  11.0
b  2.0    2   20.0  22.0
c  3.0    3   30.0  33.0
d  NaN    4    NaN   NaN


In [34]:
# Column deletion, two ways: the `del` statement and DataFrame.pop.
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
    three=pd.Series([10, 20, 30], index=list('abc')),
)
df = pd.DataFrame(data=d)
print("Our dataframe is: ", df, sep='\n')

# `del` removes the named column from the frame in place.
print("Deleting the first column using DEL function")
del df['one']
print(df)

# `pop` also removes the column in place; its return value is not used here.
print("Deleting another column using POP function")
df.pop('two')
print(df)

Our dataframe is: 
   one  two  three
a  1.0    1   10.0
b  2.0    2   20.0
c  3.0    3   30.0
d  NaN    4    NaN
Deleting the first column using DEL function
   two  three
a    1   10.0
b    2   20.0
c    3   30.0
d    4    NaN
Deleting another column using POP function
   three
a   10.0
b   20.0
c   30.0
d    NaN


In [35]:
# Row selection, addition, and deletion.

# Selection by label: `.loc` with a row label returns that row as a Series.
d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)
df = pd.DataFrame(data=d)
print(df.loc['b'])

one    2.0
two    2.0
Name: b, dtype: float64


In [37]:
# Selection by integer location: `.iloc` with a position returns that
# row as a Series (position 2 is the row labelled 'c').

d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)
df = pd.DataFrame(data=d)
print(df.iloc[2])

one    3.0
two    3.0
Name: c, dtype: float64


In [38]:
# Slicing rows.

## Several rows can be selected at once with the ':' slice operator;
## the bounds are positions, so 2:4 picks the rows labelled 'c' and 'd'.

d = dict(
    one=pd.Series([1, 2, 3], index=list('abc')),
    two=pd.Series([1, 2, 3, 4], index=list('abcd')),
)
df = pd.DataFrame(data=d)
print(df.iloc[2:4])

   one  two
c  3.0    3
d  NaN    4


In [42]:
# Addition of rows.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported way to stack frames. As with append, the
# original row labels are kept, so the result has index 0, 1, 0, 1.

df1 = pd.DataFrame([[1,2],[3,4]], columns = ['a','b'])
df2 = pd.DataFrame([[5,6],[7,8]], columns = ['a','b'])

df1 = pd.concat([df1, df2])
print(df1)

   a  b
0  1  2
1  3  4
0  5  6
1  7  8


In [43]:
# Deletion of rows.
# The frames are stacked with pd.concat (DataFrame.append was removed in
# pandas 2.0), which keeps the original labels 0, 1, 0, 1.

df1 = pd.DataFrame([[1,2],[3,4]], columns = ['a','b'])
df2 = pd.DataFrame([[5,6],[7,8]], columns = ['a','b'])
df1 = pd.concat([df1, df2])

# drop removes by label, so BOTH rows labelled 0 are deleted.
df1 = df1.drop(0)
print(df1)

   a  b
1  3  4
1  7  8


## Series Basic Functionality

In [44]:
# Example 1: Creating a Series of 4 random numbers.
# No seed is set, so the values differ on every run.

s = pd.Series(data=np.random.randn(4))
print(s)

0    1.207158
1   -0.215822
2   -1.513830
3    0.858534
dtype: float64


In [45]:
# Example 2: `axes` returns a list holding the row-axis labels of the Series.

s = pd.Series(data=np.random.randn(4))
print("The axes are:", s.axes, sep='\n')

The axes are:
[RangeIndex(start=0, stop=4, step=1)]


In [46]:
# Example 3: `empty` is a boolean telling whether the object holds no elements.

s = pd.Series(data=np.random.randn(4))
print("Is the object empty?", s.empty, sep='\n')

Is the object empty?
False


In [47]:
# Example 4: `ndim` gives the number of dimensions of the object
# (1 for a Series, as the output below shows).

s = pd.Series(data=np.random.randn(4))
print("The dimensions of the object s is: ", s.ndim)

The dimensions of the object s is:  1


In [49]:
# Example 5: `size` gives the number of elements (the length) of the Series.

s = pd.Series(data=np.random.randn(2))
print(s)
print("The size of the object s is: ", s.size)

0    0.565316
1    0.800515
dtype: float64
The size of the object s is:  2


In [51]:
# Example 6: `values` exposes the data held by the Series as an array.
s = pd.Series(data=np.random.randn(4))
print(s)

print("The actual data series is:", s.values)


0   -2.232106
1   -0.319982
2    1.270431
3    0.808577
dtype: float64
The actual data series is: [-2.23210592 -0.31998215  1.27043056  0.80857662]


In [52]:
# Head and tail: head(n) returns the first n rows, tail(n) the last n rows.
s = pd.Series(data=np.random.randn(4))
print("The original series is: ", s, sep='\n')
print("The first two rows of the data series: ", s.head(2), sep='\n')
print("The last two rows of the data series: ", s.tail(2), sep='\n')


The original series is: 
0   -0.329817
1   -0.216975
2   -0.230373
3   -0.621526
dtype: float64
The first two rows of the data series: 
0   -0.329817
1   -0.216975
dtype: float64
The last two rows of the data series: 
2   -0.230373
3   -0.621526
dtype: float64


## DataFrame Basic Functionality

In [54]:
# Build a dictionary of Series; note that 'Age' holds one more entry (8)
# than 'Name' and 'Rating' (7 each).

d = dict(
    Name=pd.Series(['Tom', 'James', 'Ricky', 'Vin', 'Steve', 'Smith', 'Jack']),
    Age=pd.Series([25, 26, 25, 23, 30, 39, 29, 23]),
    Rating=pd.Series([4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8]),
)

# Assemble the DataFrame; the shorter columns are padded with NaN in the
# last row.
df = pd.DataFrame(data=d)
print("Our data series is: ", df, sep='\n')

Our data series is: 
    Name  Age  Rating
0    Tom   25    4.23
1  James   26    3.24
2  Ricky   25    3.98
3    Vin   23    2.56
4  Steve   30    3.20
5  Smith   39    4.60
6   Jack   29    3.80
7    NaN   23     NaN


In [56]:
# `T` returns the transpose of the DataFrame: rows and columns swap places.

d = dict(
    Name=pd.Series(['Tom', 'James', 'Ricky', 'Vin', 'Steve', 'Smith', 'Jack']),
    Age=pd.Series([25, 26, 25, 23, 30, 39, 29, 23]),
    Rating=pd.Series([4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8]),
)
df = pd.DataFrame(data=d)
print("The transpose of the data series is:", df.T, sep='\n')


The transpose of the data series is:
           0      1      2     3      4      5     6    7
Name     Tom  James  Ricky   Vin  Steve  Smith  Jack  NaN
Age       25     26     25    23     30     39    29   23
Rating  4.23   3.24   3.98  2.56    3.2    4.6   3.8  NaN


In [57]:
# `axes` returns a list with the row-axis labels followed by the
# column-axis labels.

d = dict(
    Name=pd.Series(['Tom', 'James', 'Ricky', 'Vin', 'Steve', 'Smith', 'Jack']),
    Age=pd.Series([25, 26, 25, 23, 30, 39, 29, 23]),
    Rating=pd.Series([4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8]),
)
df = pd.DataFrame(data=d)
print("Row axis labels and columns axis labels are: ", df.axes, sep='\n')


Row axis labels and columns axis labels are: 
[RangeIndex(start=0, stop=8, step=1), Index(['Name', 'Age', 'Rating'], dtype='object')]


In [58]:
# `dtypes` returns the data type of each column as a Series keyed by
# column name.

d = dict(
    Name=pd.Series(['Tom', 'James', 'Ricky', 'Vin', 'Steve', 'Smith', 'Jack']),
    Age=pd.Series([25, 26, 25, 23, 30, 39, 29, 23]),
    Rating=pd.Series([4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8]),
)
df = pd.DataFrame(data=d)
print("The data types of each column are: ", df.dtypes, sep='\n')

The data types of each column are: 
Name       object
Age         int64
Rating    float64
dtype: object
