In [6]:
import pandas as pd
import numpy as np

In [7]:
# Show NumPy's module-level help (type, string form, file location, docstring)
# using IPython's `?` introspection suffix. This is IPython syntax, not plain
# Python, so the cell only runs inside an IPython/Jupyter kernel.
np?

[1;31mType:[0m        module
[1;31mString form:[0m <module 'numpy' from 'C:\\Users\\phw50\\anaconda3\\lib\\site-packages\\numpy\\__init__.py'>
[1;31mFile:[0m        c:\users\phw50\anaconda3\lib\site-packages\numpy\__init__.py
[1;31mDocstring:[0m  
NumPy
=====

Provides
  1. An array object of arbitrary homogeneous items
  2. Fast mathematical operations over arrays
  3. Linear Algebra, Fourier Transforms, Random Number Generation

How to use the documentation
----------------------------
Documentation is available in two forms: docstrings provided
with the code, and a loose standing reference guide, available from
`the NumPy homepage <https://www.scipy.org>`_.

We recommend exploring the docstrings using
`IPython <https://ipython.org>`_, an advanced Python shell with
TAB-completion and introspection capabilities.  See below for further
instructions.

The docstring examples assume that `numpy` has been imported as `np`::

  >>> import numpy as np

Code snippets are indicated by 

In [8]:
# Show the installed pandas version. As the last expression of the cell, the
# version string is echoed as the cell's output, so no print() call is needed.
pd.__version__

'1.2.4'

In [11]:
"""
    Pandas Objects: Series
"""
# Build a Series from a plain Python list. No index is supplied, so pandas
# labels the entries with the default integer positions 0..n-1.
ser = pd.Series(data=[0.25, 0.5, 0.75, 1.0])  # via the Series constructor
print(ser)

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


In [13]:
# A Series exposes two main attributes: `values` (the underlying data) and
# `index` (the labels). Both are kept in variables and printed one per line.
arr, ind = ser.values, ser.index
print(arr, ind, sep='\n')

[0.25 0.5  0.75 1.  ]
RangeIndex(start=0, stop=4, step=1)


In [15]:
# Label-based indexing: attach explicit string labels to the values, then
# look a single value up by its label.
ser = pd.Series(data=[0.25, 0.5, 0.75, 1.0], index=list('abcd'))
print(ser, ser['a'], sep='\n')

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64
0.25


In [None]:
"""
Dictionary and Series
"""

In [23]:
"""
Create a series from a dictionary
"""
# Population figures keyed by state name; the dict keys become the Series index.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
population = pd.Series(data=population_dict)
print(population)

# Slicing by label: unlike positional slicing, the end label ('New York')
# is included in the result.
print(population.loc['California':'New York'])

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64
California    38332521
Texas         26448193
New York      19651127
dtype: int64


In [24]:
"""
Pandas object: DataFrame
"""

# Area figures keyed by the same state names used for `population_dict`.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}

area = pd.Series(data=area_dict)
print(area)

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64


In [28]:
# Combine the `population` and `area` Series into a single DataFrame: each
# keyword becomes a column name, and the state labels become the row index.
states = pd.DataFrame(data=dict(population=population, area=area))
print(states)

# A DataFrame carries two sets of labels: `index` for rows, `columns` for columns.
print(states.index, '\n')
print(states.columns)

            population    area
California    38332521  423967
Texas         26448193  695662
New York      19651127  141297
Florida       19552860  170312
Illinois      12882135  149995
Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object') 

Index(['population', 'area'], dtype='object')


In [29]:
# Construct a DataFrame from a 2D NumPy array.
#
# The random generator is seeded so that Restart & Run All reproduces the same
# numbers; an unseeded np.random.rand() call produces different values on
# every run, which makes the saved output impossible to regenerate.
rng = np.random.default_rng(seed=0)
arr = rng.random((3, 2))  # 3 rows x 2 columns of floats drawn from [0, 1)
print(arr, '\n')

# Name the two columns and label the three rows explicitly.
df = pd.DataFrame(arr, columns=['foo', 'bar'], index=['a', 'b', 'c'])

print(df)


[[0.62374142 0.12078939]
 [0.29443236 0.23311672]
 [0.98694267 0.28522864]] 

        foo       bar
a  0.623741  0.120789
b  0.294432  0.233117
c  0.986943  0.285229


In [34]:
"""
Series object manipulation: dictionary-style
"""

labels = ['a', 'b', 'c', 'd']
ser = pd.Series(data=[0.25, 0.5, 0.75, 1.0], index=labels)

print(ser, '\n')
# Like a dict, `in` tests membership among the labels (keys), not among the
# stored values: 'a' is a label, 0.25 is only a value.
for candidate in ('a', 0.25):
    print(candidate in ser)

print(ser.index)

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64 

True
False
Index(['a', 'b', 'c', 'd'], dtype='object')


In [35]:
"""
Series object manipulation: array-style
"""

# NOTE(review): keys() and assignment by label are the dict-like side of the
# Series interface, while the cell heading says "array-style" - confirm which
# was intended.
# keys() returns the index labels; it is printed before the Series is modified.
print(ser.keys())
# Assigning to a label that does not exist yet ('e') adds a new entry ...
ser['e'] = 1.25
# ... while assigning to an existing label ('a') overwrites its value.
ser['a'] = 0.125

print(ser)

Index(['a', 'b', 'c', 'd'], dtype='object')
a    0.125
b    0.500
c    0.750
d    1.000
e    1.250
dtype: float64


In [36]:
"""
Caution: Slicing Series object using explicit/implicit indexing
"""

'\nCaution: Slicing Series object using explicit/implicit indexing\n'

In [49]:
"""
DataFrame object manipulation
"""

# Derive a new column from two existing ones. The division is element-wise,
# and the assignment adds the column to `states` in place.
# NOTE(review): the saved output also lists a 'destiny' column that no visible
# cell creates - presumably left over from an earlier run with a misspelled
# name; re-run from a fresh kernel to confirm it disappears.
states['density'] = states['population'].div(states['area'])
print(states)

            population    area     destiny     density
California    38332521  423967   90.413926   90.413926
Texas         26448193  695662   38.018740   38.018740
New York      19651127  141297  139.076746  139.076746
Florida       19552860  170312  114.806121  114.806121
Illinois      12882135  149995   85.883763   85.883763


In [42]:
# Indexers: iloc selects by integer position (loc, by contrast, selects by label).
# The slice below keeps the first three rows of `states`.
# NOTE(review): this cell's execution count (42) is lower than the preceding
# cell's (49) and its saved output has no 'density' column - presumably it was
# run before that column was added; confirm with Restart & Run All.

states.iloc[:3]

Unnamed: 0,population,area,destiny
California,38332521,423967,90.413926
Texas,26448193,695662,38.01874
New York,19651127,141297,139.076746


In [50]:
# Masking and fancy indexing combined in one loc call: the boolean mask picks
# the rows, the list of labels picks the columns.
is_dense = states['density'] > 100
print(states.loc[is_dense, ['population', 'density']], '\n')


          population     density
New York    19651127  139.076746
Florida     19552860  114.806121 

