In [4]:
"""
Pandas
"""

import pandas as pd
import numpy as np

In [6]:
# Display the help document
np?

[1;31mType:[0m        module
[1;31mString form:[0m <module 'numpy' from 'C:\\Users\\user\\anaconda3\\lib\\site-packages\\numpy\\__init__.py'>
[1;31mFile:[0m        c:\users\user\anaconda3\lib\site-packages\numpy\__init__.py
[1;31mDocstring:[0m  
NumPy
=====

Provides
  1. An array object of arbitrary homogeneous items
  2. Fast mathematical operations over arrays
  3. Linear Algebra, Fourier Transforms, Random Number Generation

How to use the documentation
----------------------------
Documentation is available in two forms: docstrings provided
with the code, and a loose standing reference guide, available from
`the NumPy homepage <https://www.scipy.org>`_.

We recommend exploring the docstrings using
`IPython <https://ipython.org>`_, an advanced Python shell with
TAB-completion and introspection capabilities.  See below for further
instructions.

The docstring examples assume that `numpy` has been imported as `np`::

  >>> import numpy as np

Code snippets are indicated by th

In [12]:
# print the version of pandas
pd.__version__

'1.2.4'

In [14]:
"""
Pandas Objects: Series
"""
# create a series from an array

ser = pd.Series([0.25, 0.5, 0.75, 1.0]) #constructor method
print(ser)

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


In [19]:
# Two main attributes: 'values' and 'index'
arr = ser.values
print(arr)
ind = ser.index
print(ind)
print(ind.values) #실제 인덱스 값만 호출

[0.25 0.5  0.75 1.  ]
RangeIndex(start=0, stop=4, step=1)
[0 1 2 3]


In [21]:
# Label-based Indexing

ser = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])
print(ser)

print(ser['a'])

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64
0.25


In [2]:
"""
Dictionary and Series
"""
dict = {'a': 1, 2: 'two', 'third': True}
print(dict)

{'a': 1, 2: 'two', 'third': True}


In [5]:
"""
Create a series from a dictionary
"""
population_dict = {'California': 38332521,
                    'Texas': 26448193,
                    "New York": 19651127,
                    'Florida': 19552860,
                    'Illinois': 12882135}
population = pd.Series(population_dict)
print(population)
population['California':'New York']

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64


California    38332521
Texas         26448193
New York      19651127
dtype: int64

In [6]:
"""
Pandas object: DataFrame
"""
area_dict = {'California': 423967,'Texas': 695662, 'New York': 141297,
'Florida': 170312, 'Illinois': 149995}

area = pd.Series(area_dict)
print(area)

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
dtype: int64


In [7]:
# Constructor a DataFrame containing 'population' and 'area' Series

states = pd.DataFrame({'population': population, 'area': area})
print(states, '\n')
print(states.index, '\n')
print(states.columns)

            population    area
California    38332521  423967
Texas         26448193  695662
New York      19651127  141297
Florida       19552860  170312
Illinois      12882135  149995 

Index(['California', 'Texas', 'New York', 'Florida', 'Illinois'], dtype='object') 

Index(['population', 'area'], dtype='object')


In [12]:
# Constructor a DataFrame from a 2D NumPy array

print(states['area'], '\n')

arr = np.random.rand(3, 2)
print(arr, '\n')
pd.DataFrame(arr, columns=['foo', 'bar'], index=['a', 'b', 'c'])

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64 

[[0.25191001 0.62471158]
 [0.55606846 0.81063778]
 [0.23356242 0.61791431]] 



Unnamed: 0,foo,bar
a,0.25191,0.624712
b,0.556068,0.810638
c,0.233562,0.617914


In [15]:
"""
Series object manipulation: dictionary-style
"""

ser = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])
print(ser, '\n')
print(ser[0])
print(ser['b'])
print('a' in ser)
print(0.25 in ser)

print(ser.keys()) #same as index
print(ser.index)

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64 

0.25
0.5
True
False
Index(['a', 'b', 'c', 'd'], dtype='object')
Index(['a', 'b', 'c', 'd'], dtype='object')


In [52]:
"""
Series object manipulation: array-style
"""

print(ser.keys())
ser['e'] = 1.25
ser['a'] = 0.125
print(ser.keys())
print(ser)

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
a    0.125
b    0.500
c    0.750
d    1.000
e    1.250
dtype: float64


In [19]:
"""
DataFrame object Manipulation
"""

print(states['area'],'\n') # more useful !!
print(states.area)

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64 

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64


In [20]:
"""
DataFrame  object manipulation
"""

states['density'] = states['population'] / states['area']
print(states)

            population    area     density
California    38332521  423967   90.413926
Texas         26448193  695662   38.018740
New York      19651127  141297  139.076746
Florida       19552860  170312  114.806121
Illinois      12882135  149995   85.883763


In [21]:
# Indexers

print(states.loc['California':'New York'], '\n')
print(states.iloc[:3, 1:2])

            population    area     density
California    38332521  423967   90.413926
Texas         26448193  695662   38.018740
New York      19651127  141297  139.076746 

              area
California  423967
Texas       695662
New York    141297


In [29]:
# Masking anf fancy indexing using the Loc indexer 

print(states.loc[states.density>100, ['population', 'density']], '\n')

          population     density
New York    19651127  139.076746
Florida     19552860  114.806121 

