In [20]:
import pandas as pd
import numpy as np

In [2]:
# Build a Series from a plain list; pandas supplies a default 0..n-1 index.
series = pd.Series(data=[4, 5, 6, 7])
series

0    4
1    5
2    6
3    7
dtype: int64

In [3]:
# A Series exposes its data and its labels as two separate attributes.
# Only the final expression of a cell is rendered, so the output shows `.index`.
series.values  # the underlying values
series.index  # the index labels (a default RangeIndex here)

RangeIndex(start=0, stop=4, step=1)

In [4]:
# Supply explicit labels through `index=`; they pair with the values by position.
series_indx_lab = pd.Series(
    data=[3, 2, 0, 1],
    index=['cat', 'dog', 'frog', 'hog'],
)
series_indx_lab

cat     3
dog     2
frog    0
hog     1
dtype: int64

In [5]:
# Ways to read from a label-indexed Series (only the last result is rendered).
series_indx_lab.index  # the label index
series_indx_lab.frog  # attribute-style access to the value labelled 'frog'
series_indx_lab.loc['dog': 'frog']  # label slice; unlike positional slices, the end label is included
series_indx_lab[['dog', 'hog', 'cat']]  # a list of labels selects entries in the order given

dog    2
hog    1
cat    3
dtype: int64

In [6]:
# NumPy-style operations keep each value attached to its index label.
# (Only the last expression's result is rendered below.)
series_indx_lab[series_indx_lab > 1] # boolean mask: keeps the entries whose value is > 1
series_indx_lab * 2 # scalar arithmetic is applied element-wise

cat     6
dog     4
frog    0
hog     2
dtype: int64

In [7]:
# A Series behaves much like a fixed-length, ordered dict:
# `in` tests the index labels, not the values.
'cat' in series_indx_lab

True

In [8]:
# A plain dict converts straight into a Series: its keys become the index labels.
dict_series = dict(lion=100, elephant=200, hippo=77)
series2 = pd.Series(dict_series)
series2

elephant    200
hippo        77
lion        100
dtype: int64

In [9]:
# An explicit `index=` picks which dict keys to use and in what order;
# a label with no matching key ('froggy') gets NaN.
series3 = pd.Series(
    data=dict_series,
    index=['froggy', 'lion', 'hippo', 'elephant'],
)
series3

froggy        NaN
lion        100.0
hippo        77.0
elephant    200.0
dtype: float64

In [10]:
# Null checks exist both as top-level pandas functions and as Series methods.
# isnull/notnull are complements; only the last result is rendered.
pd.isnull(series2)
series2.isnull()
series2.notnull()
pd.notnull(series2)

elephant    True
hippo       True
lion        True
dtype: bool

In [11]:
# Arithmetic between Series aligns on index labels, not on position.
# A label that is missing or NaN on either side yields NaN in the result.
series2 + series3

elephant    400.0
froggy        NaN
hippo       154.0
lion        200.0
dtype: float64

In [12]:
# A Series and its index each carry a `name` attribute, both shown in the repr.
series3.name = 'pop'
series3.index.name = 'animal'
series3

animal
froggy        NaN
lion        100.0
hippo        77.0
elephant    200.0
Name: pop, dtype: float64

In [13]:
# DataFrames: a dict of equal-length lists becomes a table, one column per key.
data = dict(
    state=['A', 'B', 'C'],
    year=[2016, 2017, 2018],
    pop=[123, 234, 345],
)
pd.DataFrame(data)

Unnamed: 0,pop,state,year
0,123,A,2016
1,234,B,2017
2,345,C,2018


In [14]:
# `columns=` selects which dict keys become columns and fixes their order.
pd.DataFrame(data, columns=['state', 'year', 'pop'])

Unnamed: 0,state,year,pop
0,A,2016,123
1,B,2017,234
2,C,2018,345


In [15]:
# As with a Series, a requested column that has no matching key in `data`
# ('another' here) is still created and filled with missing values (NaN).
frame = pd.DataFrame(data, columns=['another', 'state', 'year', 'pop'])
frame

Unnamed: 0,another,state,year,pop
0,,A,2016,123
1,,B,2017,234
2,,C,2018,345


In [16]:
# Common ways to inspect a DataFrame (only the final expression is rendered).
# Attribute access such as `frame.state` only works when the column name is a
# valid identifier that does not shadow a DataFrame attribute; `frame.pop`, for
# instance, is the DataFrame.pop method, so that column needs `frame['pop']`.
frame.state
frame['state']
frame.columns
frame.index
frame.values
frame.state.value_counts()  # call it; without the () this is only the bound method
frame.head()
# etc

Unnamed: 0,another,state,year,pop
0,,A,2016,123
1,,B,2017,234
2,,C,2018,345


In [17]:
# Row/column access: `.loc` selects by label, `.iloc` by integer position.
# Only the final expression is rendered.
frame.loc[1]  # row whose index label is 1
frame.iloc[1]  # row at position 1 (the same row here, given the default 0..n-1 index)
frame.iloc[:, 1:]  # all rows, every column from position 1 onward
frame.loc[1, 'state':]  # row labelled 1, columns from 'state' through the last one

state       B
year     2017
pop       234
Name: 1, dtype: object

In [18]:
# Assigning a scalar to a column broadcasts it to every row.
# Note: this modifies `frame` in place, so earlier displays of it are now stale.
frame['another'] = 'doggy'
frame

Unnamed: 0,another,state,year,pop
0,doggy,A,2016,123
1,doggy,B,2017,234
2,doggy,C,2018,345


In [22]:
# Assign a NumPy range to a column. The array length has to equal the number
# of rows, so size it from the frame instead of hard-coding a count.
frame['another'] = np.arange(len(frame))

In [29]:
# Index objects: every Series carries one, reachable through `.index`.
series4 = pd.Series(data=[0, 1, 2], index=['first', 'second', 'third'])
series4

# Keep a reference to the Index so it can be inspected and reused on its own.
index_obj = series4.index
index_obj

Index(['first', 'second', 'third'], dtype='object')

In [31]:
# Index objects are immutable: reading by position works, item assignment does not.

index_obj[1]

# The TypeError is the point of this cell, so catch and show it rather than
# letting it abort a "Restart & Run All".
try:
    index_obj[1] = 'soemthing'
except TypeError as err:
    print('TypeError:', err)

TypeError: Index does not support mutable operations

In [39]:
# An existing Index object can be reused as the index of another Series.
# The values are random integers in [0, 19); no seed is set in this cell,
# so they change from run to run.
series5 = pd.Series(
    data=np.random.randint(low=0, high=19, size=3),
    index=index_obj,
)
series5

first     17
second    14
third      4
dtype: int32

In [47]:
# A few handy Index methods (only the final expression is rendered).

series5.index.unique()
# Index.is_categorical() is deprecated since pandas 2.0; test the dtype instead.
isinstance(series5.index.dtype, pd.api.types.CategoricalDtype)
series5.index.isin(['first'])

array([ True, False, False])