In [32]:
import pandas as pd
import numpy as np

In [33]:
# Series holding the four evenly spaced values 0.25, 0.5, 0.75, 1.0,
# labelled 'a' through 'd'.
data = pd.Series(
    np.linspace(0, 1, 5)[1:],  # five points on [0, 1], minus the leading 0.0
    index=list('abcd'),
)
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [34]:
# Dictionary-style lookup: fetch the value stored under the label 'b'.
data['b']

np.float64(0.5)

In [35]:
# `in` tests membership among the index labels, as it does for dict keys.
'b' in data

True

In [36]:
# 'e' is not one of the index labels at this point, so this is False.
'e' in data

False

In [37]:
# `in` looks at the index labels, not the values: 0.25 is stored as a
# value (under 'a') but is not a label, so this is False.
0.25 in data

False

In [38]:
# keys() returns the index labels, mirroring dict.keys().
data.keys()

Index(['a', 'b', 'c', 'd'], dtype='object')

In [39]:
# items() yields (label, value) pairs, mirroring dict.items().
list(data.items())

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

In [40]:
# NOTE(review): this cell is identical to the one directly before it and
# produces the same output; it looks like an accidental duplicate.
list(data.items())

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

In [41]:
# Assigning to a label that does not exist yet appends a new entry,
# the same way assigning to a new key extends a dict.
data['e'] = 1.25
data

a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
dtype: float64

In [42]:
# np.sort returns a sorted plain NumPy array; the index labels are dropped.
np.sort(data)

array([0.25, 0.5 , 0.75, 1.  , 1.25])

In [43]:
# Slicing by explicit index labels: the end label 'c' is included.
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [44]:
# Slicing by implicit integer position: every second entry from position 0
# up to, but not including, position 4.
data[0:4:2]

a    0.25
c    0.75
dtype: float64

In [45]:
# Boolean masking: keep only the entries strictly between 0.3 and 0.8.
data[(0.3 < data) & (data < 0.8)]

b    0.50
c    0.75
dtype: float64

In [46]:
# `area` figures, keyed by state name.
area = pd.Series({
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
})

# `pop` figures for the same five states, in the same order.
pop = pd.Series({
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
})

In [None]:
# Build a DataFrame from the two Series: each dict key becomes a column
# name, and the rows are aligned on the Series' shared index (the states).
# Note: this rebinds `data`, which referred to a Series in earlier cells.
data = pd.DataFrame({
    'area': area,
    'pop': pop,
})

In [48]:
# Display the frame: one row per state, with 'area' and 'pop' as columns.
data

Unnamed: 0,area,pop
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860
Illinois,149995,12882135


In [None]:
# Dictionary-style column access: data['area'] returns the 'area' column
# as a Series whose index is the state names (the DataFrame's row labels).
data['area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [50]:
# Attribute-style access (data.area) returns the very same object as the
# dictionary-style lookup data['area'].
data.area is data['area']

True

In [51]:
# Attribute-style access is not reliable: `pop` is also the name of a
# DataFrame method, so data.pop is that method and not the 'pop' column.
data.pop is data['pop']

False

In [None]:
# Dictionary-style assignment adds a new column: 'density' is the
# element-wise ratio of the 'pop' column to the 'area' column.
data['density'] = data['pop'] / data['area']
data

Unnamed: 0,area,pop,density
California,423967,38332521,90.413926
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763


In [57]:
# View the frame as a two-dimensional NumPy array: one row per state, one
# column per DataFrame column. to_numpy() is the accessor the pandas
# documentation recommends over the older .values attribute.
data.to_numpy()

array([[4.23967000e+05, 3.83325210e+07, 9.04139261e+01],
       [6.95662000e+05, 2.64481930e+07, 3.80187404e+01],
       [1.41297000e+05, 1.96511270e+07, 1.39076746e+02],
       [1.70312000e+05, 1.95528600e+07, 1.14806121e+02],
       [1.49995000e+05, 1.28821350e+07, 8.58837628e+01]])

In [60]:
# Transpose: rows and columns swap, so the states become the columns.
data.T

Unnamed: 0,California,Texas,New York,Florida,Illinois
area,423967.0,695662.0,141297.0,170312.0,149995.0
pop,38332520.0,26448190.0,19651130.0,19552860.0,12882140.0
density,90.41393,38.01874,139.0767,114.8061,85.88376


In [62]:
# Row 0 of the underlying array, i.e. the values for 'California'.
# to_numpy() is the accessor the pandas documentation recommends over .values.
data.to_numpy()[0]

array([4.23967000e+05, 3.83325210e+07, 9.04139261e+01])

In [63]:
# Passing a single label to [] on a DataFrame selects that column.
data['area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [69]:
# data['California'] fails: [] with a single label looks up a COLUMN, and
# 'California' is a row (index) label, not a column name.
# To read one cell, give .loc the row label and the column label together
# rather than chaining two lookups (data['area']['California']).
data.loc['California', 'area']

np.int64(423967)

In [None]:
# iloc selects by integer position: the first three rows, and every column
# from position 2 onward (here only 'density').
data.iloc[:3, 2:]

Unnamed: 0,density
California,90.413926
Texas,38.01874
New York,139.076746


In [None]:
# loc selects by label, and label slices include their endpoint: rows up to
# and including 'Texas', columns up to and including 'pop'.
data.loc[:'Texas', :'pop']

Unnamed: 0,area,pop
California,423967,38332521
Texas,695662,26448193


In [89]:
# Boolean mask combined with label-based column selection: keep the rows
# (states) whose density exceeds 100 and show only 'pop' and 'density'.
data.loc[data.density > 100, ['pop', 'density']]

Unnamed: 0,pop,density
New York,19651127,139.076746
Florida,19552860,114.806121


In [None]:
# Assignment through iloc: overwrite the cell at row position 0, column
# position 2 (California's density) with 90. This modifies `data` in place.
data.iloc[0, 2] = 90
data

Unnamed: 0,area,pop,density
California,423967,38332521,90.0
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763


In [None]:
# Plain [] on a DataFrame is overloaded: a single label such as
# data['area'] looks up a COLUMN, whereas a slice selects ROWS. That is why
# slicing from 'Texas' to 'Illinois' works while data['California'] fails
# ('California' is a row label, not a column). Using .loc states the
# row-wise, label-based intent explicitly; the end label is included.
data.loc['Texas':'Illinois']

Unnamed: 0,area,pop,density
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763
