## Selecting Entries

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
ser1=Series(np.arange(3),index=['A','B','C'])

In [3]:
ser1

A    0
B    1
C    2
dtype: int64

In [4]:
ser1=2*ser1

In [5]:
ser1

A    0
B    2
C    4
dtype: int64

In [6]:
print(ser1['B'])

#or the index still starts by 0
print(ser1[1])

2
2


In [7]:
#to grab a range of indices 
ser1[:3]

A    0
B    2
C    4
dtype: int64

In [9]:
# selecting indices by logic
#example: select all indices where value is bigger than 3
ser1[ser1>3]

C    4
dtype: int64

## Selecting Entries DF

In [10]:
dframe=DataFrame(np.arange(25).reshape(5,5), index=['NYC','LA','SF','DC','CH'], 
                 columns=['A','B','C','D','E'])

In [11]:
dframe

Unnamed: 0,A,B,C,D,E
NYC,0,1,2,3,4
LA,5,6,7,8,9
SF,10,11,12,13,14
DC,15,16,17,18,19
CH,20,21,22,23,24


In [12]:
dframe['B']

NYC     1
LA      6
SF     11
DC     16
CH     21
Name: B, dtype: int64

In [13]:
dframe[['B','D']]

Unnamed: 0,B,D
NYC,1,3
LA,6,8
SF,11,13
DC,16,18
CH,21,23


In [15]:
#grab all rows where the column values are bigger than 8
dframe[dframe['C']>8]

Unnamed: 0,A,B,C,D,E
SF,10,11,12,13,14
DC,15,16,17,18,19
CH,20,21,22,23,24


In [16]:
#boolian check
dframe>10

Unnamed: 0,A,B,C,D,E
NYC,False,False,False,False,False
LA,False,False,False,False,False
SF,False,True,True,True,True
DC,True,True,True,True,True
CH,True,True,True,True,True


In [18]:
#column values for a a row
dframe.loc['LA']

A    5
B    6
C    7
D    8
E    9
Name: LA, dtype: int64

## Data Alignment

In [19]:
ser1

A    0
B    2
C    4
dtype: int64

In [20]:
ser1=Series([0,1,2], index=['A','B','C'])

In [21]:
ser2=Series([3,4,5,6], index=['A','B','C','D'])

In [22]:
ser1,ser2

(A    0
 B    1
 C    2
 dtype: int64, A    3
 B    4
 C    5
 D    6
 dtype: int64)

In [24]:
#Nan added when no index value matching
ser1+ser2

A    3.0
B    5.0
C    7.0
D    NaN
dtype: float64

## in DataFrame

In [25]:
dframe1 = DataFrame(np.arange(4).reshape(2,2),index=['NYC','LA'] ,columns=list('AB'))

In [26]:
dframe1

Unnamed: 0,A,B
NYC,0,1
LA,2,3


In [27]:
dframe2= DataFrame(np.arange(9).reshape(3,3), index=['NYC','SF','LA'],columns=list('ADC'))

In [28]:
dframe2

Unnamed: 0,A,D,C
NYC,0,1,2
SF,3,4,5
LA,6,7,8


In [30]:
#if done this way, all non mathcing values will be added to Nan
dframe1+dframe2

Unnamed: 0,A,B,C,D
LA,8.0,,,
NYC,0.0,,,
SF,,,,


In [31]:
# the other method of adding is

dframe1.add(dframe2,fill_value=0)

Unnamed: 0,A,B,C,D
LA,8.0,3.0,8.0,7.0
NYC,0.0,1.0,2.0,1.0
SF,3.0,,5.0,4.0


In [36]:
# operations between series and df
dframe2

Unnamed: 0,A,D,C
NYC,0,1,2
SF,3,4,5
LA,6,7,8


In [64]:
ser3=dframe2.loc['NYC']

## Ranking and sorting

In [43]:
ser1=Series(range(3), index=['C','B','A'])

In [44]:
ser1.sort_index()

A    2
B    1
C    0
dtype: int64

In [47]:
from numpy.random import randn

In [48]:
ser2=Series(randn(10))

In [49]:
ser2

0   -0.729489
1    0.110009
2    0.817551
3    0.241709
4   -2.309270
5   -1.360836
6    1.140095
7   -0.776085
8    0.836338
9    2.344975
dtype: float64

In [58]:
ser2=ser2.sort_values()

In [59]:
ser2.rank()

4     1.0
5     2.0
7     3.0
0     4.0
1     5.0
3     6.0
2     7.0
8     8.0
6     9.0
9    10.0
dtype: float64

In [60]:
import webbrowser

In [61]:
webbrowser.open('https://www.udemy.com/learning-python-for-data-analysis-and-visualization/learn/v4/t/lecture/2394442?start=0')

True