# Intro to Pandas

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Build a Series from a plain list. The list mixes ints with one float,
# so pandas stores every element as a float.
s1 = pd.Series(data=[1, 2.0, 5, 4])
s1

0    1.0
1    2.0
2    5.0
3    4.0
dtype: float64

In [4]:
# Check the Python type of the object that pd.Series(...) returned.
type(s1)

pandas.core.series.Series

In [5]:
# Element type shared by all values in the Series (float64 here, because
# the input list contained a float).
s1.dtype

dtype('float64')

In [6]:
# A Series can also wrap a NumPy array: the integers 1 through 9, with the
# default 0..8 integer index.
s2 = pd.Series(data=np.arange(1, 10))
s2

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
8    9
dtype: int64

In [7]:
# With the default integer index, [] looks up the index label 1, which
# holds the second value.
s2[1]

2

In [8]:
# Label 8 is the last entry of the default 0..8 index.
s2[8]

9

In [9]:
# [] on a Series with an integer index treats the key as an index *label*,
# not a position. There is no label -1, so this raises KeyError instead of
# returning the last element. The error is caught and printed so the
# notebook still runs top to bottom; use s2.iloc[-1] for positional access.
try:
    s2[-1]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: -1

In [10]:
# Extract the Series values as a plain NumPy array.
arr = s2.to_numpy()
arr

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [11]:
# NumPy arrays index by position, so -1 is the last element.
arr[-1]

9

In [13]:
# Five evenly spaced values from 10 to 11, labelled with the letters a-e
# instead of the default integer index.
s3 = pd.Series(
    np.linspace(10, 11, num=5),
    index=list('abcde'),
)
s3

a    10.00
b    10.25
c    10.50
d    10.75
e    11.00
dtype: float64

In [14]:
# Look up a value by its string index label.
s3['a']

10.0

Looking up a label that is not in the index, e.g. `s3['z']`, raises a `KeyError`:

In [15]:
# 'z' is not one of the index labels, so [] lookup raises KeyError.
# The error is caught and printed so the notebook still runs top to bottom;
# s3.get('z') is the non-raising way to look up a label that may be absent.
try:
    s3['z']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'z'

In [16]:
# .get() does not raise for a missing label; with no default given the
# result is None, so the cell displays nothing.
s3.get('z')

In [17]:
# The second argument is the fallback returned when the label is missing.
s3.get('z', np.nan)

nan

In [19]:
# For a label that exists, .get() returns the stored value.
s3.get('a')

10.0

In [18]:
# A Series can carry a name, shown in the footer of its repr.
s4 = pd.Series(
    data=np.arange(5),
    name='letsgivethisaname',
)
s4

0    0
1    1
2    2
3    3
4    4
Name: letsgivethisaname, dtype: int64

In [20]:
# Read back the name that was passed at construction.
s4.name

'letsgivethisaname'

In [21]:
# rename() with a scalar returns a Series carrying the new name; bind it to s5.
s5 = s4.rename('whatsmyname')

In [22]:
# Display s5: same values, with the new name in the footer.
s5

0    0
1    1
2    2
3    3
4    4
Name: whatsmyname, dtype: int64

# Pandas DataFrame

In [23]:
# Each dict key becomes a column label; each list supplies that column's
# values. Rows get the default integer index.
d = {
    'a': [10, 20, 30, 40],
    'b': [40, 30, 20, 10],
}
df = pd.DataFrame(data=d)
df

Unnamed: 0,a,b
0,10,40
1,20,30
2,30,20
3,40,10


# From Dictionary of Series

In [27]:
# Each Series becomes a column and their shared index labels (a-d) become
# the row labels of the frame.
d = {
    'first': pd.Series([10, 20, 30, 40], index=list('abcd')),
    'second': pd.Series([40, 30, 20, 10], index=list('abcd')),
}
df = pd.DataFrame(data=d)
df

Unnamed: 0,first,second
a,10,40
b,20,30
c,30,20
d,40,10


# DataFrame Attributes

In [29]:
# The frame's data as a 2-D NumPy array (rows x columns). to_numpy() is the
# accessor pandas recommends over the older .values attribute, and it matches
# the s2.to_numpy() call used for the Series earlier in this notebook.
df.to_numpy()

array([[10, 40],
       [20, 30],
       [30, 20],
       [40, 10]])

In [30]:
# Column labels of the frame, returned as an Index.
df.columns

Index(['first', 'second'], dtype='object')

In [31]:
# (number of rows, number of columns)
df.shape

(4, 2)

# Column Selection

In [33]:
# Single brackets with one column label return that column as a Series.
df['first']

a    10
b    20
c    30
d    40
Name: first, dtype: int64

In [35]:
# Passing a list of labels returns a DataFrame, even when the list has
# only one column.
df[['first']]

Unnamed: 0,first
a,10
b,20
c,30
d,40


# DataFrame Access by Position (`iloc`) and Label (`loc`)

In [36]:
# iloc selects by integer position: the first row comes back as a Series
# whose name is that row's index label.
df.iloc[0]

first     10
second    40
Name: a, dtype: int64

In [37]:
# Positional slice: rows at positions 0, 1 and 2 (the end position 3 is
# excluded).
df.iloc[0:3]

Unnamed: 0,first,second
a,10,40
b,20,30
c,30,20


In [38]:
# loc selects by index label rather than by position.
df.loc['b']

first     20
second    30
Name: b, dtype: int64

In [41]:
# Label 'a' is the first row, so this returns the same row as df.iloc[0].
df.loc['a']

first     10
second    40
Name: a, dtype: int64