## Series

- A Series is similar to a NumPy array
- A Series is built on top of a NumPy array
- The difference is that a Series can be accessed by label

In [1]:
import numpy as np 
import pandas as pd 

### Creating series from different Python objects

In [2]:
# Shared sample inputs for the Series constructor examples.
labels = list("abc")            # index labels
my_data = [10, 20, 30]          # plain Python list of values
arr = np.asarray(my_data)       # the same values as a NumPy array
d = dict(zip(labels, my_data))  # label -> value mapping

In [3]:
# Only data is given, so pandas supplies the default integer index 0..n-1.
pd.Series(data=labels)

0    a
1    b
2    c
dtype: object

In [5]:
# Pair each value with an explicit label: my_data supplies the values and
# labels replaces the default integer index.
pd.Series(data=my_data, index=labels)

a    10
b    20
c    30
dtype: int64

In [6]:
# From a dict: the keys become the index labels and the values become the data.
pd.Series(data=d)

a    10
b    20
c    30
dtype: int64

### Grabbing data from a Series

In [7]:
# Integer values labelled by country name.
ser1 = pd.Series(
    data=[1, 2, 3, 4],
    index=['USA', 'Singapore', 'India', 'China'],
)

In [8]:
# Display the Series: index labels in the left column, values in the right.
ser1

USA          1
Singapore    2
India        3
China        4
dtype: int64

In [10]:
# Look a value up by its index label, much like a dictionary key.
ser1['USA']

1

In [11]:
# Arithmetic between Series is element-wise, matching values by index label.
ser1 + ser1

USA          2
Singapore    4
India        6
China        8
dtype: int64

## DataFrames

In [1]:
import numpy as np 
import pandas as pd 

from numpy.random import randn 

# Seed NumPy's global random generator so the randn() draws used to build the
# DataFrame below are the same on every run.
np.random.seed(108)

In [2]:
# 5x4 frame of random normal draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list("ABCDE"),
    columns=list("WXYZ"),
)

In [3]:
# Display the full frame (row labels A-E, columns W-Z).
df

Unnamed: 0,W,X,Y,Z
A,-1.026905,0.221749,1.13039,1.146185
B,-0.592734,0.118784,-0.48443,-1.944913
C,0.092077,0.902169,1.314469,0.771102
D,-0.540147,-0.284115,-0.889331,0.404169
E,-1.144812,0.545396,1.45407,1.223977


Each column of a DataFrame is a Series.

In [4]:
df['W']  # bracket access with a single column label returns that column as a Series

A   -1.026905
B   -0.592734
C    0.092077
D   -0.540147
E   -1.144812
Name: W, dtype: float64

In [5]:
# Confirm that a single selected column is a pandas Series.
type(df['W'])

pandas.core.series.Series

In [6]:
# Attribute-style access to the same column. This shortcut only works when the
# column name is a valid Python identifier and does not clash with an existing
# DataFrame attribute or method, so df['W'] is the safer form.
df.W

A   -1.026905
B   -0.592734
C    0.092077
D   -0.540147
E   -1.144812
Name: W, dtype: float64

### Selecting multiple columns with a list of column names

In [8]:
df[['W', 'Z']]  # passing a list of column labels returns a DataFrame, not a Series

Unnamed: 0,W,Z
A,-1.026905,1.146185
B,-0.592734,-1.944913
C,0.092077,0.771102
D,-0.540147,0.404169
E,-1.144812,1.223977


### Creating a new column

In [10]:
# Assigning to a label that does not exist yet adds a new column; the sum is
# computed element-wise, row by row.
df["new"] = df["W"] + df["Y"]
df

Unnamed: 0,W,X,Y,Z,new
A,-1.026905,0.221749,1.13039,1.146185,0.103486
B,-0.592734,0.118784,-0.48443,-1.944913,-1.077165
C,0.092077,0.902169,1.314469,0.771102,1.406546
D,-0.540147,-0.284115,-0.889331,0.404169,-1.429479
E,-1.144812,0.545396,1.45407,1.223977,0.309258


### Removing columns

In [11]:
# drop() searches the row index (axis=0) by default, so passing a column name
# raises KeyError. The error is caught and its message printed so that the
# failure is still demonstrated but "Restart & Run All" can continue past
# this cell.
try:
    df.drop("new")
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "['new'] not found in axis"

In [12]:
# Name the axis explicitly to drop a column. This returns a new DataFrame;
# df itself is left unchanged.
df.drop(columns="new")

Unnamed: 0,W,X,Y,Z
A,-1.026905,0.221749,1.13039,1.146185
B,-0.592734,0.118784,-0.48443,-1.944913
C,0.092077,0.902169,1.314469,0.771102
D,-0.540147,-0.284115,-0.889331,0.404169
E,-1.144812,0.545396,1.45407,1.223977


In [13]:
# Display df again: the "new" column is still present because the previous
# drop was not done in place.
df

Unnamed: 0,W,X,Y,Z,new
A,-1.026905,0.221749,1.13039,1.146185,0.103486
B,-0.592734,0.118784,-0.48443,-1.944913,-1.077165
C,0.092077,0.902169,1.314469,0.771102,1.406546
D,-0.540147,-0.284115,-0.889331,0.404169,-1.429479
E,-1.144812,0.545396,1.45407,1.223977,0.309258


In [14]:
# inplace=True modifies df itself and returns None. Re-running this cell
# raises KeyError because "new" no longer exists.
df.drop(columns="new", inplace=True)

In [17]:
# Display df to confirm that the in-place drop removed the "new" column.
df

Unnamed: 0,W,X,Y,Z
A,-1.026905,0.221749,1.13039,1.146185
B,-0.592734,0.118784,-0.48443,-1.944913
C,0.092077,0.902169,1.314469,0.771102
D,-0.540147,-0.284115,-0.889331,0.404169
E,-1.144812,0.545396,1.45407,1.223977


### Dropping rows

In [18]:
# Returns a copy without row "E"; df keeps the row unless inplace=True is passed.
df.drop(index="E")

Unnamed: 0,W,X,Y,Z
A,-1.026905,0.221749,1.13039,1.146185
B,-0.592734,0.118784,-0.48443,-1.944913
C,0.092077,0.902169,1.314469,0.771102
D,-0.540147,-0.284115,-0.889331,0.404169


### Selecting Rows

By row label (`loc`)

In [20]:
df.loc['E']  # .loc selects by index label; a single row comes back as a Series

W   -1.144812
X    0.545396
Y    1.454070
Z    1.223977
Name: E, dtype: float64

By integer position (`iloc`)

In [22]:
# .iloc selects by zero-based integer position: position 4 is the fifth row,
# which is the row labelled 'E'.
df.iloc[4]

W   -1.144812
X    0.545396
Y    1.454070
Z    1.223977
Name: E, dtype: float64

### Selecting subsets of rows & columns

In [24]:
df.loc['B','Y']  # [row label, column label] returns a single value, similar to NumPy's arr[i, j]

-0.484430212868116

In [26]:
df.loc[['A','B'], ['W', 'Y']]  # lists of row labels and column labels select a sub-DataFrame

Unnamed: 0,W,Y
A,-1.026905,1.13039
B,-0.592734,-0.48443
