In [1]:
import pandas as pd

### Categorical variables

In [2]:
# A categorical Series can be created directly: passing dtype="category"
# to the Series constructor derives the categories from the values given.

s = pd.Series(data=["a", "b", "c", "a"], dtype="category")
s

0    a
1    b
2    c
3    a
dtype: category
Categories (3, object): [a, b, c]

In [3]:
# Alternatively, convert an existing Series or DataFrame column with
# .astype('category'). Column "A" keeps the raw values so the two
# columns can be compared side by side.

df = pd.DataFrame({"A": ["a", "b", "c", "a"]}).assign(
    B=lambda frame: frame["A"].astype('category')
)
df

Unnamed: 0,A,B
0,a,a
1,b,b
2,c,c
3,a,a


In [4]:
# Check the column dtypes: only "B" was converted to category; "A" is untouched.
df.dtypes

A      object
B    category
dtype: object

In [5]:
# A pandas.Categorical can also be built explicitly and handed to a Series.
# The categories are deliberately different from the values here: "a" is
# not a declared category (it becomes NaN), and "d" is declared but unused.

raw_cat = pd.Categorical(
    values=["a", "b", "c", "a"],
    categories=["b", "c", "d"],
    ordered=False,
)

In [6]:
 # Wrap the Categorical in a Series (this rebinds `s` from the first example).
 # Values that are not among the declared categories ("a" here) appear as NaN.
 s = pd.Series(raw_cat)
 s

0    NaN
1      b
2      c
3    NaN
dtype: category
Categories (3, object): [b, c, d]

### Dummy variables

In [12]:
# pd.get_dummies turns a categorical variable into dummy (indicator)
# variables. Start with a small DataFrame holding a categorical column
# ('key') next to a numeric one ('data1'). This rebinds `df`.

df = pd.DataFrame(data={'key': list('bbacab'), 'data1': range(6)})
df

Unnamed: 0,data1,key
0,0,b
1,1,b
2,2,a
3,3,c
4,4,a
5,5,b


In [11]:
# Convert the categorical column into dummy variables: every distinct
# value of 'key' becomes its own indicator column.

pd.get_dummies(data=df['key'])

Unnamed: 0,a,b,c
0,0,1,0
1,0,1,0
2,1,0,0
3,0,0,1
4,1,0,0
5,0,1,0


In [14]:
# Attach the dummy variables to the original frame. The column names are
# generated by get_dummies itself (prefix + '_' + category value, i.e.
# dummy_a, dummy_b, dummy_c for this data) rather than hard-coded, so the
# labels cannot drift out of sync with the categories actually present.
# Assigning by column name keeps the cell safe to re-run.
key_dummies = pd.get_dummies(df['key'], prefix='dummy')
df[list(key_dummies.columns)] = key_dummies

In [15]:
# Display the frame again: it now carries the dummy_* columns next to 'key'.
df

Unnamed: 0,data1,key,dummy_a,dummy_b,dummy_c
0,0,b,0,1,0
1,1,b,0,1,0
2,2,a,1,0,0
3,3,c,0,0,1
4,4,a,1,0,0
5,5,b,0,1,0
