# Pandas Tutorial

In [192]:
import pandas as pd
import numpy as np
import sklearn as sk
import matplotlib as plt

In [45]:
dates = pd.date_range('20190101', periods = 6)

In [46]:
df = pd.DataFrame(np.random.randn(6,4), index = dates, columns = list('ABCD'))

In [47]:
df

Unnamed: 0,A,B,C,D
2019-01-01,-0.904979,-0.176939,-0.016315,-1.406648
2019-01-02,-0.333952,0.181966,-1.129065,0.150698
2019-01-03,-0.664177,0.763707,1.768509,0.465253
2019-01-04,0.695127,-0.064721,0.29439,-0.806327
2019-01-05,0.321178,-0.990223,2.364417,0.865275
2019-01-06,-1.24769,0.615946,2.381283,-0.077978


VIEWING DATA

df.head(): shows top 10 row entries

df.tail(): shows bottom 10 row entries

df.index: shows indices of data

df.columns: shows column names of data

df.DataFrame.to_numpy(): potential to be expensive, as Python will need to find NumPy dtype that holds all the DataFrame dtypes. Most likely be object type, which requires casting every value to a Python object

df.describe(): quick summary statistics of data

df.T: transposes data

df.sort_index(axis=[0,1], ascending=True/False): sorts data according to horizontal or vertical axis

df.sort_values(by=[column name]): sorts data according to specific column

## Getting Data

In [48]:
df['A']

2019-01-01   -0.904979
2019-01-02   -0.333952
2019-01-03   -0.664177
2019-01-04    0.695127
2019-01-05    0.321178
2019-01-06   -1.247690
Freq: D, Name: A, dtype: float64

In [49]:
df[0:3]

Unnamed: 0,A,B,C,D
2019-01-01,-0.904979,-0.176939,-0.016315,-1.406648
2019-01-02,-0.333952,0.181966,-1.129065,0.150698
2019-01-03,-0.664177,0.763707,1.768509,0.465253


In [50]:
df['20190102':'20190104']

Unnamed: 0,A,B,C,D
2019-01-02,-0.333952,0.181966,-1.129065,0.150698
2019-01-03,-0.664177,0.763707,1.768509,0.465253
2019-01-04,0.695127,-0.064721,0.29439,-0.806327


## Selection by Label

In [51]:
df.loc[dates[0]]

A   -0.904979
B   -0.176939
C   -0.016315
D   -1.406648
Name: 2019-01-01 00:00:00, dtype: float64

In [52]:
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2019-01-01,-0.904979,-0.176939
2019-01-02,-0.333952,0.181966
2019-01-03,-0.664177,0.763707
2019-01-04,0.695127,-0.064721
2019-01-05,0.321178,-0.990223
2019-01-06,-1.24769,0.615946


In this case, we saying we wish to select all rows (blank : in first index spot) and columns A and B

In [53]:
df.loc['20190102':'20190104', ['A', 'B']]

Unnamed: 0,A,B
2019-01-02,-0.333952,0.181966
2019-01-03,-0.664177,0.763707
2019-01-04,0.695127,-0.064721


Note that the endpoints when slicing are inclusive.

In [54]:
df.loc['20190102', ['A','B']]

A   -0.333952
B    0.181966
Name: 2019-01-02 00:00:00, dtype: float64

In [55]:
df.loc[dates[0], 'A']
df.at[dates[0], 'A']

-0.90497865874235295

Both above options return the same scalar value (although .at is typically a quicker way of accomplishing this).

## Selection by Position

In [56]:
df.iloc[3]

A    0.695127
B   -0.064721
C    0.294390
D   -0.806327
Name: 2019-01-04 00:00:00, dtype: float64

In [57]:
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2019-01-04,0.695127,-0.064721
2019-01-05,0.321178,-0.990223


Note that selections by position/index is exclusive.

In [58]:
df.iloc[[1,2,4], [0,2]]

Unnamed: 0,A,C
2019-01-02,-0.333952,-1.129065
2019-01-03,-0.664177,1.768509
2019-01-05,0.321178,2.364417


In [59]:
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2019-01-02,-0.333952,0.181966,-1.129065,0.150698
2019-01-03,-0.664177,0.763707,1.768509,0.465253


In [60]:
df.iloc[:, 1:3]

Unnamed: 0,B,C
2019-01-01,-0.176939,-0.016315
2019-01-02,0.181966,-1.129065
2019-01-03,0.763707,1.768509
2019-01-04,-0.064721,0.29439
2019-01-05,-0.990223,2.364417
2019-01-06,0.615946,2.381283


In [61]:
df.iloc[1,1]
df.iat[1,1]

0.18196556812128786

## Boolean Indexing

In [62]:
df[df.A > -1]

Unnamed: 0,A,B,C,D
2019-01-01,-0.904979,-0.176939,-0.016315,-1.406648
2019-01-02,-0.333952,0.181966,-1.129065,0.150698
2019-01-03,-0.664177,0.763707,1.768509,0.465253
2019-01-04,0.695127,-0.064721,0.29439,-0.806327
2019-01-05,0.321178,-0.990223,2.364417,0.865275


In [63]:
df[df > 0]

Unnamed: 0,A,B,C,D
2019-01-01,,,,
2019-01-02,,0.181966,,0.150698
2019-01-03,,0.763707,1.768509,0.465253
2019-01-04,0.695127,,0.29439,
2019-01-05,0.321178,,2.364417,0.865275
2019-01-06,,0.615946,2.381283,


Returns whole DataFrame where results match given boolean.

In [64]:
df2 = df.copy()
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2

Unnamed: 0,A,B,C,D,E
2019-01-01,-0.904979,-0.176939,-0.016315,-1.406648,one
2019-01-02,-0.333952,0.181966,-1.129065,0.150698,one
2019-01-03,-0.664177,0.763707,1.768509,0.465253,two
2019-01-04,0.695127,-0.064721,0.29439,-0.806327,three
2019-01-05,0.321178,-0.990223,2.364417,0.865275,four
2019-01-06,-1.24769,0.615946,2.381283,-0.077978,three


In [65]:
df2[df2['E'].isin(['four','one'])]

Unnamed: 0,A,B,C,D,E
2019-01-01,-0.904979,-0.176939,-0.016315,-1.406648,one
2019-01-02,-0.333952,0.181966,-1.129065,0.150698,one
2019-01-05,0.321178,-0.990223,2.364417,0.865275,four


## Setting

In [66]:
s1 = pd.Series([1,2,3,4,5,6], index = pd.date_range('20190102', periods=6))

df['F'] = s1

In [67]:
df.at[dates[0],'A'] = 0

This sets the value by Label.

In [68]:
df.iat[0,1] = 0

This sets the value by position.

In [71]:
df.loc[:, 'D'] = np.array([5] * len(df))

Here we are taking the 0 index position of column name 'D', and creating a numpy array consisting of the integer 5, down the length of the DataFrame.

In [72]:
df

Unnamed: 0,A,B,C,D,F
2019-01-01,0.0,0.0,-0.016315,5,
2019-01-02,-0.333952,0.181966,-1.129065,5,1.0
2019-01-03,-0.664177,0.763707,1.768509,5,2.0
2019-01-04,0.695127,-0.064721,0.29439,5,3.0
2019-01-05,0.321178,-0.990223,2.364417,5,4.0
2019-01-06,-1.24769,0.615946,2.381283,5,5.0


In [73]:
df2 = df.copy()
df2[df2 > 0] = -df2
df2

Unnamed: 0,A,B,C,D,F
2019-01-01,0.0,0.0,-0.016315,-5,
2019-01-02,-0.333952,-0.181966,-1.129065,-5,-1.0
2019-01-03,-0.664177,-0.763707,-1.768509,-5,-2.0
2019-01-04,-0.695127,-0.064721,-0.29439,-5,-3.0
2019-01-05,-0.321178,-0.990223,-2.364417,-5,-4.0
2019-01-06,-1.24769,-0.615946,-2.381283,-5,-5.0


## Missing Data

In [76]:
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
df1.loc[dates[0]:dates[1], 'E'] = 1
df1

Unnamed: 0,A,B,C,D,F,E
2019-01-01,0.0,0.0,-0.016315,5,,1.0
2019-01-02,-0.333952,0.181966,-1.129065,5,1.0,1.0
2019-01-03,-0.664177,0.763707,1.768509,5,2.0,
2019-01-04,0.695127,-0.064721,0.29439,5,3.0,


In [78]:
df1.dropna(how='any')

Unnamed: 0,A,B,C,D,F,E
2019-01-02,-0.333952,0.181966,-1.129065,5,1.0,1.0


In [79]:
df1.fillna(value = 5)

Unnamed: 0,A,B,C,D,F,E
2019-01-01,0.0,0.0,-0.016315,5,5.0,1.0
2019-01-02,-0.333952,0.181966,-1.129065,5,1.0,1.0
2019-01-03,-0.664177,0.763707,1.768509,5,2.0,5.0
2019-01-04,0.695127,-0.064721,0.29439,5,3.0,5.0


In [86]:
pd.isnull(df1)

Unnamed: 0,A,B,C,D,F,E
2019-01-01,False,False,False,False,True,False
2019-01-02,False,False,False,False,False,False
2019-01-03,False,False,False,False,False,True
2019-01-04,False,False,False,False,False,True


## Operations

In [89]:
df.mean()

A   -0.204919
B    0.084446
C    0.943870
D    5.000000
F    3.000000
dtype: float64

In [88]:
df.mean(1)

2019-01-01    1.245921
2019-01-02    0.943790
2019-01-03    1.773608
2019-01-04    1.784959
2019-01-05    2.139074
2019-01-06    2.349908
Freq: D, dtype: float64

In [91]:
s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2)
s

2019-01-01    NaN
2019-01-02    NaN
2019-01-03    1.0
2019-01-04    3.0
2019-01-05    5.0
2019-01-06    NaN
Freq: D, dtype: float64

In [92]:
df.sub(s, axis='index')

Unnamed: 0,A,B,C,D,F
2019-01-01,,,,,
2019-01-02,,,,,
2019-01-03,-1.664177,-0.236293,0.768509,4.0,1.0
2019-01-04,-2.304873,-3.064721,-2.70561,2.0,0.0
2019-01-05,-4.678822,-5.990223,-2.635583,0.0,-1.0
2019-01-06,,,,,


In [96]:
df.apply(np.cumsum)

Unnamed: 0,A,B,C,D,F
2019-01-01,0.0,0.0,-0.016315,5,
2019-01-02,-0.333952,0.181966,-1.145381,10,1.0
2019-01-03,-0.998129,0.945673,0.623128,15,3.0
2019-01-04,-0.303002,0.880952,0.917518,20,6.0
2019-01-05,0.018175,-0.109271,3.281935,25,10.0
2019-01-06,-1.229514,0.506675,5.663218,30,15.0


In [94]:
df.apply(lambda x: x.max() - x.min())

A    1.942816
B    1.753930
C    3.510348
D    0.000000
F    4.000000
dtype: float64

In [97]:
s = pd.Series(np.random.randint(0,7,size=10))
s

0    6
1    5
2    3
3    5
4    2
5    0
6    4
7    5
8    4
9    3
dtype: int32

In [98]:
s.value_counts()

5    3
4    2
3    2
6    1
2    1
0    1
dtype: int64

In [99]:
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog'])
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
dtype: object

## Merge

In [103]:
df = pd.DataFrame(np.random.randn(10, 4))

In [104]:
df

Unnamed: 0,0,1,2,3
0,2.157737,-0.767154,0.248337,0.900952
1,-1.224873,1.546289,0.40225,0.813753
2,0.211446,-0.254531,-0.499725,0.3405
3,-0.603269,-0.101907,-0.56917,1.841087
4,0.704397,-1.402158,-1.100932,-0.373414
5,0.009187,-1.438603,-1.100593,0.179202
6,0.28542,-0.827262,1.008221,0.81048
7,-0.341411,1.189983,0.289946,0.679269
8,-1.015629,-0.129926,0.96957,2.375511
9,-0.159682,-1.055534,0.298344,-1.075044


In [105]:
pieces = [df[:3], df[3:7], df[7:]]
pieces

[          0         1         2         3
 0  2.157737 -0.767154  0.248337  0.900952
 1 -1.224873  1.546289  0.402250  0.813753
 2  0.211446 -0.254531 -0.499725  0.340500,
           0         1         2         3
 3 -0.603269 -0.101907 -0.569170  1.841087
 4  0.704397 -1.402158 -1.100932 -0.373414
 5  0.009187 -1.438603 -1.100593  0.179202
 6  0.285420 -0.827262  1.008221  0.810480,
           0         1         2         3
 7 -0.341411  1.189983  0.289946  0.679269
 8 -1.015629 -0.129926  0.969570  2.375511
 9 -0.159682 -1.055534  0.298344 -1.075044]

In [106]:
pd.concat(pieces)

Unnamed: 0,0,1,2,3
0,2.157737,-0.767154,0.248337,0.900952
1,-1.224873,1.546289,0.40225,0.813753
2,0.211446,-0.254531,-0.499725,0.3405
3,-0.603269,-0.101907,-0.56917,1.841087
4,0.704397,-1.402158,-1.100932,-0.373414
5,0.009187,-1.438603,-1.100593,0.179202
6,0.28542,-0.827262,1.008221,0.81048
7,-0.341411,1.189983,0.289946,0.679269
8,-1.015629,-0.129926,0.96957,2.375511
9,-0.159682,-1.055534,0.298344,-1.075044


In [116]:
left = pd.DataFrame({'key':['foo','bar'],
                    'lval':[1,2]})
right = pd.DataFrame({'key':['foo','bar'],
                     'rval':[4,5]})

In [117]:
left

Unnamed: 0,key,lval
0,foo,1
1,bar,2


In [118]:
right

Unnamed: 0,key,rval
0,foo,4
1,bar,5


In [119]:
pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,bar,2,5


In [120]:
df = pd.DataFrame(np.random.randn(8,4), columns = ['A','B','C','D'])
df

Unnamed: 0,A,B,C,D
0,0.352698,-1.346385,0.140645,0.844804
1,0.427095,0.776962,1.00541,-0.815959
2,-1.194405,-0.218779,1.051591,-0.743157
3,-2.547125,-1.388956,0.607407,-0.325967
4,-0.885581,0.649063,0.426737,-1.198485
5,-0.426537,0.339532,0.3011,0.826274
6,1.839677,0.749072,0.071695,-0.316201
7,-0.326409,1.012004,-0.903174,-1.738913


In [121]:
s = df.iloc[3]

In [122]:
df.append(s,ignore_index=True)

Unnamed: 0,A,B,C,D
0,0.352698,-1.346385,0.140645,0.844804
1,0.427095,0.776962,1.00541,-0.815959
2,-1.194405,-0.218779,1.051591,-0.743157
3,-2.547125,-1.388956,0.607407,-0.325967
4,-0.885581,0.649063,0.426737,-1.198485
5,-0.426537,0.339532,0.3011,0.826274
6,1.839677,0.749072,0.071695,-0.316201
7,-0.326409,1.012004,-0.903174,-1.738913
8,-2.547125,-1.388956,0.607407,-0.325967


## Grouping

In [129]:
df = pd.DataFrame(
    {
    'A': ['foo','bar','hello','world','foo','bar'],
    'B': ['one','two','one','three','five','two'],
    'C': np.random.randn(6),
    'D': np.random.randn(6)})

In [130]:
df

Unnamed: 0,A,B,C,D
0,foo,one,-0.134976,1.490109
1,bar,two,1.405413,-0.630203
2,hello,one,0.109774,-0.606078
3,world,three,0.38585,-0.714593
4,foo,five,-1.090445,0.586995
5,bar,two,-0.936691,-0.773542


In [131]:
df.groupby('A').sum()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,0.468722,-1.403744
foo,-1.225421,2.077104
hello,0.109774,-0.606078
world,0.38585,-0.714593


In [132]:
df.groupby(['A','B']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,two,0.468722,-1.403744
foo,five,-1.090445,0.586995
foo,one,-0.134976,1.490109
hello,one,0.109774,-0.606078
world,three,0.38585,-0.714593


## Reshaping

In [133]:
tuples = list(zip(*
                  [['bar','bar','baz','baz','foo','foo','qux','qux'],
                   ['one','two','one','two','one','two','one','two']]))

In [134]:
index = pd.MultiIndex.from_tuples(tuples, names=['first','second'])

In [135]:
df = pd.DataFrame(np.random.randn(8,2), index=index, columns=['A','B'])

In [137]:
df2 = df[:4]
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.166504,-0.422576
bar,two,0.769038,1.722026
baz,one,-0.330003,0.138846
baz,two,0.075569,1.066985


In [138]:
stacked = df2.stack()
stacked

first  second   
bar    one     A   -0.166504
               B   -0.422576
       two     A    0.769038
               B    1.722026
baz    one     A   -0.330003
               B    0.138846
       two     A    0.075569
               B    1.066985
dtype: float64

In [139]:
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.166504,-0.422576
bar,two,0.769038,1.722026
baz,one,-0.330003,0.138846
baz,two,0.075569,1.066985


In [140]:
stacked.unstack(1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,-0.166504,0.769038
bar,B,-0.422576,1.722026
baz,A,-0.330003,0.075569
baz,B,0.138846,1.066985


In [141]:
stacked.unstack(0)

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.166504,-0.330003
one,B,-0.422576,0.138846
two,A,0.769038,0.075569
two,B,1.722026,1.066985


In [142]:
df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
                             'B': ['A', 'B', 'C'] * 4,
                             'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
                             'D': np.random.randn(12),
                             'E': np.random.randn(12)})

In [143]:
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,0.1224,0.827872
1,one,B,foo,0.59282,-1.379344
2,two,C,foo,0.570556,1.244838
3,three,A,bar,-0.473937,-0.420674
4,one,B,bar,0.32902,1.051787
5,one,C,bar,1.521682,-0.850051
6,two,A,foo,0.015687,-1.414727
7,three,B,foo,-0.919171,-0.064276
8,one,C,foo,-1.259494,0.038994
9,one,A,bar,1.66693,-0.228838


In [144]:
pd.pivot_table(df, values='D', index=['A','B'], columns=['C'])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,1.66693,0.1224
one,B,0.32902,0.59282
one,C,1.521682,-1.259494
three,A,-0.473937,
three,B,,-0.919171
three,C,0.7097,
two,A,,0.015687
two,B,-0.977171,
two,C,,0.570556


## Time Series

In [145]:
rng = pd.date_range('1/1/2019', periods = 100, freq = 'S')

In [146]:
ts = pd.Series(np.random.randint(0, 500, len(rng)), index = rng)

In [147]:
ts.resample('5Min').sum()

2019-01-01    24631
Freq: 5T, dtype: int32

In [148]:
rng = pd.date_range('3/6/2019 00:00', periods = 5, freq = 'D')

In [150]:
ts = pd.Series(np.random.randn(len(rng)), rng)
ts

2019-03-06   -1.981630
2019-03-07   -0.240636
2019-03-08   -0.550495
2019-03-09    0.129564
2019-03-10    0.350797
Freq: D, dtype: float64

In [152]:
ts_utc = ts.tz_localize('UTC')
ts_utc

2019-03-06 00:00:00+00:00   -1.981630
2019-03-07 00:00:00+00:00   -0.240636
2019-03-08 00:00:00+00:00   -0.550495
2019-03-09 00:00:00+00:00    0.129564
2019-03-10 00:00:00+00:00    0.350797
Freq: D, dtype: float64

In [153]:
ts_utc.tz_convert('US/Eastern')

2019-03-05 19:00:00-05:00   -1.981630
2019-03-06 19:00:00-05:00   -0.240636
2019-03-07 19:00:00-05:00   -0.550495
2019-03-08 19:00:00-05:00    0.129564
2019-03-09 19:00:00-05:00    0.350797
Freq: D, dtype: float64

In [154]:
rng = pd.date_range('1/1/2018', periods = 5, freq = 'M')

In [156]:
ts = pd.Series(np.random.randn(len(rng)), index = rng)
ts

2018-01-31   -2.261389
2018-02-28    0.076197
2018-03-31    1.885716
2018-04-30   -0.951158
2018-05-31    0.382603
Freq: M, dtype: float64

In [158]:
ps = ts.to_period()
ps

2018-01   -2.261389
2018-02    0.076197
2018-03    1.885716
2018-04   -0.951158
2018-05    0.382603
Freq: M, dtype: float64

In [159]:
ps.to_timestamp()

2018-01-01   -2.261389
2018-02-01    0.076197
2018-03-01    1.885716
2018-04-01   -0.951158
2018-05-01    0.382603
Freq: MS, dtype: float64

In [160]:
prng = pd.period_range('1990Q1','2000Q4', freq='Q-NOV')

In [164]:
ts = pd.Series(np.random.randn(len(prng)), prng)

In [165]:
ts.index = (prng.asfreq('M','e') + 1).asfreq('H','s') + 9

In [166]:
ts.head()

1990-03-01 09:00   -1.070135
1990-06-01 09:00   -0.464422
1990-09-01 09:00   -1.523553
1990-12-01 09:00   -0.021623
1991-03-01 09:00    0.478397
Freq: H, dtype: float64

## Categoricals

In [167]:
df = pd.DataFrame({
    'id': [1,2,3,4,5,6],
    'raw_grade': ['a','b','b','a','a','e']
})

In [168]:
df['grade'] = df['raw_grade'].astype('category')
df['grade']

0    a
1    b
2    b
3    a
4    a
5    e
Name: grade, dtype: category
Categories (3, object): [a, b, e]

In [171]:
df['grade'].cat.categories = ['very good','good','very bad']

In [173]:
df['grade'] = df['grade'].cat.set_categories([
    'very bad',
    'bad',
    'medium',
    'good',
    'very good'
])
df['grade']

0    very good
1         good
2         good
3    very good
4    very good
5     very bad
Name: grade, dtype: category
Categories (5, object): [very bad, bad, medium, good, very good]

In [174]:
df.sort_values(by='grade')

Unnamed: 0,id,raw_grade,grade
5,6,e,very bad
1,2,b,good
2,3,b,good
0,1,a,very good
3,4,a,very good
4,5,a,very good


In [175]:
df.groupby('grade').size()

grade
very bad     1
bad          0
medium       0
good         2
very good    3
dtype: int64

## Plotting

In [179]:
ts = pd.Series(np.random.randn(1000), index = pd.date_range('1/1/2010', periods = 1000))

In [180]:
ts = ts.cumsum()

In [193]:
ts.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x1a130c10358>

In [189]:
df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index,
                  columns=['A', 'B', 'C', 'D'])

In [190]:
df = df.cumsum()

In [194]:
plt.figure()

TypeError: 'module' object is not callable

In [195]:
df.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x1a130e7bcc0>

In [196]:
plt.legend(loc='best')

TypeError: 'module' object is not callable

## Getting Data In and Out

In [185]:
df.to_csv('foo.csv')

In [186]:
pd.read_csv('foo.csv')

Unnamed: 0.1,Unnamed: 0,id,raw_grade,grade
0,0,1,a,very good
1,1,2,b,good
2,2,3,b,good
3,3,4,a,very good
4,4,5,a,very good
5,5,6,e,very bad
