In [36]:
list(range(1, 20, 2))

[1, 3, 5, 7, 9, 11, 13, 15, 17, 19]

In [37]:
import numpy as np
import pandas as pd
np.random.seed(12345)
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_rows = 20
np.set_printoptions(precision=4, suppress=True)


data = {'state': ['Ujjain', 'Ujjain', 'Ujjain', 'Prayag', 'Prayag', 'Prayag'],
        'year': [2000, 2001, 2002, 2001, 2002, 2003],
        'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame = pd.DataFrame(data)
print(frame)

    state  year  pop
0  Ujjain  2000  1.5
1  Ujjain  2001  1.7
2  Ujjain  2002  3.6
3  Prayag  2001  2.4
4  Prayag  2002  2.9
5  Prayag  2003  3.2


In [38]:
frame.dtypes

state     object
year       int64
pop      float64
dtype: object

In [39]:
type(frame)

pandas.core.frame.DataFrame

In [40]:
data['state']

['Ujjain', 'Ujjain', 'Ujjain', 'Prayag', 'Prayag', 'Prayag']

type(data)

In [41]:
type(data)

dict

In [42]:
pd.DataFrame(data, columns=['year', 'state', 'pop'])

Unnamed: 0,year,state,pop
0,2000,Ujjain,1.5
1,2001,Ujjain,1.7
2,2002,Ujjain,3.6
3,2001,Prayag,2.4
4,2002,Prayag,2.9
5,2003,Prayag,3.2


In [43]:
frame2 = pd.DataFrame(data, columns=['year', 'state', 'pop', 'debt'],
                      index=['one', 'two', 'three', 'four',
                             'five', 'six'])
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ujjain,1.5,
two,2001,Ujjain,1.7,
three,2002,Ujjain,3.6,
four,2001,Prayag,2.4,
five,2002,Prayag,2.9,
six,2003,Prayag,3.2,


In [44]:
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [45]:
frame2['state']

one      Ujjain
two      Ujjain
three    Ujjain
four     Prayag
five     Prayag
six      Prayag
Name: state, dtype: object

In [46]:
frame2[['state']]

Unnamed: 0,state
one,Ujjain
two,Ujjain
three,Ujjain
four,Prayag
five,Prayag
six,Prayag


In [47]:
frame2.index

Index(['one', 'two', 'three', 'four', 'five', 'six'], dtype='object')

In [48]:
frame2['debt']  = 16.5

In [49]:
frame2.debt

one      16.5
two      16.5
three    16.5
four     16.5
five     16.5
six      16.5
Name: debt, dtype: float64

In [50]:
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ujjain,1.5,16.5
two,2001,Ujjain,1.7,16.5
three,2002,Ujjain,3.6,16.5
four,2001,Prayag,2.4,16.5
five,2002,Prayag,2.9,16.5
six,2003,Prayag,3.2,16.5


In [51]:
frame2.loc[['three']]

Unnamed: 0,year,state,pop,debt
three,2002,Ujjain,3.6,16.5


In [53]:
frame2.iloc[[2]]

Unnamed: 0,year,state,pop,debt
three,2002,Ujjain,3.6,16.5


In [54]:
frame2['debt'] = np.arange(6)
frame2


Unnamed: 0,year,state,pop,debt
one,2000,Ujjain,1.5,0
two,2001,Ujjain,1.7,1
three,2002,Ujjain,3.6,2
four,2001,Prayag,2.4,3
five,2002,Prayag,2.9,4
six,2003,Prayag,3.2,5


In [55]:
frame2.T

Unnamed: 0,one,two,three,four,five,six
year,2000,2001,2002,2001,2002,2003
state,Ujjain,Ujjain,Ujjain,Prayag,Prayag,Prayag
pop,1.5,1.7,3.6,2.4,2.9,3.2
debt,0,1,2,3,4,5


In [57]:
frame2.T.T

Unnamed: 0,year,state,pop,debt
one,2000,Ujjain,1.5,0
two,2001,Ujjain,1.7,1
three,2002,Ujjain,3.6,2
four,2001,Prayag,2.4,3
five,2002,Prayag,2.9,4
six,2003,Prayag,3.2,5


In [60]:
frame = pd.DataFrame(np.arange(9).reshape((3, 3)),
                     index=['a', 'c', 'd'],
                     columns=['Ohio', 'Texas', 'California'])
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [61]:
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
frame2


Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [62]:
np.arange(9).reshape((3, 3))

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [65]:
states = ['Texas', 'Utah', 'California']

In [66]:
frame.reindex(columns=states)

Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


In [68]:
frame.loc[['a', 'b', 'c', 'd'], states]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  if __name__ == '__main__':


Unnamed: 0,Texas,Utah,California
a,1.0,,2.0
b,,,
c,4.0,,5.0
d,7.0,,8.0


In [69]:
frame.iloc[[0,1,2], [0,1,2]]

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [70]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [71]:
#filtering
data[data['three'] > 5]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [72]:
data[ data < 5]

Unnamed: 0,one,two,three,four
Ohio,0.0,1.0,2.0,3.0
Colorado,4.0,,,
Utah,,,,
New York,,,,


In [73]:
data.iloc[:3, :3]

Unnamed: 0,one,two,three
Ohio,0,1,2
Colorado,4,5,6
Utah,8,9,10


In [80]:
df1 = pd.DataFrame(np.arange(9.).reshape((3, 3)), columns=list('bcd'),
                   index=['Ohio', 'Texas', 'Colorado'])
df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bde'),
                   index=['Utah', 'Ohio', 'Texas', 'Oregon'])


In [81]:
df1

Unnamed: 0,b,c,d
Ohio,0.0,1.0,2.0
Texas,3.0,4.0,5.0
Colorado,6.0,7.0,8.0


In [76]:
df2

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [85]:
df1+df2

Unnamed: 0,b,c,d,e
Colorado,,,,
Ohio,3.0,,6.0,
Oregon,,,,
Texas,9.0,,12.0,
Utah,,,,


In [90]:
frame = pd.DataFrame(np.random.randn(4, 3), columns=['b','d','e'],
                     index=['Utah', 'Ohio', 'Texas', 'Oregon'])

frame = np.abs(frame)
frame

Unnamed: 0,b,d,e
Utah,0.331286,1.349742,0.069877
Ohio,0.246674,0.011862,1.004812
Texas,1.327195,0.919262,1.549106
Oregon,0.022185,0.758363,0.660524


In [92]:
frame.apply(lambda x: x.max(), axis =1)

Utah      1.349742
Ohio      1.004812
Texas     1.549106
Oregon    0.758363
dtype: float64

In [93]:
format = lambda x: '%.f' % x
frame.applymap(format)

Unnamed: 0,b,d,e
Utah,0,1,0
Ohio,0,0,1
Texas,1,1,2
Oregon,0,1,1


In [94]:
frame['e'].map(format)

Utah      0
Ohio      1
Texas     2
Oregon    1
Name: e, dtype: object

In [95]:
df = pd.DataFrame([[1.4, np.nan], [7.1, -4.5],
                   [np.nan, np.nan], [0.75, -1.3]],
                  index=['a', 'b', 'c', 'd'],
                  columns=['one', 'two'])
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [96]:
df.sum()

one    9.25
two   -5.80
dtype: float64

In [97]:
df['sumax'] = df.sum(axis='columns')
df

Unnamed: 0,one,two,sumax
a,1.4,,1.4
b,7.1,-4.5,2.6
c,,,0.0
d,0.75,-1.3,-0.55


In [98]:
df.describe()

Unnamed: 0,one,two,sumax
count,3.0,2.0,4.0
mean,3.083333,-2.9,0.8625
std,3.493685,2.262742,1.419727
min,0.75,-4.5,-0.55
25%,1.075,-3.7,-0.1375
50%,1.4,-2.9,0.7
75%,4.25,-2.1,1.7
max,7.1,-1.3,2.6
