In [1]:
import pandas as pd

In [2]:
import numpy as np

In [9]:
# Small 2x2 frame of strings ending in a period, used to demo the .str accessor.
df = pd.DataFrame(
    data=[
        ['a.', 'b.'],
        ['c.', 'd.'],
    ],
)

In [10]:
df

Unnamed: 0,0,1
0,a.,b.
1,c.,d.


In [11]:
# Vectorised string method: upper-case every value in column 1.
# The result is a new Series; df itself is not modified by this cell.
df[1].str.upper()

0    B.
1    D.
Name: 1, dtype: object

In [12]:
# Chain .str methods: upper-case, then swap each period for '!!!'.
# regex=False makes '.' a literal period on every pandas version; read as a
# regular expression, '.' would match any character and replace the letters too.
df[1].str.upper().str.replace('.', '!!!', regex=False)

0    B!!!
1    D!!!
Name: 1, dtype: object

In [13]:
# Write the cleaned values back into column 1.
# regex=False makes '.' a literal period on every pandas version; read as a
# regular expression, '.' would match any character and replace the letters too.
df[1] = df[1].str.upper().str.replace('.', '!!!', regex=False)

In [14]:
df

Unnamed: 0,0,1
0,a.,B!!!
1,c.,D!!!


In [15]:
# Two companies with their URLs; the URL column packs protocol and domain
# into a single string.
df = pd.DataFrame({
    'Name': ['BAZCO', 'Foo Inc'],
    'URL': ['https://baz.edu', 'http://foo.com'],
})

In [16]:
df

Unnamed: 0,Name,URL
0,BAZCO,https://baz.edu
1,Foo Inc,http://foo.com


In [17]:
# Split each URL on '://'; every element of the resulting Series is a list of the pieces.
df['URL'].str.split('://')

0    [https, baz.edu]
1     [http, foo.com]
Name: URL, dtype: object

In [18]:
# expand=True spreads the pieces across the columns of a DataFrame instead of returning lists.
df['URL'].str.split('://', expand=True)

Unnamed: 0,0,1
0,https,baz.edu
1,http,foo.com


In [19]:
# Store the two pieces of each URL as new columns.
# n=1 splits on the first '://' only, so a URL that contains '://' again
# (e.g. inside a query string) can never expand into more than two columns
# and break the two-column assignment.
df[['Protocol', 'Domain']] = df['URL'].str.split('://', n=1, expand=True)

In [20]:
df

Unnamed: 0,Name,URL,Protocol,Domain
0,BAZCO,https://baz.edu,https,baz.edu
1,Foo Inc,http://foo.com,http,foo.com


Columns should hold atomic values: if a single column contains multiple values, you may want to parse those out into separate columns.

In [21]:
# 3x3 frame holding the integers 0-8, with columns a, b, c.
df = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    columns=list('abc'),
)

In [22]:
df

Unnamed: 0,a,b,c
0,0,1,2
1,3,4,5
2,6,7,8


In [23]:
# Replace all column labels at once by assigning a list with one label per column.
df.columns = ['x', 'y', 'z']

In [24]:
df

Unnamed: 0,x,y,z
0,0,1,2
1,3,4,5
2,6,7,8


In [25]:
# An Index is immutable: assigning to a single position raises TypeError.
# The error is the point of this cell, so catch it and print it rather than
# letting it stop a Restart & Run All.
try:
    df.columns[2] = 'Col3'
except TypeError as exc:
    print(f'{type(exc).__name__}: {exc}')

TypeError: Index does not support mutable operations

In [26]:
df.columns

Index(['x', 'y', 'z'], dtype='object')

In [27]:
# Index.map applies str.upper to every label and returns a new Index;
# df.columns itself is left untouched.
df.columns.map(str.upper)

Index(['X', 'Y', 'Z'], dtype='object')

In [28]:
df

Unnamed: 0,x,y,z
0,0,1,2
1,3,4,5
2,6,7,8


In [29]:
# Upper-case every column label and assign the resulting Index back to the frame.
df.columns = df.columns.str.upper()

In [30]:
df

Unnamed: 0,X,Y,Z
0,0,1,2
1,3,4,5
2,6,7,8


In [31]:
# rename returns a new DataFrame with 'Z' relabelled; the result is not
# assigned here, so df still has its 'Z' column afterwards.
df.rename(columns={'Z': 'col 3'})

Unnamed: 0,X,Y,col 3
0,0,1,2
1,3,4,5
2,6,7,8


In [32]:
df

Unnamed: 0,X,Y,Z
0,0,1,2
1,3,4,5
2,6,7,8


In [33]:
# Raw [date, total] records kept as strings: totals carry '$' and thousands
# separators, and one row holds a junk value.
data = [
    ['2009', '$500'],
    ['2010', '$1,234'],
    ['2011', 'WAT!'],
    ['2012', '$2,507'],
]
df = pd.DataFrame(data=data, columns=['date', 'total'])

In [34]:
df

Unnamed: 0,date,total
0,2009,$500
1,2010,"$1,234"
2,2011,WAT!
3,2012,"$2,507"


In [36]:
# Strip '$' and ',' and try a strict cast to float.
# regex=False: '$' is a regex end-of-string anchor, so ask for a literal match
# explicitly instead of relying on version-dependent defaults.
# The cast is expected to fail on the non-numeric 'WAT!' row; catch the error
# and print it so the demonstration doesn't stop a Restart & Run All.
try:
    (
        df['total']
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype('float64')
    )
except ValueError as exc:
    print(f'{type(exc).__name__}: {exc}')

ValueError: could not convert string to float: 'WAT!'

In [37]:
# Strip '$' and ',' and convert to numbers; errors='coerce' turns values that
# cannot be parsed (the 'WAT!' row) into NaN instead of raising.
# regex=False: '$' is a regex end-of-string anchor, so ask for a literal match
# explicitly instead of relying on version-dependent defaults.
pd.to_numeric(
    df['total'].str.replace('$', '', regex=False).str.replace(',', '', regex=False),
    errors='coerce',
)

0     500.0
1    1234.0
2       NaN
3    2507.0
Name: total, dtype: float64

In [38]:
# Dates written as free-form text; the Series holds plain strings (object dtype).
s = pd.Series(['Jan 7, 2014', 'May 29, 1993'])


In [39]:
s

0     Jan 7, 2014
1    May 29, 1993
dtype: object

In [41]:
# Parse the date strings into datetime64 values; no format is passed, so
# pandas infers it from the strings.
new_s = pd.to_datetime(s)

In [42]:
new_s

0   2014-01-07
1   1993-05-29
dtype: datetime64[ns]

In [43]:
# The .dt accessor exposes datetime components; month_name() returns the month as text.
new_s.dt.month_name()

0    January
1        May
dtype: object

In [44]:
# .dt.month returns the month as an integer (1 = January).
new_s.dt.month

0    1
1    5
dtype: int64

In [45]:
import re

In [46]:
# Find every 'a' that is immediately followed by a word character.
# The pattern is a raw string: in a normal string literal '\w' is an invalid
# escape sequence, which Python warns about.
re.findall(r'a\w', 'ana nab a banana')

['an', 'ab', 'an', 'an']

In [47]:
# '.' matches any character except a newline, so unlike \w it also matches
# the space that follows an 'a'.
re.findall('a.', 'ana nab a banana')

['an', 'a ', 'ab', 'a ', 'an', 'an']

In [48]:
# re.search returns a Match for the first place the pattern matches.
# '(an)*a' means zero or more repetitions of 'an' followed by an 'a'.
re.search('(an)*a', 'ana nab a banana')


<re.Match object; span=(0, 3), match='ana'>

In [49]:
# Left table: keys 2, 4, 6, 8, each appearing once.
a = pd.DataFrame({'k': [2, 4, 6, 8], 'col1': [20, 40, 60, 80]})
# Right table: key 4 appears twice, key 8 once; keys 2 and 6 are absent.
b = pd.DataFrame({'k': [4, 4, 8], 'col2': [2, 3, 7]})

In [50]:
a

Unnamed: 0,k,col1
0,2,20
1,4,40
2,6,60
3,8,80


In [51]:
b

Unnamed: 0,k,col2
0,4,2
1,4,3
2,8,7


In [52]:
# Join on 'k' with the default how='inner': only keys present in both frames are kept.
a.merge(b, on='k')

Unnamed: 0,k,col1,col2
0,4,40,2
1,4,40,3
2,8,80,7


In [53]:
# Outer join on 'k': keys from either frame are kept, with NaN where one side has no match.
a.merge(b, how='outer', on='k')

Unnamed: 0,k,col1,col2
0,2,20,
1,4,40,2.0
2,4,40,3.0
3,6,60,
4,8,80,7.0


In [54]:
# Two 3x3 frames with identical column labels: d1 holds 0-8, d2 holds 10-18.
d1 = pd.DataFrame(np.arange(0, 9).reshape(3, 3), columns=['a', 'b', 'c'])
d2 = pd.DataFrame(np.arange(10, 19).reshape(3, 3), columns=['a', 'b', 'c'])

In [55]:
d1

Unnamed: 0,a,b,c
0,0,1,2
1,3,4,5
2,6,7,8


In [56]:
d2

Unnamed: 0,a,b,c
0,10,11,12
1,13,14,15
2,16,17,18


In [57]:
# Stack d2's rows underneath d1's. Each frame keeps its own index labels,
# so the labels 0-2 appear twice in the result.
pd.concat([d1, d2])

Unnamed: 0,a,b,c
0,0,1,2
1,3,4,5
2,6,7,8
0,10,11,12
1,13,14,15
2,16,17,18


In [58]:
s = '{"first":"joe", "last":"versoza"}'


In [61]:
import json
# Parse the JSON text held in s into a Python dict.
d = json.loads(s)

In [62]:
d['first']

'joe'

In [63]:
d

{'first': 'joe', 'last': 'versoza'}

In [65]:
# Serialise the dict back into a JSON string.
json.dumps(d)

'{"first": "joe", "last": "versoza"}'