In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
email = 'jose@email.com'

In [4]:
email.split('@')

['jose', 'email.com']

In [5]:
names = pd.Series(['andrew', 'bobo', 'claire', 'david', '5'])

In [6]:
names

0    andrew
1      bobo
2    claire
3     david
4         5
dtype: object

In [7]:
names.str.upper()

0    ANDREW
1      BOBO
2    CLAIRE
3     DAVID
4         5
dtype: object

In [8]:
names.str.isdigit()

0    False
1    False
2    False
3    False
4     True
dtype: bool

In [9]:
tech_finance = ['GOOG,APPL,AMZN', 'JPM,BAC,GS']

In [10]:
len(tech_finance)

2

In [12]:
tickers = pd.Series(tech_finance)

In [13]:
tickers

0    GOOG,APPL,AMZN
1        JPM,BAC,GS
dtype: object

In [22]:
tickers.str.split(',', expand=True)

Unnamed: 0,0,1,2
0,GOOG,APPL,AMZN
1,JPM,BAC,GS


In [17]:
tech = 'GOOG,APPL,AMZN'

In [18]:
tech.split(',')

['GOOG', 'APPL', 'AMZN']

In [19]:
tech.split(',')[0]

'GOOG'

In [34]:
messy_names = pd.Series(['andrew ', 'bo;bo','   claire   '])

In [24]:
messy_names

0         andrew 
1           bo;bo
2       claire   
dtype: object

In [28]:
messy_names.str.replace(';', '').str.strip().str.capitalize()

0    Andrew
1      Bobo
2    Claire
dtype: object

In [35]:
def cleanup(name):
    name = name.replace(';', '')
    name = name.strip()
    name = name.capitalize()
    return name

In [36]:
messy_names = messy_names.apply(cleanup)

In [37]:
messy_names

0    Andrew
1      Bobo
2    Claire
dtype: object

In [38]:
from datetime import datetime

In [39]:
myyear = 2015
mymonth = 1
myday = 1
myhour = 2
mymin = 30
mysec = 15

In [40]:
mydate = datetime(myyear, mymonth, myday)

In [41]:
mydate

datetime.datetime(2015, 1, 1, 0, 0)

In [42]:
mydatetime = datetime(myyear, mymonth, myday, myhour, mymin, mysec)

In [43]:
mydatetime

datetime.datetime(2015, 1, 1, 2, 30, 15)

In [44]:
mydatetime.year

2015

In [45]:
myser = pd.Series(['Nov 3, 1990', '2000-01-01', None])

In [46]:
myser

0    Nov 3, 1990
1     2000-01-01
2           None
dtype: object

In [49]:
timeser = pd.to_datetime(myser)

In [50]:
timeser

0   1990-11-03
1   2000-01-01
2          NaT
dtype: datetime64[ns]

In [51]:
timeser[0].year

1990

In [56]:
obvi_euro_date = '31-12-2000'

In [57]:
pd.to_datetime(obvi_euro_date)

  pd.to_datetime(obvi_euro_date)


Timestamp('2000-12-31 00:00:00')

In [58]:
euro_date = '10-12-2000'

In [60]:
pd.to_datetime(euro_date, dayfirst = True)

Timestamp('2000-12-10 00:00:00')

In [61]:
style_date = '12--Dec--2000'

In [63]:
pd.to_datetime(style_date, format='%d--%b--%Y')

Timestamp('2000-12-12 00:00:00')

In [64]:
custom_date = '12th of Dec 2000'

In [65]:
pd.to_datetime(custom_date)

Timestamp('2000-12-12 00:00:00')

In [66]:
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv')

In [67]:
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [69]:
sales['DATE']

0      1992-01-01
1      1992-02-01
2      1992-03-01
3      1992-04-01
4      1992-05-01
          ...    
335    2019-12-01
336    2020-01-01
337    2020-02-01
338    2020-03-01
339    2020-04-01
Name: DATE, Length: 340, dtype: object

In [73]:
sales['DATE'] = pd.to_datetime(sales['DATE'])

In [74]:
sales['DATE'][0].year

1992

In [76]:
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv', parse_dates = [0])

In [77]:
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [78]:
sales['DATE']

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

In [81]:
sales = sales.set_index('DATE')

In [84]:
sales.resample(rule='Y').mean()

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-12-31,1807.25
1993-12-31,1794.833333
1994-12-31,1841.75
1995-12-31,1833.916667
1996-12-31,1929.75
1997-12-31,2006.75
1998-12-31,2115.166667
1999-12-31,2206.333333
2000-12-31,2375.583333
2001-12-31,2468.416667


In [85]:
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv', parse_dates = [0])

In [86]:
sales.info

<bound method DataFrame.info of           DATE  MRTSSM4453USN
0   1992-01-01           1509
1   1992-02-01           1541
2   1992-03-01           1597
3   1992-04-01           1675
4   1992-05-01           1822
..         ...            ...
335 2019-12-01           6630
336 2020-01-01           4388
337 2020-02-01           4533
338 2020-03-01           5562
339 2020-04-01           5207

[340 rows x 2 columns]>

In [87]:
sales['DATE'].dt.year

0      1992
1      1992
2      1992
3      1992
4      1992
       ... 
335    2019
336    2020
337    2020
338    2020
339    2020
Name: DATE, Length: 340, dtype: int64

In [88]:
sales['DATE'].dt.month

0       1
1       2
2       3
3       4
4       5
       ..
335    12
336     1
337     2
338     3
339     4
Name: DATE, Length: 340, dtype: int64