In [3]:
import pandas as pd
import numpy as np

In [4]:
s = pd.Series([1, 3, 5, np.nan, 6, 8])

In [5]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [6]:
dates = pd.date_range('20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [7]:
df = pd.DataFrame(np.random.randn(6,4 ), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.735683,-2.030598,-0.75561,-1.289416
2013-01-02,-0.091058,-1.051351,0.461401,-1.614872
2013-01-03,0.106841,-0.122167,0.571151,-0.502561
2013-01-04,0.05282,-0.320354,-1.084932,0.110737
2013-01-05,0.816492,0.199042,-0.023076,0.402104
2013-01-06,0.08596,-0.177328,-1.070543,0.783214


In [8]:
df2 = pd.DataFrame({'A': 1.,
                    'B': pd.Timestamp('20200101'),
                    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                    'D': np.array([3] * 4, dtype='int32'),
                    'E': pd.Categorical(["test", "train", "test", "train"]),
                    'F': 'foo'})
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2020-01-01,1.0,3,test,foo
1,1.0,2020-01-01,1.0,3,train,foo
2,1.0,2020-01-01,1.0,3,test,foo
3,1.0,2020-01-01,1.0,3,train,foo


In [9]:
df2.types

AttributeError: 'DataFrame' object has no attribute 'types'

# Time Series
pandas has simple, powerful, and efficient functionality for performing resampling operations during frequency conversion (e.g., converting secondly data into 5-minutely data). This is extremely common in, but not limited to, financial applications. 

In [None]:
rng = pd.date_range('1/1/2020', periods=5, freq='D')
ts = pd.Series(np.random.randn(len(rng)), rng)

ts

In [None]:
ts_utc = ts.tz_localize('UTC')
ts_utc

In [None]:
ts_utc.tz_convert('US/Eastern')

In [None]:
rng = pd.date_range('1/1/2012', periods=5, freq='M')

ts = pd.Series(np.random.randn(len(rng)), index=rng)

ts

In [None]:
ps = ts.to_period()

ps

In [None]:
ps.to_timestamp()

In [None]:
prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV')

ts = pd.Series(np.random.randn(len(prng)), prng)

ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's')

ts.head()

In [None]:
df = pd.DataFrame({"id": [1,2,3,4,5,6],
                  "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})

df["grade"] = df["raw_grade"].astype("category")

df["grade"]

In [None]:
df["grade"].cat.categories = ["very good", "good", "very bad"]

In [None]:
df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"])

df["grade"]

In [None]:
df.sort_values(by="grade")

In [None]:
df.groupby("grade").size()

In [None]:
import matplotlib.pyplot as plt

plt.close('all')

In [None]:
ts = pd.Series(np.random.randn(1000),
              index=pd.date_range('1/1/2000', periods=1000))

ts = ts.cumsum()

ts.plot()

In [None]:
df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index,
                 columns=['A', 'B', 'C', 'D'])

df = df.cumsum()

plt.figure()

df.plot()

plt.legend(loc='best')

In [None]:
df.to_csv('foo.csv')

In [None]:
pd.read_csv('foo.csv')

In [None]:
df.to_hdf('foo.h5', 'df')

In [None]:
import openpyxl

df.to_excel('foo.xlsx', 'Sheet1')

In [None]:
if pd.Series([False, True, False]):
...     print("I was true")