In [None]:
import numpy as np
import pandas as pd

In [None]:
# Series: a one-dimensional labeled array that behaves much like a fixed-length, ordered dict

In [None]:
# Build a Series from a plain list; no index is given, so the default integer index is used
srs1 = pd.Series(data=[4, 7, -5, 3])
srs1

In [None]:
# DataFrame

In [4]:
# DataFrame from a NumPy array: a 1-D array becomes a single column labelled 0
df1 = pd.DataFrame(data=np.array([1, 2, 3, 4]))
df1

Unnamed: 0,0
0,1
1,2
2,3
3,4


In [None]:
# DataFrame from a dict of equal-length lists: each key becomes a column
dict1 = dict(
    state=['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    year=[2000, 2001, 2002, 2001, 2002, 2003],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)
df2_dict = pd.DataFrame(data=dict1)
df2_dict

In [None]:
# Preview only the leading rows of the frame (head() defaults to the first five)
df2_dict.head()

In [None]:
# Same data with an explicit column order and string row labels.
# 'debt' is not a key of dict1, so that column is filled with missing values.
df3_dict = pd.DataFrame(
    data=dict1,
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=['year', 'state', 'pop', 'debt'],
)
df3_dict

In [None]:
# Retrieve a single column as a Series, either by dict-like notation (here) or by attribute access
column_series = df3_dict['state'] # dict-like notation: a single label returns a Series

In [None]:
# Display the selected 'state' column; it keeps the row labels of df3_dict
column_series

In [None]:
# Retrieve column(s) as a DataFrame by passing a list of labels (attribute access cannot do this)
column_dataframe = df3_dict[['state']] # double square brackets: a list of column labels returns a DataFrame, even for one column
column_dataframe

In [None]:
# Print a concise summary of the one-column DataFrame (index, column dtype, non-null count).
# info is a method and must be called; the bare attribute only shows the bound-method repr.
column_dataframe.info()

In [None]:
# Attribute access returns the same Series as df3_dict['state'];
# it only works when the column name is a valid Python identifier
df3_dict.state # using attribute

In [None]:
# Retrieve a row by its index label with the special loc attribute (iloc is the integer-position counterpart)
# The row comes back as a Series indexed by the column names
df3_dict.loc['three']

In [None]:
# --- Creating DataFrame from a nested dict of Python dicts ---
# pandas interprets the outer dict keys as the columns and the inner keys as the row indices.
# Inner keys must be unique: a repeated key in a dict literal silently keeps only the last
# value, so Nevada's two observations are keyed by two distinct years.
# NOTE(review): 2001 is assumed for the first Nevada entry (the literal previously repeated 2002) — confirm.

dict_Python = {'Nevada': {2001: 2.4, 2002: 2.0},
               'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}
frame_dict_Python = pd.DataFrame(dict_Python)
frame_dict_Python

In [None]:
# --- Creating DataFrame from a dict of pandas Series ---
# Each Series becomes a column and the Series indexes are aligned into the row index.
# The positional slices are spelled with .iloc because these Series carry integer (year)
# labels; a bare [:2] on such an index is easy to misread as label-based.
dict_Series = {'Nevada': frame_dict_Python['Nevada'].iloc[:2],
               'Ohio': frame_dict_Python['Ohio'].iloc[:-1]}
frame_dict_Series = pd.DataFrame(dict_Series)
frame_dict_Series


In [None]:
# Row labels of the frame (built from the inner dict keys)
frame_dict_Python.index

In [None]:
# Column labels of the frame (built from the outer dict keys)
frame_dict_Python.columns

In [None]:
# Indexing, selection, filtering
# Note: slicing with labels differs from normal Python slicing in that the end-point is inclusive

frame_series = pd.Series(data=np.arange(4), index=['a', 'b', 'c', 'd'])
frame_series

In [None]:
# Note that with labels the end-point is inclusive: both 'b' and 'c' are returned
frame_series['b':'c']

In [None]:
# Note that with integer positions, as in normal Python slicing, the end-point is exclusive:
# positions 1 and 2 are returned, position 3 is not
frame_series[1:3]

In [None]:
# loc (labels) and iloc (integer positions) select a subset of rows and columns
# from a DataFrame with NumPy-like notation; frame1 is the 4x4 example they operate on
frame1 = pd.DataFrame(
    data=np.arange(16).reshape((4, 4)),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
)
frame1

In [None]:
# Label-based selection: row 'Colorado', columns 'two' and 'three'.
# Both selectors are lists, so the result is a DataFrame
frame1.loc[['Colorado'], ['two','three']]

In [None]:
# Position-based selection: the row at position 2, columns at positions 3, 0, 1 in that order
frame1.iloc[2, [3,0,1]]

In [None]:
# The whole row at integer position 2, returned as a Series indexed by the column names
frame1.iloc[2]

In [None]:
# Label slice over the rows; with loc both end-points ('Colorado' and 'Utah') are included
frame1.loc['Colorado':'Utah']

In [None]:
# Operations between DataFrame and Series
# series2 holds the values 0, 1, 2 under the labels 'b', 'e', 'f'
series2 = pd.Series(data=range(3), index=['b', 'e', 'f'])
series2

In [None]:
# Arithmetic between DataFrame and Series
# Default behaviour: the Series index is matched against the DataFrame's columns,
# and the operation is broadcast down the rows
frame2 = pd.DataFrame(
    data=np.arange(12.).reshape((4, 3)),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
frame2

In [None]:
# The Series index is aligned with the DataFrame's columns; a label present in only one
# operand ('d' in frame2, 'f' in series2) produces an all-NaN column in the result
frame2 +  series2

In [None]:
# Option to broadcast over the DataFrame's columns: the Series index is matched against
# the DataFrame's rows, and the operation is broadcast across the columns.
# (This requires one of the arithmetic methods, such as sub, with an explicit axis.)
series3 = frame2['d']
frame2.sub(series3, axis='index')  # axis='index' is the same as axis=0

In [None]:
# Correlation and Covariance
# Download one price/volume history per ticker, then build two frames keyed by ticker:
# adjusted closing prices and traded volumes.
# NOTE(review): this cell needs network access and a working Yahoo backend in the installed
# pandas_datareader — confirm before relying on Restart & Run All.
# NOTE(review): consider moving this import up to the notebook's import cell.
import pandas_datareader.data as web

tickers = ['AAPL', 'IBM', 'MSFT', 'GOOG']
all_data = {symbol: web.get_data_yahoo(symbol) for symbol in tickers}
price = pd.DataFrame({symbol: history['Adj Close'] for symbol, history in all_data.items()})
volume = pd.DataFrame({symbol: history['Volume'] for symbol, history in all_data.items()})

In [None]:
# Percent change between consecutive rows of each price column
# (the first row has no predecessor, so it is NaN); show the last few rows
returns = price.pct_change()
returns.tail()

In [None]:
# Correlation between the MSFT and IBM return Series (a single scalar)
returns['MSFT'].corr(returns['IBM'])

In [None]:
# Covariance between the MSFT and IBM return Series (a single scalar)
returns['MSFT'].cov(returns['IBM'])

In [None]:
# Full correlation matrix: one row and one column per ticker
returns.corr()

In [None]:
# Full covariance matrix: one row and one column per ticker
returns.cov()

In [None]:
# Passing a Series: correlation of every column of returns with the IBM returns
returns.corrwith(returns.IBM)

In [None]:
# Passing a DataFrame: columns are matched by name, so each ticker's returns
# are correlated with that same ticker's volume
returns.corrwith(volume)

In [None]:
# axis='columns' computes the correlation row by row (across the tickers of each row) instead
returns.corrwith(volume, axis='columns')