In [1]:
# Pandas Web User Guide Follow along

# Computational tools

#    https://pandas.pydata.org/docs/user_guide/computation.html

# Created 12/02/20

In [2]:
# Notes & hints on Matplotlib & related visualization
# Created by: Tony Held, tony.held@gmail.com
# Created on: 12/01/2020

# **********************************************
#    Standard Imports 
# **********************************************

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

# **********************************************
#    Matplotlib related settings 
#
#    (also applies to figures created in pandas)
# **********************************************

# Jupyter specific settings start with %

# %matplotlib 'option' determines if figures are inline or will update dynamically

# Not dynamic, but may be the most robust
# %matplotlib inline
# Dynamic and can update figures
%matplotlib notebook
# May only work in JupyterLab (not notebook)
# %matplotlib widget     

# Default matplotlib figure size
plt.rcParams["figure.figsize"] = (5,3)     # Set default fig size

# Setting figure size parameter for a single plot made from a pandas dataframe
# df.plot(figsize=(4,3))

# **********************************************
#    Matplotlib Figure Paradigms
#
#    Details at: https://matplotlib.org/3.3.3/tutorials/introductory/usage.html
# **********************************************

# There are two paradigms for using Matplotlib to create/manipulate matplotlib figures:

#    1.  Explicitly create figures and axes, 
#           and call methods on them from an axes (the "object-oriented (OO) style").
#    2.  Rely on pyplot to automatically create and manage the figures and axes, 
#           and use pyplot functions for plotting.

# In OO style you create the figure and axes ahead of time
# and then call plot on the axes you created

# fig, ax = plt.subplots()  # Create a figure and an axes.
# ax.plot(x,y)              # call plot from the axes you created
# ax.set_xlabel('x label')  # Add a label to the x axis.


# In automatic mode, you call plot and the figures/axes will be created dynamically
# It requires less code than OO mode initially, but if you want to modify the figure
# it may require additional code to determine the proper references to the figure/axes

# plt.plot(x,y)              # create plot - matplotlib guesses which figure/axes to put it on
# plt.xlabel('x label')      # Add a label to the x axis
                             # Notice you don't use 'set_' when labeling from plt
                             # Whereas you did when plotting in OO mode
        
# The form of the plot documentation for each mode is:
# matplotlib.pyplot.function     # Automatic Mode
# matplotlib.axes.Axes.function  # OO Mode


# Some automatic mode statements that may be useful
# plt.get_fignums()       # Get list of active figures
# plt.figure(x);          # Activate the figure if it exists, create it if it does not
                          # Semicolon is recommended so you don't get multiple visualizations of the same fig
# plt.close('all')        # Close all open figures
# plt.gcf()               # Get the current figure.
                          # If no current figure exists, a new one is created using figure()
# plt.gca()               # Get the current axes, creating one if necessary.

# Additional useful function calls
# ax_list = fig.axes      <-- Returns a list of the Axes objects in the Figure object:


# **********************************************
#    Pandas Figure Paradigms
# **********************************************

# If you plot directly from a pandas object (e.g. a DataFrame)
# your plot will be created in the automatic mode listed above

# If you want your plot to be placed on an axes created in OO mode
# you can specify the axes in the plot statement as follows

# fig, ax = plt.subplots()
# df.plot(..., ax=ax)


# **********************************************
#    Jupyter Interactive Mode Settings
#
#    These control what is printed in the out: cell
# **********************************************

# The available interactivity levels are discussed at:
# https://ipython.readthedocs.io/en/stable/config/options/terminal.html
# Options are: 'all', 'last', 'last_expr', 'none', 'last_expr_or_assign'
# Default is: 'last_expr'

from IPython.core.interactiveshell import InteractiveShell
# Of the ast_node_interactivity options, I can only get last_expr_or_assign to work
InteractiveShell.ast_node_interactivity = "last_expr_or_assign"

# Allows autocomplete to work properly
%config Completer.use_jedi = False

In [3]:
def diag(*args):
    """Print quick diagnostics for each object passed in.

    DataFrames get their ``info()`` summary followed by a rich display.
    Any other object gets a separator line, its type, its length (when it
    has one), its ``info()`` summary (when it offers one) and finally a
    rich display, falling back to ``print`` if displaying fails.

    Parameters
    ----------
    *args
        Objects to inspect; they are handled one after another in the
        order given.

    Returns
    -------
    None
    """
    for obj in args:
        if isinstance(obj, pd.DataFrame):
            # info() writes its summary itself and returns None, so it must
            # not be wrapped in print() (that would add a stray "None" line).
            obj.info()
            display(obj)
            continue

        print(f'{"-"*40}')
        print(f'Type: {type(obj)}')

        # The remaining steps are deliberately best-effort: an object that
        # lacks a length or an info() method is simply skipped for that step.
        # Catching Exception (not a bare except) keeps Ctrl-C working.
        try:
            print(f'Length: {len(obj)}')
        except Exception:
            pass

        try:
            obj.info()
        except Exception:
            pass

        try:
            display(obj)
        except Exception:
            # Covers both a failing rich display and display() being absent.
            print(obj)
# Short alias so diag() can be called as z(...).
z = diag
# Short alias for display(), which is not defined in this notebook and is
# presumably the IPython-provided built-in. NOTE(review): the trailing
# semicolon looks intended to suppress the cell output that the
# "last_expr_or_assign" setting would otherwise echo — confirm.
d = display;

In [4]:
def read_df(text):
    """Build a DataFrame from the printed text of a DataFrame.

    Intended for tables copied from the pandas website tutorial: the first
    non-blank line holds the column labels, and every following non-blank
    line holds an index label followed by that row's values, all separated
    by whitespace.

    Parameters
    ----------
    text : str
        The pasted table. Blank or whitespace-only lines (for example the
        leading and trailing newlines of a triple-quoted string) are ignored.

    Returns
    -------
    pandas.DataFrame
        Frame whose cells and labels are the raw string tokens; nothing is
        converted to numbers here. An empty frame is returned when ``text``
        contains no tokens at all.

    Raises
    ------
    ValueError
        Raised by pandas when a row's value count does not match the header.
    """
    # Tokenise every non-blank line; blank lines would otherwise crash on
    # the index lookup or be mistaken for an empty header.
    rows = [line.split() for line in text.splitlines() if line.strip()]
    if not rows:
        return pd.DataFrame([], index=[], columns=[])

    cols, *body = rows
    index = [vals[0] for vals in body]
    array = [vals[1:] for vals in body]
    return pd.DataFrame(array, index=index, columns=cols)

In [5]:
# set random seed used in many of the pandas online documentation examples
np.random.seed(seed=123456)

In [6]:
# Change of each element relative to the one before it, expressed as a
# fraction; the first element has no predecessor, so its result is NaN.
ser = pd.Series(np.random.randn(8))
ser.pct_change()

0         NaN
1   -1.602976
2    4.334938
3   -0.247456
4   -2.067345
5   -1.142903
6   -1.688214
7   -9.759729
dtype: float64

In [7]:
# periods=3 compares each row with the row three positions earlier,
# which is why the first three rows of the result are NaN.
df = pd.DataFrame(np.random.randn(10, 4))
df.pct_change(periods=3)

Unnamed: 0,0,1,2,3
0,,,,
1,,,,
2,,,,
3,-0.21832,-1.054001,1.987147,-0.510183
4,-0.439121,-1.816454,0.649715,-4.822809
5,-0.127833,-3.042065,-5.866604,-1.776977
6,-2.596833,-1.959538,-2.111697,-3.7989
7,-0.117826,-2.169058,0.036094,-0.067696
8,2.492606,-1.35732,-1.205802,-1.558697
9,-1.012977,2.324558,-1.003744,-0.371806


In [8]:
# Covariance between two separately drawn random Series of equal length.
s1 = pd.Series(np.random.randn(1000))
s2 = pd.Series(np.random.randn(1000))
s1.cov(s2)

0.000680108817431107

In [9]:
# 1000 x 5 frame of random normal samples with named columns;
# frame.cov() further down operates on it.
frame = pd.DataFrame(np.random.randn(1000, 5),
                     columns=['a', 'b', 'c', 'd', 'e'])  

Unnamed: 0,a,b,c,d,e
0,-1.163717,-0.347476,-1.243356,-0.982868,0.261880
1,-1.024366,1.999135,1.183059,-0.927663,1.145918
2,0.324487,-0.714795,0.052685,0.821626,1.028772
3,0.867733,0.957556,1.222317,1.156500,1.000869
4,-0.939723,-1.081296,0.896745,-0.554688,-2.165213
...,...,...,...,...,...
995,0.748906,2.326435,-0.290917,-1.584261,0.440802
996,-1.440684,1.520191,0.796387,0.012681,0.717808
997,-0.900959,1.180217,-1.297617,-0.131675,0.625264
998,-1.138593,-0.272501,-1.208279,-1.976958,-0.376528


In [10]:
# Pairwise covariances between the columns of frame (5 x 5 result).
frame.cov()

Unnamed: 0,a,b,c,d,e
a,1.000882,-0.003177,-0.002698,-0.006889,0.031912
b,-0.003177,1.024721,0.000191,0.009212,0.000857
c,-0.002698,0.000191,0.950735,-0.031743,-0.005087
d,-0.006889,0.009212,-0.031743,1.002983,-0.047952
e,0.031912,0.000857,-0.005087,-0.047952,1.042487
