# Python notebook to help expedite coding <a name="Head"></a>

# Table of contents: 

[Imports for Notebook](#Imports)  
[Virtual Environments](#Venv)  
[Python Native Functions](#Native)  
[Useful Functions](#Useful_Functions)  
[Loop through all files in a directory](#Loop_Directory)  
[Import data from file](#Import_Data)  
[Export data to file](#Export_Data)  
[Graphing](#Graphing)  
[Numpy](#Numpy)  
[Pandas](#Pandas)  
[Bits and Bytes](#Bits_Bytes)  
[Scipy Curve Fit](#curve_fit)  
[Exploratory Data Analysis](#EDA)  
[If Name is Main](#Name_Main)  

# Imports for Notebook <a name="Imports"></a>

In [None]:
import glob, os, csv, platform, sys, random, time
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import sklearn

# These lines are to allow multiple outputs when running cells in the notebook. Not needed outside of notebook.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
# Turns off auto complete on brackets, quotes, parenthesis, etc...
# Run this cell then restart kernel
# NOTE(review): ConfigManager comes from the 'notebook' package, so this presumably only
# affects the classic Jupyter Notebook front end - confirm before relying on it elsewhere
from notebook.services.config import ConfigManager
c = ConfigManager()
# Sets CodeCell -> cm_config -> autoCloseBrackets to False in the 'notebook' config section
c.update('notebook', {"CodeCell": {"cm_config": {"autoCloseBrackets": False}}})

In [None]:
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999

# Virtual Environments <a name="Venv"></a>

To create a virtual environment in the current working directory:  
python -m venv {name}  

To activate the virtual environment, run the activate.bat file in the Scripts folder:  
{name}\Scripts\activate.bat  

Pip can then be used to install packages to the virtual environment:  
python -m pip install {package} {...}  

To save a list of installed packages in the virtual environment, use the freeze function of pip and output to a text file:  
python -m pip freeze > packages.txt  

To install packages from a packages text file, use the -r flag of the install function of pip:  
python -m pip install -r packages.txt  

# Python Native Functions <a name="Native"></a>

Native functions may be run within the notebook for testing.

In [None]:
# Arithmetic operators, the result for x = 10 and y = 5 is shown in parentheses
x = 10
y = 5
x + y # Sum of x and y (15)
x - y # Difference of x and y (5)
x * y # Product of x and y (50)
x / y # Quotient of x and y, always a float (2.0)
x // y # Floored quotient of x and y (2)
x % y # Remainder of x / y (0)
-x # Negated x (-10)
x ** y # x to the power of y (100000)

In [None]:
x = -1
abs(x) # Return the absolute value of a number (1)

In [None]:
condition = [True, False, True]
all(condition) # Return true if all elements are true (False here, one element is False)
any(condition) # Return true if any element is true (True here)

In [None]:
iterable = ['a', 'b', 'c']
list(enumerate(iterable)) # Returns a list of tuples containing a count and the values obtained from iterating

# Unpack the count and the value directly in the loop header
# NOTE(review): a bare expression inside a loop body is presumably not displayed by the
# notebook, use print(count, item) to see the values
for count, item in enumerate(iterable): 
    count, item

In [None]:
input("User Input: ") # User input with prompt, the entered text is returned as a string

In [None]:
x = [1, 2, 3, 4, 5]
len(x) # Returns length of an object (5)
max(x) # Returns maximum item in an iterable (5)
min(x) # Returns minimum item in an iterable (1)
print(x) # Prints to standard output (the cell output or the console)
sum(x) # Returns the sum of the items in an iterable (15)

In [None]:
# range(stop) or range(start, stop [,step])
# creates iterable from 0 to stop or from start to stop with option of step size
# stop itself is never included
list(range(10)) # 0 through 9
list(range(5,10)) # 5 through 9
list(range(2,11,2)) # 2, 4, 6, 8, 10

In [None]:
x = [1, 2, 3, 4, 5]
y = [2, 3, 4, 5, 6]
list(zip(x,y)) # Returns list of tuples where the i-th tuple contains the i-th element of each argument, (1, 2) first

In [None]:
x = 10
int(x) # Returns integer number
float(x) # Returns floating point number
str(x) # Returns string object

# NOTE: re is also the name of the standard library regular expression module,
# pick another name if that module is imported in the same script
re = 2
im = 3
complex(re,im) # Returns complex number with real part (re) and imaginary part (im)

y = 4
divmod(x,y) # Returns the pair (x // y, x % y), (2, 2) here
pow(x,y) # Returns x to the power of y (10000)

Comparisons:  
'<' Strictly less than  
'<=' Less than or equal to  
'>' Strictly greater than  
'>=' Greater than or equal to  
'==' Equal  
'!=' Not equal  
'is' Object identity  
'is not' Negated object identity  

In [None]:
# Common sequence and list operations
# The methods from append onward modify s in place, so each line sees the changes made above it
s = [1,2,3,4,5]
t = [6,7,8,9,10]
x = 2
x in s # True if an item of s is equal to x, else false
x not in s # False if an item of s is equal to x, else true
s + t # Concatenation of s and t
n = 3
s * n # s repeated n times
i = 0
s[i] # ith item of s, origin 0
j = 4
s[i:j] # Slice of s from i up to but not including j
k = 2
s[i:j:k] # Slice of s from i up to but not including j with step k
s.index(x) # Index of the first occurrence of x in s
s.count(x) # Total number of occurrences of x in s
s.append(x) # add element to end of the list
s.insert(i,x) # add x element at i index and shifts the rest to the right
s.remove(x) # removes first instance of x from list
s.sort() # sorts list in place
s.reverse() # reverses list in place
s.pop(i) # removes and returns the element at given index

In [None]:
# Common dictionary operations, each line sees the changes made by the lines above it
dictionary = {'key1': [1,2,3], 'key2': [4,5,6], 'key3': [7,8,9]} # define a dictionary
dictionary['key1'] # Accessing a dictionary by key
dictionary['newkey'] = [10,11,12] # Adding to dictionary
del dictionary['newkey'] # delete an entry in the dictionary
dictionary['key1'][0] # access a value within the entry of the dictionary
len(dictionary) # returns number of keys in dictionary
dictionary.keys() # returns the keys of the dictionary
dictionary.pop('key2') # removes key from dictionary and returns the value stored under the key
dictionary.clear() # removes all elements from dictionary
dictionary2 = {'key4': ['a','b','c']}
dictionary.update(dictionary2) # adds the entries of the second dictionary to the first
dictionary.items() # view of the (key, value) pairs in the dictionary

# Useful Functions <a name="Useful_Functions"></a>

These are commonly used functions not easily found in modules. Will not work within the notebook. Meant to be copied into scripts.

In [None]:
def listSerialPorts():
    """
    Returns a list of serial port names for the platform the script is running on
    Windows: tries to open COM1 through COM256 and keeps every port that opens
    Mac: every path matching /dev/tty.usb*
    Anything else is assumed to be Linux: every path matching /dev/ttyS* or /dev/ttyUSB*
    NOTE: the Windows branch uses the name 'serial' (presumably pyserial), which must be
    imported by the script using this function
    """
    system_name = platform.system()

    if system_name == "Darwin":
        # Mac
        return glob.glob('/dev/tty.usb*')# + glob.glob('/dev/cu*')

    if system_name != "Windows":
        # Assume Linux
        return glob.glob('/dev/ttyS*') + glob.glob('/dev/ttyUSB*')

    # Windows: a port counts as available when it opens without a SerialException
    available = []
    for port_number in range(1, 257):
        try:
            port = serial.Serial('com' + str(port_number))
            available.append("COM" + str(port_number))
            port.close()
        except serial.SerialException:
            pass
    return available

In [None]:
def rmse(prediction, target):
    """
    Returns the root mean square error between prediction and target
    prediction and target can be lists, tuples, or numpy arrays of the same length
    Prints an error and exits the script with sys.exit(1) if the lengths differ
    """
    if len(prediction) != len(target):
        print("Error: Length of arrays must be the same to calculate rmse")
        sys.exit(1)
    # plain sequences do not support element-wise subtraction, convert them to numpy arrays
    if isinstance(prediction, (list, tuple)):
        prediction = np.asarray(prediction)
    if isinstance(target, (list, tuple)):
        target = np.asarray(target)
    return np.sqrt(((prediction - target) ** 2).mean())

In [None]:
def calc_slope(x, y):
    """
    Returns the least squares slope of the discrete points x and y
    x and y can be lists, tuples, or numpy arrays of the same length
    Prints an error and exits the script with sys.exit(1) if the lengths differ
    """
    if len(x) != len(y):
        print("Error: Length of arrays must be the same to calculate slope")
        sys.exit(1)
    # plain sequences do not support element-wise math, convert them to numpy arrays
    if isinstance(x, (list, tuple)):
        x = np.asarray(x)
    if isinstance(y, (list, tuple)):
        y = np.asarray(y)
    # deviations from the mean, calculated once and reused below
    x_dev = x - np.mean(x)
    y_dev = y - np.mean(y)
    return np.sum(x_dev * y_dev) / np.sum(x_dev ** 2)

In [None]:
def fir_sec_der(x_data, y_data, box_width):
    """
    Returns the first and second derivative of the y_data
    The box_width is the number of data points to the left and right to include in calculating the derivative
    The edges of the data are zero if they are within the box_width distance to the edge
    (twice the box_width for the second derivative, since it is calculated from the first derivative)
    x_data and y_data can be lists, tuples, or numpy arrays of the same length
    The derivatives are returned as float arrays so the slopes of integer data are not truncated
    Prints an error and exits the script with sys.exit(1) if the lengths differ
    """

    if len(x_data) != len(y_data):
        print("Error: Length of arrays must be the same to calculate slope")
        sys.exit(1)
    if isinstance(x_data, (list, tuple)):
        x_data = np.asarray(x_data)
    if isinstance(y_data, (list, tuple)):
        y_data = np.asarray(y_data)

    # Variables to hold first and second derivative data
    # dtype is forced to float, zeros_like would otherwise copy an integer dtype from y_data
    dt_y_data = np.zeros_like(y_data, dtype = float)
    ddt_y_data = np.zeros_like(y_data, dtype = float)

    # Loop through all data to calculate first derivative
    for index in range(box_width, len(y_data) - box_width):
        window = slice(index - box_width, index + box_width + 1) # points centered on index
        dt_y_data[index] = calc_slope(x_data[window], y_data[window])

    # Loop through all first derivative data to calculate second derivative
    # starts another box_width in so the window only covers calculated first derivatives
    for index in range(box_width * 2, len(dt_y_data) - (2 * box_width)):
        window = slice(index - box_width, index + box_width + 1)
        ddt_y_data[index] = calc_slope(x_data[window], dt_y_data[window])

    return dt_y_data, ddt_y_data

In [None]:
def pad_filename_iterator(extension = '.txt', delimiter = '_', pad = 4):
    """
    Pads the file iterator of every matching file in the current directory with zeros
    Output data will be in order when looping through all files
    File iterator needs to be at the end of the filename, after the last delimiter
    ex. test_1.txt -> test_0001.txt

    extension: file extension to match including the dot, can be any length
    delimiter: character separating the identifiers and the file iterator
    pad: minimum number of digits of the file iterator after padding
    """
    directory = os.getcwd()
    # os.path.join builds the pattern with the separator of the current platform
    for old_path in glob.glob(os.path.join(directory, '*' + extension)):
        filename = os.path.basename(old_path)
        name = filename[:len(filename) - len(extension)] # filename without file extension
        *identifiers, cycle = name.split(delimiter) # split name into identifiers and cycle number
        cycle = cycle.zfill(pad) # pad the cycle number with zeros so data output will be in order
        new_name = delimiter.join(identifiers + [cycle]) # combine identifiers and padded cycle number
        new_path = os.path.join(directory, new_name + extension)
        if new_path != old_path: # nothing to rename if the cycle number was already padded
            os.rename(old_path, new_path)

In [None]:
def transpose_csv(file_name):
    # Swaps the rows and columns of a csv file and writes the result back to the same file
    # The file is read without a header row and written without column names or row index
    table = pd.read_csv(file_name, header = None)
    transposed = table.T
    transposed.to_csv(file_name, header = False, index = False)

# Loop through all files in a directory <a name="Loop_Directory"></a>

Used when analyzing data in multiple files in the same directory. Can do either '.txt' or '.csv' files by changing the extension in the path. The '*' wildcard is expanded by Python's glob module rather than by the operating system, so the pattern should also work on Mac and Linux, although it has not been tested there. Not meant to be run within the notebook.

In [None]:
for file in glob.glob(os.path.join(os.getcwd(), '*.txt')): # or *.csv
    # file is the full path to the file including extension
    filename = os.path.basename(file) # with extension
    # without extension, splitext works for an extension of any length
    filename = os.path.splitext(os.path.basename(file))[0]

Returns all files in the source directory and sub directories

In [None]:
source_directory = 'path' # top directory to search
files = [] # full paths of every file found
# os.walk visits the source directory and every sub directory below it
for (path, directory, filenames) in os.walk(source_directory):
    files.extend(os.path.join(path, file) for file in filenames)

# Import data from file <a name="Import_Data"></a>

## Read data line by line

This is useful when you may not want all the data, but only a portion of it. Not meant to be run within the notebook.

In [None]:
with open('test.txt') as file_in:
    for row in file_in:
        # row is full text string in file including new line character
        # rstrip only removes the new line character if it is there,
        # the last line of a file may not end with one
        row = row.rstrip('\n')
        row_list = row.split(',') # split string into list if there is a delimiter character
        row_float_list = [float(number) for number in row_list] # float every value of the split string

## Read data into numpy array

This can pull all numerical data into a numpy array very quickly. Numpy arrays only use numerical values. Not meant to be run within the notebook.

In [None]:
# Load a delimited text file of numbers into a numpy array
data = np.loadtxt('test.txt', # filename of data
                  delimiter = ',', # separate columns by delimiter
                  skiprows = 1, # skip header if there is text
                  dtype = float, # can use str to load data as text if getting error (np.str was removed from NumPy)
                  usecols = (range(1,4)), # columns 1 to 3, skips first column if text, None returns all columns
                  unpack = False) # use unpack to separate columns into unique variables

In [None]:
# Alternative to np.loadtxt that can also skip lines at the bottom of the file
# The skip counts below are examples, adjust them to the file being read
data = np.genfromtxt('test.txt', # filename of data
                     dtype = float, # data type of data
                     delimiter = ',', # separate columns by delimiter
                     skip_header = 3, # skip number of lines at top of file
                     skip_footer = 502) # skip number of lines at bottom of file

## Read data into pandas dataframe

Read data directly into dataframe
https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html

In [None]:
# Read a delimited text file into a pandas DataFrame
data = pd.read_csv('test.txt', # filename of data
                 delimiter = ',', # delimiter character
                 header = 1, # row number to use as column names, counted from 0 so 1 is the second line of the file
                 #names = ['A','B','C'], # If file has no header, specify column names and pass header = None
                 index_col = None, # column number to use as index, None uses row number
                 true_values = ['true'], # values to consider as True
                 false_values = ['false'], # values to consider as False
                 skiprows = None, # number of rows to skip or use function to specify (lambda x: x % 10 == 0)
                 nrows = None, # Read certain number of rows, None reads all rows
                 na_values = ['NaN'] # What values are considered NaN
                 )

# Export data to file <a name="Export_Data"></a>

In [None]:
# Define a file path
# os.path.join uses the path separator of the current platform
file = os.path.join(os.getcwd(), 'output.csv') # full output file path

## Directly writing to file

You must do all the formatting yourself when writing directly to the file. Remember to close the file at the end of the script. Issues will arise if the script hits an error before it can close the file. Not meant to be run within the notebook.

In [None]:
new_file = not os.path.isfile(file) # check before opening, opening creates the file
fileout = open(file, 'a') # 'a' appends if file exists and creates the file if it does not exist
try:
    if new_file:
        fileout.write('header 1, header 2, header 3\n') # write header once, only to a new file
    fileout.write('data 1, data 2, data 3\n') # write data to file
    # '\n' is new line character
    # to skip a row '\n\n'
finally:
    fileout.close() # close file even if the script hits an error while writing

## Using csv module

The csv module can handle the formatting when writing to the file. Another option instead of 'writerow' is to use 'writerows' which can write multiple lists to the file at once. Not meant to be run within the notebook.

In [None]:
# Initialize file at beginning of program
if not os.path.isfile(file): # skip initialization if file exists
    with open(file, 'w', newline = '') as fileout: # using 'with' prevents forgetting to close file
        writer = csv.writer(fileout) # initialize csv writer object
        writer.writerow(['header 1', 'header 2', 'header 3']) # write list of data as row to file

# Append data to file through iteration
with open(file, 'a', newline = '') as fileout: # using 'with' prevents forgetting to close file
    writer = csv.writer(fileout) # initialize csv writer object
    writer.writerow(['data 1', 'data 2', 'data 3']) # write list of data as row to file

## Using Numpy module

Outputs numerical data to file

In [None]:
# Write a numpy array to a delimited text file, one row of the array per line
x = np.array([[1,2,3],[4,5,6]])
np.savetxt('test.txt', # file path
           x, # Numpy array
           delimiter = ',', # Separator character
           fmt = '%.18e', # Format of values
           newline = '\n', # Newline character
           header = 'Header', # Text at top of file, written as a comment line ('# ' prefix by default)
           footer = 'Footer') # Text at end of file, written as a comment line ('# ' prefix by default)

# Formatting values
# %.18e represents 18 decimal points in scientific notation
# %d represents signed integer
# %.5f represents 5 decimal points in floating point notation

## Using Pandas module

Outputs Pandas DataFrame to csv file

In [None]:
# NOTE: df is not defined in this cell, it must be an existing pandas DataFrame
df.to_csv('test.txt', # file path
         sep = ',', # Delimiter character
         na_rep = 'NaN', # string to represent NaN
         columns = None, # Specify specific columns, None writes all columns
         header = True, # Write out column names to file
         index = True # Write row index names
         )

# Graphing <a name="Graphing"></a>

Use dummy data for the graphing portion below. The graphing section can be run within the notebook.

https://matplotlib.org/api/pyplot_summary.html

In [None]:
# Used to specify graphing inline with the notebook
%matplotlib inline

In [None]:
# Used to specify graphing with widgets similar to plotting in IDLE
%matplotlib nbagg

In [None]:
# Dummy data for the graphs below, the integers 1 through 10 on both axes
x = np.arange(1, 11)
y = np.arange(1, 11)

## Single graph

Plots to one figure. To plot multiple lines, use another 'plt.plot' or iterate through y data with 'plt.plot'.

In [None]:
# Create the figure first so every call below acts on it
# Calling plt.clf() with no figure open makes pyplot create an extra empty figure
plt.figure(figsize = (15, 10)) # figure size in inches (width, height)
plt.clf() # Clears the figure, useful when iteratively plotting multiple graphs
plt.plot(x, # x data
         y, # y data
         c = 'r', # color
         ls = '-', # linestyle
         lw = 1, # line width in number points
         marker = '.', # marker
         ms = 10, # marker size in number of points
         label = 'data') # string identifier for legend

plt.axhline(2, color = 'y') # horizontal line on plot
plt.axvline(3, color = 'g') # vertical line on plot
plt.axhspan(6, 7, color = 'b') # horizontal area on plot
plt.axvspan(4, 5, color = 'm') # vertical area on plot

plt.legend(loc = 'best') # location of legend

plt.title('Title') # chart title

plt.xlabel('x-axis', fontsize = 20) # x-axis label
plt.xticks(np.arange(0, 16, step = 5), fontsize = 20) # set x-axis ticks
plt.xlim(0, 15) # set the limits of the x-axis

plt.ylabel('y-axis', fontsize = 20) # y-axis label
plt.yticks(np.arange(0, 17, step = 2), fontsize = 20) # set y-axis ticks
plt.ylim(0, 12) # set the limits of the y-axis

#plt.savefig(os.getcwd() + '/graph.png', dpi = 1000) # dpi is resolution
plt.show() # shows figure

To iteratively add data to multiple lines on the same figure, use the code below. This does not work in Jupyter.

In [None]:
# Live plot: adds one random point to every line on each iteration and redraws the figure
# Imports needed when this is copied into a script:
# import matplotlib.pyplot as plt
# import time, random

# Number of lines to plot random values
num_of_lines = 10

# Initialize lists to hold random data
xdata = [] # x values shared by all lines
ydata = [[] for iterate in range(num_of_lines)] # one list of y values per line

# Initialize Plot
plt.show()
axes = plt.gca() # current axes, the lines are added to it below

# Initialize list that holds all lines within the plot
# axes.plot returns a list of line objects, so each entry of lines is a list with one line
lines = []
for line in range(num_of_lines):
    lines.append(axes.plot(xdata,
                           ydata[line],
                           ls = '-',
                           lw = 0.5,
                           marker = '.',
                           ms = 5))

for i in range(100):
    # Append random data to x and y data
    xdata.append(i)
    for line in range(num_of_lines):
        ydata[line].append(random.randrange(0,100)) # random integer from 0 to 99
        # Set the data for each line, [0] is to select the line object out of the list returned by axes.plot
        lines[line][0].set_data(xdata, ydata[line])
        
    # Relimit the graph
    axes.relim()
    axes.autoscale_view(True)
    
    # Draw the graph and give time to draw
    plt.draw()
    plt.pause(1e-17)
    
    # Slow down iterating for viewing
    time.sleep(0.1)
 
# add this if you don't want the window to disappear at the end
plt.show()

## Subplots

Subplots can plot multiple graphs in one figure. To plot multiple lines in each subplot, use another 'ax[i].plot' inside the loop. You do not need to loop through all the subplots, but can reference them directly, ex. 'ax[0]'.

In [None]:
fig, ax = plt.subplots(2, 2, dpi = 100, figsize = (15,10)) # fig is figure, ax is subplots, 2 rows x 2 columns

ax = ax.flatten() # allows iteration over 2-D ax object
# not needed if only 1 column, reads left to right, top to bottom

fig.suptitle('Figure Title') # main title over all subplots

color = ['r', 'b', 'g', 'c']
titles = ['plot 1', 'plot 2', 'plot 3', 'plot 4']
label = ['data 1', 'data 2', 'data 3', 'data 4']

for i in range(4): # loop through all subplots
    ax[i].plot(x, # x data
               y, # y data
               c = color[i], # color
               ls = '-', # line style
               lw = 1, # line width
               marker = '.', # marker
               ms = 10, # marker size
               label = label[i]) # string identifier for legend

    ax[i].axhline(i + 3) # horizontal line on plot
    ax[i].axvline(i + 5) # vertical line on plot
    ax[i].axhspan(0, i + 1) # horizontal area on plot
    ax[i].axvspan(0, i + 1) # vertical area on plot

    # location must be passed with the loc keyword
    # a positional string is treated as the list of labels, not the location
    ax[i].legend(loc = 'best') # location of legend

    ax[i].set_title(titles[i], fontsize = 20) # subplot title

    ax[i].set_xlabel('x-axis', fontsize = 20) # x-axis label
    ax[i].set_xticks(np.arange(0, 16, step = 5)) # set x-axis ticks
    ax[i].set_xlim(0, 15) # set the limits of the x-axis

    ax[i].set_ylabel('y-axis', fontsize = 20) # y-axis label
    ax[i].set_yticks(np.arange(0, 17, step = 2)) # set y-axis ticks
    ax[i].set_ylim(0, 12) # set the limits of the y-axis

    ax[i].tick_params(axis = 'both', which = 'major', labelsize = 15) # Adjust font size of ticks

plt.tight_layout() # prevents text from overflowing in graph

#plt.savefig(os.getcwd() + '/graph.png', dpi = 1000) # dpi is resolution

plt.show() # shows figure

## Plot Module

In [None]:
import matplotlib.pyplot as plt

"""
Useful for iteratively adding data to a graphical interface throughout an experiment
The plot class has one graph with multiple lines
The subplot class has subplots with multiple lines
User should initialize class, set titles, set legend, then add data
x data should be a single value and y data should be a single value for each graph and each line in a list
example: plot.add(1, [2, 3, 4]) is adding 1 to x, 2 to y1, 3 to y2, and 4 to y3 for a 3 lined graph
example: subplot.add(1, [[2], [3, 4]] is adding 1 to x, 2 to y in subplot 1, and 3 to y1 and 4 to y2 in subplot 2
See working examples at the bottom of the file to run
"""

class plot:
    """
    One graph with multiple lines that is redrawn every time data is added
    Initialize the class, set titles, set legend, then add data
    example: plot.add(1, [2, 3, 4]) is adding 1 to x, 2 to y1, 3 to y2, and 4 to y3 for a 3 lined graph

    num_lines: number of lines on the graph
    ls, lw, marker, ms: linestyle, line width, marker type, and marker size used for every line
    roll: maximum number of points for each line, the oldest point is dropped when it is reached
    """

    def __init__(self, num_lines, ls = '-', lw = 0.5, marker = '.', ms = 5, roll = 500):
        plt.show(block = False) # prevents the graph from halting the script
        self.axes = plt.gca() # grab current axes object
        self.xdata = [] # initializes list for holding x data
        self.ydata = [[] for iterate in range(num_lines)] # initializes list for every y data
        self.title = 'Graph' # default title above graph
        self.xlabel = 'x-axis' # default x axis label
        self.ylabel = 'y-axis' # default y axis label
        self.roll = roll # maximum number of points for each line on graph
        self.lines = [] # list to hold all lines on graph
        for line in range(num_lines):
            # add all lines to the plot
            # axes.plot returns a list of line objects, so each entry is a list with one line
            self.lines.append(self.axes.plot(self.xdata,
                                             self.ydata[line],
                                             ls = ls, # linestyle
                                             lw = lw, # line width
                                             marker = marker, # marker type
                                             ms = ms)) # marker size

    # update the title and x and y axis labels
    # title, xlabel, and ylabel should be strings
    def set_titles(self, title, xlabel, ylabel):
        self.title = title
        self.xlabel = xlabel
        self.ylabel = ylabel

    # set legend for data series
    # labels should be list of string labels
    def set_legend(self, labels):
        for line, label in enumerate(labels):
            self.lines[line][0].set_label(label)

    # append value to buffer, dropping the oldest point first when the buffer is full
    # '>=' keeps the buffer at roll points, '>' would let it grow to roll + 1
    def _push(self, buffer, value):
        if buffer and len(buffer) >= self.roll:
            buffer.pop(0)
        buffer.append(value)

    # add xdata and ydata to the graph
    # xdata should be single value
    # ydata should be list of values to add to each line
    def add(self, xdata, ydata):
        self._push(self.xdata, xdata) # append x data
        for line, data in enumerate(ydata):
            self._push(self.ydata[line], data) # append y data
            self.lines[line][0].set_data(self.xdata, self.ydata[line]) # update line
        self.axes.relim() # relimit the axes
        self.axes.autoscale_view(True) # autoscale the axes
        # use the axes and figure of this graph instead of whichever is current in pyplot,
        # so the graph still updates correctly after another figure has been created
        figure = self.axes.figure
        figure.suptitle(self.title) # set title
        self.axes.set_xlabel(self.xlabel) # set x label
        self.axes.set_ylabel(self.ylabel) # set y label
        handles, labels = self.axes.get_legend_handles_labels()
        if labels: # only draw a legend once labels have been set with set_legend
            self.axes.legend(loc = 1) # set legend, loc = 1 is upper right
        figure.canvas.draw_idle() # update the figure
        plt.pause(1e-17) # pause for figure to update

class subplot:
    """Live-updating grid of subplots, each holding one or more rolling lines.

    rows, columns: number of rows and columns in the subplot grid
    num_lines: list giving the number of lines in each subplot, in the order
        of the flattened axes array
    ls, lw, marker, ms: line style, line width, marker type and marker size
        applied to every line
    roll: maximum number of points kept for each line
    """
    def __init__(self, rows, columns, num_lines, ls = '-', lw = 0.5, marker = '.', ms = 5, roll = 500):
        plt.show(block = False) # displays graph and prevents from halting program
        self.rows = rows # number of rows in subplot
        self.columns = columns # number of columns in subplot
        self.num_lines = num_lines # store number of lines variable
        # squeeze = False always returns a 2-D array of axes, so a 1 x 1 grid can be flattened as well
        self.fig, self.ax = plt.subplots(self.rows, self.columns, squeeze = False)
        self.ax = self.ax.flatten() # flattens axes for single value iteration instead of tuples
        graphs = len(self.num_lines) # number of subplots that carry data
        self.xdata = [] # x data container shared by every line
        self.ydata = [[[] for line in range(count)] for count in self.num_lines] # one y list per line per subplot
        self.title = ['Graph'] * graphs # initialize titles
        self.xlabel = ['x-axis'] * graphs # initialize x labels
        self.ylabel = ['y-axis'] * graphs # initialize y labels
        self.roll = roll # define max number of points on graph
        self.lines = [[] for graph in range(graphs)] # initialize container for line objects
        # add line object to subplot lists
        for graph, count in enumerate(self.num_lines):
            for line in range(count):
                self.lines[graph].append(self.ax[graph].plot(self.xdata, # xdata
                                                             self.ydata[graph][line], # ydata
                                                             ls = ls, # line style
                                                             lw = lw, # line width
                                                             marker = marker, # marker type (was hard-coded to '.')
                                                             ms = ms)[0]) # marker size (was hard-coded to 5)

    # title, xlabel, and ylabel should be lists with one string per subplot
    def set_titles(self, title, xlabel, ylabel):
        self.title = title # titles above the subplots
        self.xlabel = xlabel # x-axis labels
        self.ylabel = ylabel # y-axis labels

    # labels should be a list (one entry per subplot) of lists of string labels (one per line)
    def set_legend(self, labels):
        for graph, count in enumerate(self.num_lines):
            for line in range(count):
                self.lines[graph][line].set_label(labels[graph][line]) # adding label to each line for legend

    # drop the oldest entries of buffer so one more point fits without exceeding roll
    def _make_room(self, buffer):
        overflow = len(buffer) - self.roll + 1
        if overflow > 0:
            del buffer[:overflow]

    # add xdata and ydata to the subplots
    # xdata should be single value
    # ydata should be a list (one entry per subplot) of lists of values (one per line)
    def add(self, xdata, ydata):
        self._make_room(self.xdata) # roll before appending so at most roll points are kept
        self.xdata.append(xdata) # append x data to the x data list
        for graph, count in enumerate(self.num_lines):
            for line in range(count):
                self._make_room(self.ydata[graph][line]) # roll again
                self.ydata[graph][line].append(ydata[graph][line]) # append y data to lists
                self.lines[graph][line].set_data(self.xdata, self.ydata[graph][line]) # update each line in subplots
            self.ax[graph].relim() # relimit axes
            self.ax[graph].autoscale_view(True) # scale the view
            self.ax[graph].set_title(self.title[graph]) # set the subplot title
            self.ax[graph].set_xlabel(self.xlabel[graph]) # set the x-axis label
            self.ax[graph].set_ylabel(self.ylabel[graph]) # set the y-axis label
            self.ax[graph].legend(loc = 1) # set the legend in the upper right corner
        plt.tight_layout() # tight layout prevents overlapping text
        plt.draw() # update the figure with new data
        plt.pause(1e-17) # allow time for updating
                   
if __name__ == "__main__":
    import time, random
    plot_test = False # switch on to demo the single plot class
    subplot_test = False # switch on to demo the subplot class
    # exactly one of the two switches may be on at a time
    if plot_test and not subplot_test:
        graph = plot(2, roll = 20) # two lines with a roll amount of 20
        graph.set_titles('new title', 'x', 'y') # title, x label, y label
        graph.set_legend(['1', '2']) # one legend entry per line
        for step in range(100):
            samples = [random.randrange(0, 100), random.randrange(0, 100)] # one random value per line
            graph.add(step, samples)
            time.sleep(0.1) # slow the loop down so the update is visible
    elif subplot_test and not plot_test:
        graph = subplot(2, 1, [1, 1], roll = 20) # 2 rows, 1 column, one line in each subplot
        graph.set_titles(['new title1', 'new title2'], ['x1', 'x2'], ['y1', 'y2']) # per-subplot titles and labels
        graph.set_legend([['1'], ['2']]) # one legend entry per line, grouped by subplot
        for step in range(100):
            samples = [[random.randrange(0, 100)], [random.randrange(0, 100)]] # one random value per line per subplot
            graph.add(step, samples)
            time.sleep(0.1) # slow the loop down so the update is visible
    else:
        print('Set only one test to true and the other to false')

## Other arguments for graphing

colors: 'b' blue, 'g' green, 'r' red, 'c' cyan, 'm' magenta,
  'y' yellow, 'k' black
  
linestyles: '-' solid, '--' dashed, '-.' dash dot, ':' dotted, '' none

linemarkers: '.' point, 'o' circle, 'v' triangle down, '^' triangle up,
  '<' triangle left, '>' triangle right, 's' square, 'p' pentagon,
  '*' star, '+' plus, 'x' cross, 'D' diamond, 'd' thin diamond
  
legend: 'best', 'upper right', 'upper left', 'lower right', 'lower left',
  'center right', 'center left', 'lower center', 'upper center'

# Numpy <a name="Numpy"></a>

Numpy Reference  
https://numpy.org/doc/stable/reference/

## Constants

In [None]:
np.inf # Positive infinity
np.e # Euler's number, the base of the natural logarithm
np.pi # Pi
np.nan # Not a number (the np.NaN alias was removed in NumPy 2.0)

## Creating an Array

In [None]:
x = np.array([[1,2,3],[4,5,6]]) # Build a 2 x 3 array directly from a nested list
y = [[1,2,3],[4,5,6]]
np.asarray(y) # Convert an existing list to an array
np.arange(10) # Evenly spaced integers like the Python range function: np.arange([start,] stop [,step])
np.linspace(1,100,num = 10) # num evenly spaced values from start to stop: np.linspace(start, stop, num)

In [None]:
np.identity(2) # Return identity matrix with given length
np.ones((2,2)) # Return an array with all ones
np.zeros((2,2)) # Return an array with all zeros
np.zeros_like(x) # Return an array with all zeros with same shape as array argument
np.full((2,2),5) # Return an array filled with specified value with given shape

In [None]:
x = np.array([[1,2,3],[4,5,6]])
x.tolist() # Return array as a list
x.flatten() # Return copy of array collapsed to one dimension
y = x.copy() # Creates a copy of array instead of a pointer
x.real # Return real part of array
x.imag # Return imaginary part of array

## Basic Operations on an array

In [None]:
x = np.arange(6)
x.shape # Check shape of array; x is one-dimensional here
x.T # Transpose of the array (a one-dimensional array such as x comes back unchanged)
x.reshape(2,3) # Returns the data arranged as 2 rows and 3 columns; x itself is not modified

In [None]:
x = np.array([[1,2,3],[4,5,6],[7,8,9]])
y = np.array([10,11,12])
x.shape
y.shape
y = np.expand_dims(y,axis=0) # Add a leading dimension so y becomes a single row
y.shape
np.concatenate((x,y), axis = 0) # Append y as a new row at the bottom of x
np.concatenate((x,y.T), axis = 1) # Append y, transposed into a column, as a new right-hand column of x

In [None]:
x = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
np.delete(x, 1, axis = 0) # Delete axis 0 at index 1
np.insert(x, 0, [0,0,0,0], axis = 0) # Insert values at index along axis
np.append(x,[[13,14,15,16]],axis = 0) # Append values to end of array

In [None]:
# Slice an array based on conditions
x = np.arange(100).reshape(25,4)
x
x[x[:,0] >= 50] # One condition slice
x[(x[:,0] >= 30) & (x[:,0] <= 60)] # Two condition slice

## Measurements on array

In [None]:
x = np.array([[1,2,3,4], [5,2,7,8], [9,10,11,12]])
np.where(x == 2) # Returns indices where condition is satisfied
x.nonzero() # Return indices of the elements that are non-zero
x.argmax(axis = 0) # Return indices of maximum values along given axis
x.argmin(axis = 1) # Return indices of minimum values along given axis
x.min(axis = 0) # Return minimum values along given axis
x.max(axis = 1) # Return maximum values along given axis
np.ptp(x, axis = 0) # Return peak to peak (max - min) along a given axis; the x.ptp method was removed in NumPy 2.0
x.sum(axis = 0) # Return the sum of the array over the given axis
x.cumsum(axis = 1) # Return the cumulative sum over the array over a given axis
x.mean(axis = 0) # Return the mean of the array over a given axis
x.var(axis = 0) # Return the variance of the array over a given axis
x.std(axis = 0) # Return the standard deviation of the array over a given axis
x.prod(axis = 0) # Return the product of the array elements over a given axis
x.cumprod(axis = 0) # Return the cumulative product of the array over a given axis
x.all(axis = 0) # Returns true if all elements evaluate to true over a given axis
x.any(axis = 0) # Returns true if any elements evaluate to true over a given axis

## Iterating over array

In [None]:
a = np.arange(6).reshape(2,3)
for x in np.nditer(a):
    print(x, end = ' ')

# Pandas <a name="Pandas"></a>

Pandas Reference
https://pandas.pydata.org/pandas-docs/stable/index.html

## Creating Dataframe

In [None]:
df = pd.DataFrame(np.zeros((5,5)), # Pass numpy array with values
                 index = ['a', 'b', 'c', 'd', 'e'], # Specify the index
                 columns = ['A', 'B', 'C', 'D', 'E']) # Specify the column names
df

In [None]:
df['A'] = [-2, -1, 0, 1, 2]
df['B'] = [100, 200, 300, 400, 500]
df['C'] = ['red', 'blue', 'red', 'green', 'orange']
df['D'] = ['one', 'two', 'two', 'one', 'two']
df['E'] = [[1,2],[3,4],[5,6],[7,8],[9,10]]
df['F'] = ''
df

In [None]:
# Build a data frame from a dictionary: the keys become the column names
# A separate name is used so the labelled example frame `df` built above is not overwritten;
# the selection and grouping cells below still index it by 'a'..'e' and by columns 'C' and 'D'
data = {'A':['a','b','c'], 'B':['d','e','f']}
df_from_dict = pd.DataFrame(data)
df_from_dict

## Exploring Dataframe

In [None]:
df.head() # Shows first 5 rows of data frame

In [None]:
df.tail() # Shows last 5 rows of data frame

In [None]:
df.index # Returns the index of data frame

In [None]:
df.columns # Returns the columns of data frame

In [None]:
df.describe() # Quick statistic summary of numerical data in data frame

In [None]:
df['A'].unique() # Returns all unique values in series
df['A'].nunique() # Returns count of unique values in series

In [None]:
df['A'].idxmin() # Returns index of minimum value in series
df['A'].idxmax() # Returns index of maximum value in series

In [None]:
df['A'].value_counts(dropna=False) # Returns unique values and the number of occurences in series

In [None]:
df.info() # Returns some basic information regarding the data frame

## Selecting objects in Dataframe

In [None]:
df['A'] # Select a column

In [None]:
df[1:3] # Slice by row

In [None]:
df['b':'c'] # Slice by index

In [None]:
df.loc['a'] # slice row of certain index

In [None]:
df.loc[:,['B','C']] # Slice all rows of certain columns by index

In [None]:
df.loc[['b','c'],['B','C']] # Slice specific rows or specific columns by index

In [None]:
df.iloc[1:3,2:4] # Slice by row and column number instead of index

In [None]:
df[df.A >= 0] # Select data based on values in column

In [None]:
df[df['D'].isin(['two','three'])] # Select data based on categorical values in data frame

In [None]:
df_grouped = df.groupby('D') # Creates a key that splits the data into groups specified by column
df_grouped.sum()
df_grouped.get_group('one') # Return rows that fall into specified group
# Aggregate only the numeric columns and name the functions as strings: mean and std are undefined
# for the text columns, and passing the np.sum / np.mean / np.std callables is deprecated in pandas 2.x
# (assumes df is the labelled example frame whose 'A' and 'B' columns hold numbers)
df_grouped[['A', 'B']].agg(['sum', 'mean', 'std']) # Apply functions to the grouped data
df_grouped = df.groupby(['D','C']) # Can split by more than one column
df_grouped.sum()

## Operations on Dataframe

In [None]:
df.to_numpy() # Turns numerical data frame to numpy array

In [None]:
df.T # Returns transpose of data

In [None]:
df.sort_index(axis = 0, ascending = True) # Sort the data frame by axis

In [None]:
df.sort_values(by = 'B') # Sort data frame by values in column

In [None]:
df.dropna(how = 'any') # Drop all rows where values are missing

In [None]:
df.fillna(value = 0) # Fill all 'na's with a value

In [None]:
pd.isna(df) # Boolean mask showing where there are 'na's in data frame

In [None]:
# NOTE: these reductions expect numeric columns; on a frame that also holds text,
# select the numeric columns first, e.g. df.select_dtypes(include = 'number')
df.count() # Number of non-na observations
df.sum() # sum of values
df.mean() # mean of values
(df - df.mean()).abs().mean() # mean absolute deviation (DataFrame.mad was removed in pandas 2.0)
df.median() # median of values
df.min() # minimum of values
df.max() # maximum of values
df.mode() # mode of values
df.abs() # absolute values
df.prod() # product of values
df.std() # standard deviation of values
df.var() # variance of values
df.cumsum() # cumulative sum of values
df.cumprod() # cumulative product of values

In [None]:
df.apply(np.mean, axis = 0) # Applies function axis columns = 0, rows = 1
df.apply(lambda x: x.max() - x.min()) # Apply custom function using lambda

In [None]:
# loop through dataframe
for index, row in df.iterrows():
    print(index)
    print(row['A'])

In [None]:
# Example melt operation: turn one column per treatment into one row per (name, treatment) pair
# np.nan marks the missing result; the string 'NaN' would be treated as ordinary text, not as a missing value
data = {'name': ['Daniel', 'John', 'Jane'], 'treatment_a': [np.nan, 12, 24], 'treatment_b': [42, 31, 27]}
df = pd.DataFrame(data)
print(df)
df_melt = pd.melt(frame = df, # specify frame to melt
                  id_vars = 'name', # specify column that is the id of the observation
                  value_vars = ['treatment_a', 'treatment_b'], # specify columns that need to be separate observations
                  var_name = 'treatment', # specify name of the column containing identifier value_vars
                  value_name = 'result') # specify name of the column containing values from value_vars
print(df_melt)

In [None]:
# Example pivot operation: one row per day, one column per kind of measurement
data = {'day': ['1','2','3','1','2','3'], 'measurement': ['temp','temp','temp','hum','hum','hum'], 
        'reading': [23,24,23,50,53,52]}
df = pd.DataFrame(data)
print(df)
df_pivot = df.pivot_table(index='day', # what to use to identify an observation
                          columns='measurement', # what are the variables of the observation
                          values='reading', # values associated with each variable
                          aggfunc='mean') # in case there are duplicate values for an index; the string name replaces the deprecated np.mean callable
print(df_pivot.head())

# To reset an index after performing pivot
df_pivot_reset = df_pivot.reset_index()
print(df_pivot_reset.head())

In [None]:
# Cross tab counts the combinations between two categorical variables
data = {'temp': ['w','w','w','w','c','c','c'], 'weather': ['s','s','c','s','s','c','c']} # create sample data
df = pd.DataFrame(data)
pd.crosstab(df.temp, df.weather) # cross tab temp and weather

In [None]:
# Series.map translates each value through a lookup dictionary
mapping = {'a': True, 'b': False} # value found in the column -> value written to the new column
df = pd.DataFrame({'A': list('aabb')}) # sample frame with a single text column
df['B'] = df['A'].map(mapping) # the translated values go into a new column
print(df)

## Graphing data from dataframe

In [None]:
# Draw a histogram of the values in one column
# NOTE(review): a histogram needs numeric data, but in the example frame built earlier column 'D'
# holds the strings 'one' / 'two' - confirm which column is meant here
df['D'].plot(kind='hist') # Histogram plot

Refer to graphing reference in Pandas Reference
https://pandas.pydata.org/pandas-docs/stable/user_guide/visualization.html  
See site for plot keywords https://pandas.pydata.org/pandas-docs/version/0.23.4/generated/pandas.DataFrame.plot.html

# Working with Bits and Bytes <a name="Bits_Bytes"></a>

In [None]:
# Parse a string of binary digits into an integer, then pack that integer into a single byte
s = '10100001'
integer = int(s, base = 2) # read the characters of s as base 2 digits
byte = integer.to_bytes(length = 1, byteorder = 'big') # one byte long, byte order 'big'
for shown in (integer, byte):
    print(shown)

In [None]:
# hex() produces the hexadecimal text for a number; the result is a str, not a numeric hex value
# a hex literal such as 0xc8 is simply an int to python
# hex() is mostly useful for demonstration and debugging
for shown in (hex(200), type(hex(200)), 0xc8, type(0xc8)):
    print(shown)

In [None]:
# Pack a list of integers into a bytearray
convert_list = [1, 25, 50, 100, 150]
byte_list = bytearray() # start empty, then add every integer as one byte
byte_list.extend(convert_list)
print(byte_list)

In [None]:
# Render an integer as a fixed-width string of binary digits
integer = 15
bit_string = '{:08b}'.format(integer) # 0 pads with leading zeros, 8 is the number of digits, b selects binary
print(bit_string)

In [None]:
# Shifting the bits of an integer to the right and to the left
integer = 16
shift = 4 # number of bit positions to move
for shifted in (integer >> shift, integer << shift): # right shift first, then left shift
    print(shifted)

In [None]:
# Combining bits
# | is 'or' operator
high_bits = 0xA
low_bits = 0x6
print(format(high_bits, '04b')) # was format(high_byte, ...), a name that is never defined
print(format(low_bits, '04b'))
# Combine to a byte: move the high bits up four places, then fill the low four places
byte = (high_bits << 4) | low_bits # 'or' operation: 1 if either bit is 1
print(format(byte, '08b'))

In [None]:
# Keep only part of a byte by masking it
# & is the bitwise 'and' operator
byte = 0xA5
mask = 0xF # ones mark the bits to keep (here the low four bits)
masking_byte = mask & byte # a result bit is 1 only where both inputs have a 1
for shown in (byte, masking_byte):
    print('{:08b}'.format(shown))

# Scipy Curve Fit <a name="curve_fit"></a>

The fit is performed by minimizing the sum of squared residuals using non-linear least squares.

In [None]:
from scipy.optimize import curve_fit # explicit import: "import scipy as sp" alone does not guarantee sp.optimize is loaded

xdata = np.array([1, 2, 3, 4])
ydata = np.array([50, 817, 16216, 325519])

def fit_function(x, a, b, c):
    """Exponential model a * exp(b * x) + c evaluated at x."""
    return a * np.exp(b * x) + c

initial_guess = [1, 1, 1] # initial guess of a, b, and c; every value must lie inside its bounds
# The previous upper bounds (a <= 1, b <= 0.5) excluded both the initial guess for b and the
# values the sample data follow (roughly a = 2, b = 3, c = 10), so the fit could not run
bounds = (0, [10, 5, np.inf]) # a bounds: 0 to 10, b bounds: 0 to 5, c bounds: 0 to infinity

popt, pcov = curve_fit(fit_function, # function to optimize, first input must be x data followed by parameters
                       xdata, # numpy array of x data
                       ydata, # numpy array of y data
                       p0 = initial_guess, # initial guess of parameters, else all are initialized as 1
                       bounds = bounds) # bounds for each parameter in fitting

print(popt) # popt is optimized parameters a, b, c in an array
# pcov is the covariance of popt, diagonals are the variance of the parameter estimates
perr = np.sqrt(np.diag(pcov)) # standard deviation errors on the parameters

# Exploratory Data Analysis <a name="EDA"></a>

In [None]:
# To use seaborn standard parameters
# API: https://seaborn.pydata.org/api.html
sns.set()

In [None]:
# Load the iris dataset
from sklearn import datasets
data = datasets.load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['species'] = pd.Series(data.target).astype(str)
df['species'] = df['species'].map({"0": "Setosa", "1": "Versicolour", "2": "Virginica"})
df.head()

In [None]:
# Plotting histogram of dataframe data
_ = df.hist(column = 'sepal length (cm)',
            by = 'species',
            figsize = (15,5),
            layout = (1,3),
            bins = 10,
            sharey = True,
            range = (df['sepal length (cm)'].min(), df['sepal length (cm)'].max()))
plt.show()

In [None]:
# Plotting a swarmplot of dataframe data
_ = sns.swarmplot(x = 'species',
                  y = 'sepal length (cm)',
                  data = df)
_ = plt.xlabel('Species')
_ = plt.ylabel('Sepal Length (cm)')
plt.show()

In [None]:
# Cumulative histogram of one dataframe column, drawn as a separate panel for each species
# (cumulative = True makes every bin include the counts of the bins before it)
_ = df.hist(column = 'sepal length (cm)',
            by = 'species',
            figsize = (15,5),
            layout = (1,3),
            bins = 10,
            sharey = True,
            range = (df['sepal length (cm)'].min(), df['sepal length (cm)'].max()),
            cumulative = True)

In [None]:
# Scatter plot with seaborn
_ = sns.scatterplot(x='sepal length (cm)',
                y='petal length (cm)',
                hue='species',
                size='petal width (cm)',
                data = df)
plt.show()

In [None]:
# Joint plot with seaborn
# x and y are passed by keyword: current seaborn releases no longer accept them positionally
_ = sns.jointplot(x = 'sepal length (cm)', # column plotted on the x axis
                  y = 'petal length (cm)', # column plotted on the y axis
                  data = df)
plt.show()

In [None]:
# Pairplot with seaborn
_ = sns.pairplot(df)
plt.show()

In [None]:
# Regression plot with seaborn
_ = sns.regplot(x='sepal length (cm)',
                y='petal length (cm)',
                color='blue',
                order=1,
                data=df,
                label='First Order')
plt.show()

In [None]:
# Residual plot with seaborn
_ = sns.residplot(x='sepal length (cm)',
                  y='petal length (cm)',
                  color='blue',
                  order=1,
                  data=df,
                  label='Residual')
plt.show()

In [None]:
# Linear regression with seaborn, splitting data by color
_ = sns.lmplot(x='sepal length (cm)',
               y='petal length (cm)',
               hue='species',
               data=df)
plt.show()

In [None]:
# Linear regression with seaborn, splitting data by row
_ = sns.lmplot(x='sepal length (cm)',
               y='petal length (cm)',
               row='species',
               data=df)
plt.show()

# If Name is Main <a name="Name_Main"></a>

Use at the end of an importable file for testing purposes. Anything under the if statement runs only when the file is executed directly as a script, not when it is imported.

In [None]:
if __name__ == "__main__":