In [None]:
import numpy as np
import pandas as pd
df = pd.read_csv("stock_data.csv",parse_dates=['date'])

# Logical Statements

Booleans are the backbone of logical statements.

In [None]:
# Using the int function
int(3.14)

In [None]:
# understanding the bool function - similar to int(object)
bool(10)
# bool returns True unless we pass a "falsy" value such as 0, None, "", [], (), or {}

In [None]:
# Generating booleans
a = 5
b = 7
print(a>b)

In [None]:
# Value of boolean
1 == True

### If Statement

In [None]:
# if condition : 
    # Perform action (this section of code is indented)

In [None]:
# Using booleans in if statement : Syntax if True : execute; if False : exit
if a<b : 
    print("a is less than b")

In [None]:
# Any non-zero value is treated as True inside an if statement
if 3 :
    print("something")

In [None]:
# Re-evaluating the NumPy .all function
my_array = np.array([1,2,3])
print((my_array>0).all()) # .all checks if all the values are True - inspiration for 'and' statement

In [None]:
# Pandas .all and .any functions
df1 = pd.Series([1,2,3])
(df1>0)#.any()

### And Statement

Allows us to compare multiple booleans. "And" statement gets executed only if all the conditions are True.

In [None]:
# Combining two true statements by and
True and True

In [None]:
# Combining a true and a false boolean
True and False

In [None]:
# More than 2 conditions
a = 1
b = 2
c = 3
if (a==1) and (b==2) and (c==3):
    print('success')

In [None]:
# Again, any value other than zero is treated as True by Python
if 3 and 4 and 5 : print('success')

### Or Statement

For the or statement to get executed, at least one of the conditions must be true.

In [None]:
# Combining two true booleans
True or True 

In [None]:
# Combining a true and a false boolean
True or False

In [None]:
# More than 2 conditions
a = 1
b = 2
c = 3
if (a==1) or (b==1) or (c==1):print('success')

### If, Elif, and Else

In [None]:
# if test1:
    # if test1 is true, do this
# elif test2:
    # if test2 is true, do this
# else : 
    # if none of the above are true, then do this

In [None]:
# A simple example of if, elif, and else statement
a = 10

if a<10 : 
    print('A')
elif (a>=10) and (a<100) : 
    print('B')
else :
    print('C')

# Loops

Python has two loop constructs: the for loop and the while loop. Operations on individual elements can be generalized using a loop in Python.

### For Loop

In [None]:
# Defining an array and iterating through the array
my_array = np.array([1,2,3,4,5])

# Example of an operation on individual elements
(my_array[2]+my_array[0])**(my_array[3]-my_array[0])

In [None]:
# Simple syntax of for loop
# Similar to if-statement, for loop starts with first indentation and ends at first un-indented line
for element in my_array:
    print(element)

In [None]:
# range function - similar to np.arange()
range(0,5) # gives us a range object in the range class

In [None]:
list(range(10,20,2))

In [None]:
np.arange(10,20,2)

In [None]:
# For loop with range
for i in range(0,len(my_array)):
    print(my_array[i])

### While Loop

In [None]:
# As long as a condition is true, While loop is executed
i=0
while i<6:
    print(i)
    i=i+1 # Can be replaced with i=i+0.5

In [None]:
# While loops with "break"
# Break statement essentially stops running the loop if encountered
i=0
while i<5:
    if i==3:
        break
    print(i)
    i=i+1

In [None]:
# While loops with "continue"
# Continue statement skips the rest of the current iteration and jumps back to the loop condition
i=0
while i<5:
    i=i+1 # Must come before "continue": if the increment were placed after it, i would stay at 3 and the loop would never end
    if i==3:continue # should be present before our mathematical operation so that the operation is skipped for i==3
    print(i+i**2+i**3)

### List Comprehension

Instead of entering data/objects inside [.] for generating lists, in list comprehensions you enter for loops and if statements within [.] to evaluate your expression. 

In [None]:
# Creating a new list with squares of the first 10 non-negative integers (0 through 9)
my_list1 = [] # Initiating an empty list
for i in range(10):
    my_list1.append(i**2) # Appending the square of the integer
my_list1

In [None]:
# An alternate concise way to create lists without writing an explicit for loop and appending to a list
# Syntax is [expression for item in collection if <test condition>]
my_list2 = [x**2 for x in range(10)] # saves us creating a blank list, indentation, and appending
my_list2

In [None]:
# Adding an if statement (calculating squares of even numbers only)
my_list3 = [x**2 for x in range(10) if x%2==0] 
my_list3

# Functions

Calculations and logical operations that need to be used repeatedly can be done so using a function.

In [None]:
# Syntax for defining a function
# def function (#arguments) : return
# Similar to loops, code is grouped inside a function through indentation
def my_function():
    """Placeholder function: takes no arguments, does nothing, and returns None."""
    return None

In [None]:
# Checking the output of the function created above
my_function
#my_function()

In [None]:
# Creating a function that does something
from math import pi
def area(x):
    """Return the area of a circle of radius x.

    x may be any value that supports ``**`` and multiplication by a float
    (for example a plain number or a NumPy array, which is evaluated
    element-wise).
    """
    # Area of a circle is pi * r**2 (the previous 0.5 factor gave half the area)
    return pi*x**2

In [None]:
# Reading the docstring of the function
area?
# area(5)
# area(-5) # not good programming practice

In [None]:
# Error when we don't pass a required variable "x"
area() 

In [None]:
# Using the function in a list comprehension
[area(r) for r in range(10)]

In [None]:
# Passing a numpy array inside the function
area(np.array([1,2,3]))

In [None]:
# Modifying the area function
def area(x=10):
    """Return the area of a circle of radius x (x defaults to 10).

    x may be any value that supports ``**`` and multiplication by a float
    (for example a plain number or a NumPy array, which is evaluated
    element-wise).
    """
    # Area of a circle is pi * r**2 (the previous 0.5 factor gave half the area)
    return pi*x**2

In [None]:
# Running function without specifying the key-word argument "x"
area() 

In [None]:
# Creating a function with required and key-word arguments
def mfe_co_2021(input1, input2=5):  # required arguments come first, keyword arguments after
    """Multiply input1 by input2 and return the product.

    input2 is optional and defaults to 5 when it is not supplied.
    """
    product = input1 * input2
    return product

In [None]:
# Calling the function
mfe_co_2021(10.5,10)

In [None]:
type(mfe_co_2021)

### Lambda - Anonymous Functions

A lambda function can take any number of inputs, but its body must be a single expression, whose value is the function's output.

In [None]:
# Syntax for the lambda function
# lambda inputs : output
lambda x : print(x) # A single line function only

In [None]:
# Defining a function that squares a number
def square(x):
    """Return x raised to the power of two."""
    return x ** 2
square(3)

In [None]:
# Creating a lambda function
# We are assigning the lambda function to x; So x is a function with no name
lambda i : i**2
#x = lambda i : i**2
#x(3)

In [None]:
# Passing a numpy array through the lambda function
my_array = np.linspace(0,5,6)
my_array
fun = lambda x : np.exp(x)
fun(my_array)

### Pandas Apply

The apply function can be used on a pandas series and on a pandas dataframe. Usually, groupby, apply, and the lambda functions go together.

In [None]:
# Creating a dataframe of 100 evenly spaced values between 0 and 10 (20 rows x 5 columns)
# The column names are passed as a flat list; wrapping the list in another list
# would build a MultiIndex, and df['A'] would then no longer be a plain Series
df = pd.DataFrame(np.linspace(0,10,100).reshape(20,5),columns="A B C D E".split())
df.head()

In [None]:
# Apply function on a pandas data series
df['A'].apply(np.exp) # calculating the exponential at each value of series A

In [None]:
# Apply function on a pandas data frame
df[['A','B','C']].apply(np.max,axis=1).head() # axis=0 is along the column, axis=1 is along each row

In [None]:
# Reloading the dataset analyzed above (shortcut to parse the dates while reading the data)
df = pd.read_csv("stock_data.csv",sep = ',',parse_dates=['date'])

# Converting any letters into NaN
df['price'] = pd.to_numeric(df['price'],errors='coerce')
df['total_returns'] = pd.to_numeric(df['total_returns'],errors='coerce')

df.head(10)

In [None]:
# Defining a function that we want to apply on the dataframe
df.groupby('permno')[['price']].apply(lambda x : x.max()/x.min())

In [None]:
# Creating a custom function 
def modify_return(x):
    """Return (mean - median) divided by the standard deviation of x.

    x must provide .mean(), .median() and .std() methods.
    """
    center_gap = x.mean() - x.median()
    spread = x.std()
    return center_gap / spread

In [None]:
df.groupby('permno')[['total_returns']].apply(lambda x : modify_return(x))

In [None]:
# Apply function along the rows
#df[['price','share_outstanding']].apply(lambda x : x['price']*x['share_outstanding'],axis=1)

# Plotting

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Defining our two arrays 
x = np.linspace(-10,10,21)
y = x**2

In [None]:
plt.plot(x,y)
# The x-axis is the default y-index
# First line above the plot (image location)
# Include x in the second part of plot
# Include label and legend
# Include plt.show to remove the first line 
# Change line color
# Add a Z variable
# Change line width
# Change line transparency using alpha
# Change line type by adding '--'
# Add title
# Add x-label
# Add x-ticks
# Save figure

In [None]:
# We can also plot directly from pandas
df[df['permno']<=10029].groupby(['date','permno'])['total_returns'].mean().unstack().cumsum().plot()

### Subplots

In [None]:
fig,ax = plt.subplots(2,2)

# Randomly defining 4 y-variables for subplots
x = np.linspace(-10,10,21)
y1 = np.exp(x)
y2 = np.sin(x)
y3 = np.power(x,3)
y4 = 10*x**2

# First subplot
ax[0,0].plot(x,y1)

# Second subplot
ax[0,1].plot(x,y2)

# Third subplot
ax[1,0].plot(x,y3)

# Fourth subplot
ax[1,1].plot(x,y4)

plt.show()

# Take Home Questions

To keep you thinking about Python.

- **List Comprehension**: First generate a list using the range(20) function. Next, create a second list that calculates $0.5 \cdot n \cdot (n+1)$ for each item in the first list. Solve this using a while loop and also the list comprehension method. Check if the outputs match.


- **Fibonacci Sequence**: Construct a function "fibonacci" that takes in the required integer argument "$n$" and returns the $n^{\text{th}}$ term in a Fibonacci sequence. For example, if you call your function as fibonacci(n=6), it should return the value 8 (the sequence starts 1, 1, 2, 3, 5, 8). You can use a for or a while loop inside your function. Print the function output for n=30, 50, and 100. 


- **Pandas Rolling** is a very useful function to know (check out the doc string of pandas.Series.rolling for a better understanding of what the function does). In this problem, we will calculate the 1-year rolling sum of returns for the data in *stock_data.csv* file. Clean the data (remove strings and drop NaN values). Use the pandas rolling function (window=12) to calculate the sum of 1-year returns. 


- **Plot and Subplots** - For the 4 PERMNOs (10137, 10051, 10057, 10028), calculate the cumulative sum of returns and plot them in a 2-by-2 subplot grid (one panel per PERMNO). Also, plot all their cumulative sums of returns together in one plot.