In [5]:
from IPython.core.interactiveshell import InteractiveShell
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
%matplotlib inline 
import matplotlib.pyplot as plt
import pandas as pd


# Data Analysis With Python for Excel Users II - Perform Excel Function in Python

<!-- PELICAN_BEGIN_SUMMARY -->

When we computed the mean, minimum and maximum of the data in Python, we are employing a technique to verify the data distribution.
This can be very effective when we need to quickly verify datasets for financial analysis.

<!-- PELICAN_END_SUMMARY -->

## Apply Python Codes to Verfiy the Accuracy of the Datasets.
- Sum up columns or rows in python and save the file in Excel
- Transpose columns
- Get the maximum, mean, average value in the datasets 
- Filter data to isolate invalid data 

### Example - Dow Jone Index and DJ Index 30  from 1985 to 2017    
- Sum up columns income1, income2 and income3 
- Insert a new column Total in Python

In [6]:
df = pd.read_excel('data/Dow.xlsx')
df.head()

Unnamed: 0,Stock Symbol,Company Name,Dividend Yield,Closing Price,Annualized Dividend,Ex-Div Date,Pay Date,50-day moving average,200-day moving average
0,WMT,Wal-Mart Stores,0.0242,85.91,2.08,2018-12-06,2019-01-02,82.35,78.68
1,V,Visa,0.007,119.78,0.84,2018-02-15,2018-03-06,106.44,98.9627
2,VZ,Verizon,0.0496,47.58,2.36,2018-04-09,2018-05-01,48.73,47.03
3,UNH,UnitedHealth Group,0.0135,221.9,3.0,2018-03-08,2018-03-20,198.85,187.48
4,UTX,United Technologies,0.0229,122.45,2.8,2018-02-15,2018-03-10,116.43,118.62


### Which Stock Has the Maximum Dividend Yield?

In [7]:
max_div_stock=df.iloc[df["Dividend Yield"].idxmax()]
max_div_stock
print("The stock with the max dividend yield is %s with yield %s" % (max_div_stock['Company Name'],max_div_stock['Dividend Yield']))

The stock with the max dividend yield is Verizon with yield 0.0496


### Which Stock Has the Minimum Dividend Yield?

In [8]:
min_div_stock=df.iloc[df["Dividend Yield"].idxmin()]
min_div_stock
print("The stock with the minimum dividend yield is %s with yield %s" % (min_div_stock['Company Name'],min_div_stock['Dividend Yield']))

The stock with the minimum dividend yield is Visa with yield 0.007


### Which Stock Price is Currently Below 50-day Moving Average?

- Calculate the difference between 50-day moving average and the closing price 
- Filter the difference below zero, this showed a list of stocks below 50-day average

In [9]:
df["Dif 50-day"] = df["50-day moving average"] - df["Closing Price"]  
df["Dif 50-day"].min()  
df[df["Dif 50-day"] <=0].head()

Unnamed: 0,Stock Symbol,Company Name,Dividend Yield,Closing Price,Annualized Dividend,Ex-Div Date,Pay Date,50-day moving average,200-day moving average,Dif 50-day
0,WMT,Wal-Mart Stores,0.0242,85.91,2.08,2018-12-06,2019-01-02,82.35,78.68,-3.56
1,V,Visa,0.007,119.78,0.84,2018-02-15,2018-03-06,106.44,98.9627,-13.34
3,UNH,UnitedHealth Group,0.0135,221.9,3.0,2018-03-08,2018-03-20,198.85,187.48,-23.05
4,UTX,United Technologies,0.0229,122.45,2.8,2018-02-15,2018-03-10,116.43,118.62,-6.02
5,TRV,Travelers Co.,0.0211,136.59,2.88,2018-03-08,2018-03-30,125.36,124.75,-11.23


### Insert Column and Assign Value in Python Instead of in Excel

** Insert new column after columns 2 with 5% Commission Rate 5% **

In [10]:
df.insert(2,"Com Rate %", 5)
df.head()

Unnamed: 0,Stock Symbol,Company Name,Com Rate %,Dividend Yield,Closing Price,Annualized Dividend,Ex-Div Date,Pay Date,50-day moving average,200-day moving average,Dif 50-day
0,WMT,Wal-Mart Stores,5,0.0242,85.91,2.08,2018-12-06,2019-01-02,82.35,78.68,-3.56
1,V,Visa,5,0.007,119.78,0.84,2018-02-15,2018-03-06,106.44,98.9627,-13.34
2,VZ,Verizon,5,0.0496,47.58,2.36,2018-04-09,2018-05-01,48.73,47.03,1.15
3,UNH,UnitedHealth Group,5,0.0135,221.9,3.0,2018-03-08,2018-03-20,198.85,187.48,-23.05
4,UTX,United Technologies,5,0.0229,122.45,2.8,2018-02-15,2018-03-10,116.43,118.62,-6.02


### Save the calculation and the newly created column to the Excel file

In [11]:
df.to_excel('data/Dow.xlsx')
df.tail()   

Unnamed: 0,Stock Symbol,Company Name,Com Rate %,Dividend Yield,Closing Price,Annualized Dividend,Ex-Div Date,Pay Date,50-day moving average,200-day moving average,Dif 50-day
25,CAT,Caterpillar Inc.,5,0.0213,146.79,3.12,2018-04-20,2018-05-19,127.15,112.13,-19.64
26,BA,Boeing Co.,5,0.0209,327.36,6.84,2018-02-08,2018-03-02,255.39,218.144,-71.97
27,AAPL,Apple Inc.,5,0.0146,172.44,2.52,2018-02-09,2018-02-15,156.641,152.61,-15.799
28,AXP,American Express,5,0.0153,91.6,1.4,2018-04-05,2018-05-10,90.34,84.1304,-1.26
29,MMM,3M,5,0.0254,214.33,5.44,2018-02-15,2018-03-12,216.33,206.69,2.0


 * two new columns created in python is saved in the Excel file 

### Count Rows and Columns When Dealing With Hugh Dataset

In [12]:
Count_Row=df.shape[0] 
Count_Col=df.shape[1] 
print(Count_Row)
print(Count_Col)

30
11


### Sum up total for specific columns

In [13]:
sum_row=df[["Dividend Yield","Closing Price"]].sum()
sum_row

Dividend Yield       0.7648
Closing Price     3513.1300
dtype: float64

### Transpose columns
- This code is very helpful for reader to view the report

In [14]:
df_sum=pd.DataFrame(data=sum_row).T
df_sum  #easy to transpost the sum

Unnamed: 0,Dividend Yield,Closing Price
0,0.7648,3513.13
