### Pandas Data Analysis


In [1]:
# Use "pip install pandas" in terminal to install pandas
import pandas as pd 

In [2]:
# Let's define a two-dimensional Pandas DataFrame from a python dictionary 
# We will use Pandas DataFrame constructor method "pd.DataFrame()" to create our Pandas DataFrame
# Data Source: https://statisticstimes.com/economy/projected-world-gdp-ranking.php

GDP_df = pd.DataFrame({'Country ID': ['USA', 'CHN' , 'IND', 'ARE', 'CAN', 'MEX'], 
                       'Country':['United States', 'China', 'India', 'United Arab Emirates', 'Canada', 'Mexico'], 
                       'GDP Per Capita [$]':[69375, 11891, 2116, 43538, 52791, 9967], 
                       'Global Rank':[5, 64, 150, 24, 15, 72]})
GDP_df

Unnamed: 0,Country ID,Country,GDP Per Capita [$],Global Rank
0,USA,United States,69375,5
1,CHN,China,11891,64
2,IND,India,2116,150
3,ARE,United Arab Emirates,43538,24
4,CAN,Canada,52791,15
5,MEX,Mexico,9967,72


In [4]:
# Let's obtain the data type of this pandas DataFrame 
print(type(GDP_df))
# Let's view the first couple of rows using ".head()" method
print(GDP_df.head(2))
# Let's view the last couple of rows using ".tail()" method
print(GDP_df.tail(3))

<class 'pandas.core.frame.DataFrame'>
  Country ID        Country  GDP Per Capita [$]  Global Rank
0        USA  United States               69375            5
1        CHN          China               11891           64
  Country ID               Country  GDP Per Capita [$]  Global Rank
3        ARE  United Arab Emirates               43538           24
4        CAN                Canada               52791           15
5        MEX                Mexico                9967           72


0    69375
1    11891
2     2116
3    43538
4    52791
5     9967
Name: GDP Per Capita [$], dtype: int64

In [5]:
# You can access a specific column in the Pandas DataFrame using the header name  
GDP_df['GDP Per Capita [$]']

0    69375
1    11891
2     2116
3    43538
4    52791
5     9967
Name: GDP Per Capita [$], dtype: int64

In [6]:
# Obtain DataFrame information using the info() method
GDP_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Country ID          6 non-null      object
 1   Country             6 non-null      object
 2   GDP Per Capita [$]  6 non-null      int64 
 3   Global Rank         6 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 320.0+ bytes


In [7]:
# Stock prices obtained from Yahoo Finance as of Oct 17th, 2022
portfolio_df = pd.DataFrame({'company name': ['Alphabet', 'Netflix', 'Bank of America', 'Boeing', 'Apple'],
                             'stock ticker symbol':['GOOG', 'NFLX', 'BOA', 'BA', 'AAPL'],
                             'price per share [$]':[99, 245, 33, 136, 142],
                             'number of shares':[2, 7, 15, 30, 22]})
portfolio_df

Unnamed: 0,company name,stock ticker symbol,price per share [$],number of shares
0,Alphabet,GOOG,99,2
1,Netflix,NFLX,245,7
2,Bank of America,BOA,33,15
3,Boeing,BA,136,30
4,Apple,AAPL,142,22


In [8]:
# Obtain a statistical summary
portfolio_df.describe()

Unnamed: 0,price per share [$],number of shares
count,5.0,5.0
mean,131.0,15.2
std,77.087612,11.256109
min,33.0,2.0
25%,99.0,7.0
50%,136.0,15.0
75%,142.0,22.0
max,245.0,30.0


In [13]:
# Multiply the "price per share" by the "number of stocks"
stocks_dollar_value = portfolio_df['price per share [$]'] * portfolio_df['number of shares']
print(stocks_dollar_value)
# Sum up all values
print(stocks_dollar_value.sum())
# Print the output
print('The total value of the portfolio = ${}'.format(stocks_dollar_value.sum()))

0     198
1    1715
2     495
3    4080
4    3124
dtype: int64
9612
The total value of the portfolio = $9612
