In [1]:
import pandas as pd
import numpy as np

In [2]:
# We want to look at data for the Facebook, Apple, Amazon, Netflix, and Google stocks
# Combine them into a single file and store the dataframe as faang
# 1. read each file in
# 2. add a column to each dataframe, called ticker, indicating the ticker symbol it is for
# 3. append them together into a single dataframe
# 4. save the result in a csv file called faang.csv

In [3]:
aapl = pd.read_csv('aapl.csv')
amzn = pd.read_csv('amzn.csv')
nflx = pd.read_csv('nflx.csv')
goog = pd.read_csv('goog.csv')

In [4]:
aapl['ticker'] = 'AAPL'
amzn['ticker'] = 'AMZN'
nflx['ticker'] = 'NFLX'
goog['ticker'] = 'GOOG'

In [5]:
faang = aapl.append(amzn).append(nflx).append(goog)
faang

Unnamed: 0,date,high,low,open,close,volume,ticker
0,2018-01-02,43.075001,42.314999,42.540001,43.064999,102223600.0,AAPL
1,2018-01-03,43.637501,42.990002,43.132500,43.057499,118071600.0,AAPL
2,2018-01-04,43.367500,43.020000,43.134998,43.257500,89738400.0,AAPL
3,2018-01-05,43.842499,43.262501,43.360001,43.750000,94640000.0,AAPL
4,2018-01-08,43.902500,43.482498,43.587502,43.587502,82271200.0,AAPL
...,...,...,...,...,...,...,...
246,2018-12-24,1003.539978,970.109985,973.900024,976.219971,1590300.0,GOOG
247,2018-12-26,1040.000000,983.000000,989.010010,1039.459961,2373300.0,GOOG
248,2018-12-27,1043.890015,997.000000,1017.150024,1043.880005,2109800.0,GOOG
249,2018-12-28,1055.560059,1033.099976,1049.619995,1037.079956,1414800.0,GOOG


In [6]:
faang.to_csv('faang.csv', index=False)

In [7]:
# With faang, use type conversion to change the date column into a datetime and the volume column into integers.
# Then sort by date and ticker

In [8]:
faang.dtypes

date       object
high      float64
low       float64
open      float64
close     float64
volume    float64
ticker     object
dtype: object

In [9]:
faang = faang.assign(date=lambda x: pd.to_datetime(x.date), \
                     volume=lambda x: x.volume.astype(int)).sort_values(['date', 'ticker'])
faang.head()

Unnamed: 0,date,high,low,open,close,volume,ticker
0,2018-01-02,43.075001,42.314999,42.540001,43.064999,102223600,AAPL
0,2018-01-02,1190.0,1170.51001,1172.0,1189.01001,2694500,AMZN
0,2018-01-02,1066.939941,1045.22998,1048.339966,1065.0,1237600,GOOG
0,2018-01-02,201.649994,195.419998,196.100006,201.070007,10966900,NFLX
1,2018-01-03,43.637501,42.990002,43.1325,43.057499,118071600,AAPL


In [10]:
# Find the seven rows with the highest value for volume
faang.sort_values('volume', ascending=False).head(7)

Unnamed: 0,date,high,low,open,close,volume,ticker
182,2018-09-21,55.34,54.322498,55.195,54.415001,384986800,AAPL
245,2018-12-21,39.540001,37.407501,39.215,37.682499,382978400,AAPL
212,2018-11-02,53.412498,51.357498,52.387501,51.869999,365314800,AAPL
22,2018-02-02,41.700001,40.025002,41.5,40.125,346375200,AAPL
23,2018-02-05,40.970001,39.0,39.775002,39.122501,290954000,AAPL
27,2018-02-09,39.4725,37.560001,39.267502,39.102501,282690400,AAPL
24,2018-02-06,40.93,38.5,38.7075,40.7575,272975200,AAPL


In [11]:
faang.nlargest(7, 'volume')

Unnamed: 0,date,high,low,open,close,volume,ticker
182,2018-09-21,55.34,54.322498,55.195,54.415001,384986800,AAPL
245,2018-12-21,39.540001,37.407501,39.215,37.682499,382978400,AAPL
212,2018-11-02,53.412498,51.357498,52.387501,51.869999,365314800,AAPL
22,2018-02-02,41.700001,40.025002,41.5,40.125,346375200,AAPL
23,2018-02-05,40.970001,39.0,39.775002,39.122501,290954000,AAPL
27,2018-02-09,39.4725,37.560001,39.267502,39.102501,282690400,AAPL
24,2018-02-06,40.93,38.5,38.7075,40.7575,272975200,AAPL


In [12]:
# Use melt() to make it completely long format. 
# Date and ticker are our ID variables. We need to melt the rest so that we don't have separate columns for open, high, low, close and volume

In [13]:
melted_faang = faang.melt(id_vars=['ticker', 'date'], value_vars=['open', 'low', 'high', 'close', 'volume'])
melted_faang


Unnamed: 0,ticker,date,variable,value
0,AAPL,2018-01-02,open,4.254000e+01
1,AMZN,2018-01-02,open,1.172000e+03
2,GOOG,2018-01-02,open,1.048340e+03
3,NFLX,2018-01-02,open,1.961000e+02
4,AAPL,2018-01-03,open,4.313250e+01
...,...,...,...,...
5015,NFLX,2018-12-28,volume,1.099280e+07
5016,AAPL,2018-12-31,volume,1.400140e+08
5017,AMZN,2018-12-31,volume,6.954500e+06
5018,GOOG,2018-12-31,volume,1.493300e+06


The European Centre for Disease Prevention and Control (ECDC) provides an open dataset on COVID-19 cases called, daily number of new reported cases of COVID-19 by country worldwide. This dataset is updated daily, but we will use a snapshot that contains data from January 1, 2020 through September 18, 2020. Clean and pivot the data so that it is in wide format:

    Read in the covid19_cases.csv file.
    Create a date column using the data in the dateRep column and the pd.to_datetime() function.
    Set the date column as the index and sort the index.
    Replace occurrences of United_States_of_America and United_Kingdom with USA and UK, respectively.
    Using the countriesAndTerritories column, filter the data down to Argentina, Brazil, China, Colombia, India, Italy, Mexico, Peru, Russia, Spain, Turkey, the UK, and the USA.
    Pivot the data so that the index contains the dates, the columns contain the country names, and the values are the case counts in the cases column. Be sure to fill in NaN values with 0.


In [17]:
covid19 = pd.read_csv('covid19_cases.csv').assign(date=lambda x: pd.to_datetime(x.dateRep)).set_index('date').replace('United_States_of_America', 'USA').replace('United_Kingdom', 'UK').sort_index()
covid19.head()

Unnamed: 0_level_0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2019,continentExp,Cumulative_number_for_14_days_of_COVID-19_cases_per_100000
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-01-01,01/01/2020,1,1,2020,0,0,Lithuania,LT,LTU,2794184.0,Europe,
2020-01-01,01/01/2020,1,1,2020,0,0,Singapore,SG,SGP,5804343.0,Asia,
2020-01-01,01/01/2020,1,1,2020,0,0,Egypt,EG,EGY,100388076.0,Africa,
2020-01-01,01/01/2020,1,1,2020,0,0,Azerbaijan,AZ,AZE,10047719.0,Europe,
2020-01-01,01/01/2020,1,1,2020,0,0,Switzerland,CH,CHE,8544527.0,Europe,


In [19]:
covid19[covid19.countriesAndTerritories.isin(['Argentina', 'Brazil', 'China', 'Colombia', 'India', 'Italy', 'Mexico', 'Peru', 'Russia', 'Spain', 'Turkey', 'UK', 'USA'])]\
        .reset_index().pivot(index='date', columns='countriesAndTerritories', values='cases').fillna(0)

countriesAndTerritories,Argentina,Brazil,China,Colombia,India,Italy,Mexico,Peru,Russia,Spain,Turkey,UK,USA
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2020-01-02,0.0,0.0,2095.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,2.0,1.0
2020-01-03,0.0,1.0,574.0,0.0,0.0,240.0,2.0,0.0,0.0,28.0,0.0,12.0,3.0
2020-01-04,0.0,1138.0,54.0,108.0,146.0,4053.0,121.0,115.0,501.0,7413.0,2704.0,4273.0,24998.0
2020-01-05,143.0,7218.0,12.0,296.0,1993.0,1872.0,1425.0,3045.0,7099.0,1387.0,2615.0,5442.0,29917.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-05,245.0,5632.0,1.0,550.0,3604.0,744.0,1305.0,1515.0,11656.0,482.0,1114.0,2329.0,18117.0
2020-12-06,1373.0,30412.0,7.0,0.0,10956.0,379.0,4790.0,5965.0,8779.0,502.0,987.0,1199.0,22883.0
2020-12-07,3367.0,39023.0,35.0,4586.0,28637.0,188.0,6094.0,3064.0,6611.0,0.0,1016.0,565.0,63051.0
2020-12-08,7369.0,52160.0,58.0,12830.0,60963.0,412.0,6686.0,6547.0,4945.0,3172.0,1183.0,1148.0,46813.0


In [20]:
# Use data from covid19_total_cases.csv to find the 20 countries with the largest COVID-19 case totals.

pd.read_csv('covid19_total_cases.csv', index_col='index').T.nlargest(20, 'cases').sort_values('cases', ascending=False)


index,cases
USA,6724667
India,5308014
Brazil,4495183
Russia,1091186
Peru,756412
Colombia,750471
Mexico,688954
South_Africa,657627
Spain,640040
Argentina,601700
