# Merging DataFrames with pandas


In [1]:
from glob import glob
import pandas as pd

## Reading DataFrames from multiple files in a loop

In [2]:
# Paths of the three medal tables, in Gold / Silver / Bronze order: filenames
filenames = [
    'data/summer-olympic-medals/Gold.csv',
    'data/summer-olympic-medals/Silver.csv',
    'data/summer-olympic-medals/Bronze.csv',
]

# Read every path with pd.read_csv, keeping the DataFrames in the same order: dataframes
dataframes = list(map(pd.read_csv, filenames))

# Show the first five rows of the first DataFrame (the one read from Gold.csv)
gold_preview = dataframes[0].head()
print(gold_preview)

   NOC         Country   Total
0  USA   United States  2088.0
1  URS    Soviet Union   838.0
2  GBR  United Kingdom   498.0
3  FRA          France   378.0
4  GER         Germany   407.0


## Reindexing DataFrame from a list

In [3]:
# Four mean-temperature readings labelled by month abbreviation: weather1
weather1 = pd.DataFrame(
    {'Mean TemperatureF': [61.95604396, 32.13333333, 33.9218486, 86.3194814]},
    index=['Mar', 'Apr', 'May', 'Jun'],
)

In [4]:
# Month abbreviations in calendar order, used as the target index: year
year = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Align weather1 to all twelve months; months it lacks come back as NaN: weather2
weather2 = weather1.reindex(index=year)
print(weather2)

# Same alignment with gaps forward-filled from the last preceding value: weather3
# (leading months stay NaN because there is nothing before them to carry forward)
weather3 = weather2.ffill()
print(weather3)

     Mean TemperatureF
Jan                NaN
Feb                NaN
Mar          61.956044
Apr          32.133333
May          33.921849
Jun          86.319481
Jul                NaN
Aug                NaN
Sep                NaN
Oct                NaN
Nov                NaN
Dec                NaN
     Mean TemperatureF
Jan                NaN
Feb                NaN
Mar          61.956044
Apr          32.133333
May          33.921849
Jun          86.319481
Jul          86.319481
Aug          86.319481
Sep          86.319481
Oct          86.319481
Nov          86.319481
Dec          86.319481


## Broadcasting in arithmetic formulas

In [5]:
# Read 'data/GDP/gdp_usa.csv' into a DataFrame indexed by the parsed 'DATE' column: gdp
gdp = pd.read_csv('data/GDP/gdp_usa.csv', parse_dates=True, index_col='DATE')

# Slice all the gdp data from 2008 onward (string label slice on the date index): post2008
post2008 = gdp.loc['2008':]

# Print the last 8 rows of post2008
print(post2008.tail(8))

# Resample post2008 by year, keeping last(): yearly
# NOTE(review): recent pandas releases deprecate the 'A' alias in favour of 'YE' —
# confirm against the installed pandas version before switching.
yearly = post2008.resample('A').last()

# Print yearly
print(yearly)

# Compute percentage growth of yearly: yearly['growth']
# pct_change() is taken on the 'VALUE' column explicitly, so a Series is assigned.
# Assigning the result of yearly.pct_change() (a whole DataFrame) to a single
# column only works while the frame has exactly one column.
yearly['growth'] = yearly['VALUE'].pct_change() * 100

# Print yearly again
print(yearly)

              VALUE
DATE               
2014-07-01  17569.4
2014-10-01  17692.2
2015-01-01  17783.6
2015-04-01  17998.3
2015-07-01  18141.9
2015-10-01  18222.8
2016-01-01  18281.6
2016-04-01  18436.5
              VALUE
DATE               
2008-12-31  14549.9
2009-12-31  14566.5
2010-12-31  15230.2
2011-12-31  15785.3
2012-12-31  16297.3
2013-12-31  16999.9
2014-12-31  17692.2
2015-12-31  18222.8
2016-12-31  18436.5
              VALUE    growth
DATE                         
2008-12-31  14549.9       NaN
2009-12-31  14566.5  0.114090
2010-12-31  15230.2  4.556345
2011-12-31  15785.3  3.644732
2012-12-31  16297.3  3.243524
2013-12-31  16999.9  4.311144
2014-12-31  17692.2  4.072377
2015-12-31  18222.8  2.999062
2016-12-31  18436.5  1.172707


In [6]:
# Load 'sp500.csv' and 'exchange.csv', each indexed by its parsed 'Date' column: sp500, exchange
sp500 = pd.read_csv('data/sp500.csv', index_col='Date', parse_dates=True)
exchange = pd.read_csv('data/exchange.csv', index_col='Date', parse_dates=True)

# Keep only the 'Open' and 'Close' columns of sp500: dollars
dollars = sp500[['Open', 'Close']]
print(dollars.head())

# Multiply every row of dollars by the 'GBP/USD' value sharing its index label: pounds
gbp_per_usd = exchange['GBP/USD']
pounds = dollars.mul(gbp_per_usd, axis='index')
print(pounds.head())

                   Open        Close
Date                                
2015-01-02  2058.899902  2058.199951
2015-01-05  2054.439941  2020.579956
2015-01-06  2022.150024  2002.609985
2015-01-07  2005.550049  2025.900024
2015-01-08  2030.609985  2062.139893
                   Open        Close
Date                                
2015-01-02  1340.364425  1339.908750
2015-01-05  1348.616555  1326.389506
2015-01-06  1332.515980  1319.639876
2015-01-07  1330.562125  1344.063112
2015-01-08  1343.268811  1364.126161
