# Merging different dataframes 

In [3]:
import math
import os
from datetime import datetime, date, timedelta

import pandas as pd
import pg8000
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, String, MetaData

In [4]:
# Connection URL for the quotes database. The QUOTES_DB_URL environment
# variable takes precedence so that credentials do not have to live in the
# notebook.
# NOTE(review): the fallback literal still embeds a password; once the
# environment variable is set wherever this notebook runs, drop the literal
# and rotate the password.
connection_def = os.environ.get(
    "QUOTES_DB_URL",
    "postgresql+pg8000://quotes:clue0QS-train@raspberrypi/quotes",
)
engine = create_engine(connection_def)


In [5]:
# Equities under analysis. Each record carries the ISIN (used further down as
# the name of the database table to read), the ticker symbol and a display
# name.
universe = [
    {'isin': 'CA0585861085', 'symbol': 'PO0.F', 'name': 'Ballard Power'},
    {'isin': 'GB00B0130H42', 'symbol': 'IJ8.F', 'name': 'ITM Power'},
    {'isin': 'NO0010081235', 'symbol': 'D7G.F', 'name': 'Nel'},
    {'isin': 'SE0006425815', 'symbol': '27W.F', 'name': 'Powercell Sweden'},
    {'isin': 'US72919P2020', 'symbol': 'PLUN.F', 'name': 'Plug Power'},
    {'isin': 'NO0003067902', 'symbol': '2HX.F', 'name': 'Hexagon Composites'},
    {'isin': 'FR0000120073', 'symbol': 'AIL.DE', 'name': 'Air Liquide'},
    {'isin': 'IE00BZ12WP82', 'symbol': 'LIN.F', 'name': 'Linde'},
    {'isin': 'US2310211063', 'symbol': 'CUM.F', 'name': 'Cummins'},
    {'isin': 'FR0011742329', 'symbol': 'M6P.F', 'name': 'McPhy Energy S.A.'},
    # Currently excluded from the universe:
    # {'isin': 'US6541101050', 'name': 'Nikola Corporation', 'symbol': '8NI.F'},
    {'isin': 'DE000A0HL8N9', 'name': '2G Energy', 'symbol': '2GB.DE'},
]


In [6]:
def read_returns(isin: str, engine) -> pd.DataFrame:
    """Retrieves the returns series for an equity in the database.

    Reads the 'Date' and 'Close' columns of the table named after the ISIN
    and converts the closing prices into the percentage change from one row
    to the next. The first row therefore holds NaN.

    Args:
    -----
    isin:
        The ISIN of the equity (also the name of the underlying database
        table).

    engine:
        The database connection, passed through to pandas.read_sql.

    Returns:
    --------
    A pandas DataFrame indexed by 'Date' with a single column, named after
    the ISIN, that holds the returns.
    """
    prices = pd.read_sql(
        isin,
        engine,
        index_col='Date',
        columns=['Date', 'Close'],
        # The key must match the real column name; a key that names no
        # column is ignored and the format is never applied.
        parse_dates={'Date': '%Y-%m-%d'},
    )
    # Build the result directly rather than adding a column and dropping
    # 'Close' in place.
    return prices['Close'].pct_change().to_frame(name=isin)

In [7]:
def merge_returns(merged, df):
    """Outer-joins a returns frame onto the frame accumulated so far.

    Args:
    -----
    merged:
        The DataFrame built up by earlier calls, or None on the first call.

    df:
        The DataFrame to add.

    Returns:
    --------
    df itself when merged is None; otherwise a new DataFrame holding the
    outer join of merged and df on their indexes.
    """
    if merged is None:
        return df
    # Join against the argument, not a notebook-level variable, so the
    # function works whatever the caller's accumulator is called.
    return pd.merge(merged, df, how='outer', left_index=True, right_index=True)


In [8]:
# Fold the per-equity returns frames into one wide frame, one column per ISIN.
merged_df = None
for isin in (entry['isin'] for entry in universe):
    merged_df = merge_returns(merged_df, read_returns(isin, engine))

merged_df.head()

Unnamed: 0_level_0,CA0585861085,GB00B0130H42,NO0010081235,SE0006425815,US72919P2020,NO0003067902,FR0000120073,IE00BZ12WP82,US2310211063,FR0011742329,DE000A0HL8N9
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2000-01-03,,,,,,,,,,,
2000-01-04,,,,,-0.019231,,,,,,
2000-01-05,,,,,0.0,,,,,,
2000-01-06,,,,,0.588235,,,,,,
2000-01-07,,,,,0.111111,,,,,,


In [9]:
def isNan(num):
    """Checks whether num is NaN.

    Relies on NaN comparing unequal to itself, so the result is simply that
    of the expression ``num != num``.
    """
    differs_from_itself = num != num
    return differs_from_itself

In [10]:
# Sanity check: re-read every series and confirm that each of its values
# appears unchanged in the merged frame.
errors = 0
for asset in universe:
    isin = asset['isin']
    df = read_returns(isin, engine)

    # Check if all values of df are represented correctly in merged_df
    for idx in df.index:
        merged = merged_df.at[idx, isin]
        original = df.at[idx, isin]

        # NaN never equals NaN, so a gap present on both sides is a match and
        # must be skipped explicitly. A gap on one side only is a mismatch:
        # the plain comparison below catches it because NaN != value is True.
        both_missing = isNan(merged) and isNan(original)
        if not both_missing and merged != original:
            print("Big mistake!", idx, isin)
            print("\tmerged  :", merged)
            print("\toriginal:", original)
            errors += 1

print("Found", errors, "errors in the data.")

Found 0 errors in the data.
