# Section 2: Downloading data

## Finding the right library and table

In [None]:
import glob
import os
from functools import reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import wrds

# Apply seaborn's default theme to the figures drawn in this notebook.
sns.set_theme()

In [None]:
# Open the WRDS session used by the queries in the following cells.
# The account name is read from the WRDS_USERNAME environment variable so the
# notebook can be run by other users without editing it; the original account
# name is kept as the fallback so existing behaviour is unchanged.
# (`os` is imported in the notebook's top import cell.)
db = wrds.Connection(
    wrds_username=os.environ.get('WRDS_USERNAME', 'arthurdhonneur')
)

In [None]:
# Show the libraries reported by the WRDS connection, to locate the one we need.
db.list_libraries()

In [None]:
# Show the tables available inside the 'wrdsapps_windices' library.
db.list_tables(library='wrdsapps_windices')


In [None]:
# Download the 'mwcountryreturns' table from the 'wrdsapps_windices' library.
# No row limit or column subset is passed; date and country filtering is done
# later in the notebook.
df = db.get_table(
    library='wrdsapps_windices',
    table='mwcountryreturns',
)

# Preview the first rows.
df.head()

In [None]:
# ---------------------------------------------
# Market return: value-weighted index return (vwretd) from crsp.msi,
# restricted to 2002-01-01 .. 2024-12-31.
# ---------------------------------------------
# The query text is passed to WRDS exactly as written.
rm_sql = """select  date,vwretd from crsp.msi 
                where date>='2002-01-01' and date<='2024-12-31'
                """

# Parse `date` as a datetime column and expose the return under the name 'Rm'.
Rm = (
    db.raw_sql(rm_sql, date_cols=['date'])
    .rename(columns={'vwretd': 'Rm'})
)

Rm.head()

In [None]:
# ---------------------------------------------
# Risk-free rate: tmytm from crsp.tfz_mth_rf for kytreasnox = 2000001,
# restricted to 2002-01-01 .. 2024-12-31.
# ---------------------------------------------
# The query text is passed to WRDS exactly as written.
rf_sql = """select  mcaldt,tmytm 
           from crsp.tfz_mth_rf            
            where kytreasnox = 2000001 
           and mcaldt>='2002-01-01'
            and mcaldt<='2024-12-31'"""

# Parse `mcaldt` as a datetime column, then use the notebook's common names.
Rf = (
    db.raw_sql(rf_sql, date_cols=['mcaldt'])
    .rename(columns={"mcaldt": "date", "tmytm": "rf"})
)

# Scale the yield: divide by 12, then by 100.
# NOTE(review): presumably tmytm is an annualised percentage, making `rf` a
# monthly decimal rate — confirm against the CRSP variable definition.
Rf['rf'] = Rf['rf'] / 12 / 100

Rf.tail()

## Looking at the FX data


In [None]:
# Inventory of the local FX files: for every CSV under data/fx, print the
# currency label (file name up to the first dot) and the number of rows.
# (`glob` and `os` are imported in the notebook's top import cell.)

# Sorted so the report order does not depend on the filesystem.
paths_fx = sorted(glob.glob('data/fx/*.csv'))

for path in paths_fx:
    # os.path.basename copes with both '/' and '\\' separators, whereas
    # splitting on '/' leaves the directory in the label on Windows.
    curr = os.path.basename(path).split('.')[0]
    print(f'the currency :{curr}')

    df_tamp = pd.read_csv(path)
    print(f'the len of the data :{len(df_tamp)}')
    print('--'*20)

## Looking at the interbank data

In [None]:
# Inventory of the local interbank-rate files: for every CSV under
# data/interbank_rates, print the currency label (file name up to the first
# dot) and the number of rows.
# (`glob` and `os` are imported in the notebook's top import cell.)

# Sorted so the report order does not depend on the filesystem.
paths_rates = sorted(glob.glob('data/interbank_rates/*.csv'))

for path in paths_rates:
    # os.path.basename copes with both '/' and '\\' separators, whereas
    # splitting on '/' leaves the directory in the label on Windows.
    curr = os.path.basename(path).split('.')[0]
    print(f'the currency :{curr}')

    df_tamp = pd.read_csv(path)
    print(f'the len of the data :{len(df_tamp)}')
    print('--'*20)

## Working with the right library / table

In [None]:
# Narrow the country-returns table to the sample used in this notebook:
# dates from 2002-01-01 to 2024-12-31 (both ends included) and the six
# country codes listed below.
list_countries = ['AUS', 'FRA', 'DEU', 'JPN', 'CHE','GBR']

in_window = df['date'].between('2002-01-01', '2024-12-31')
in_sample = df['fic'].isin(list_countries)
df = df.loc[in_window & in_sample]

# Random spot-check of the filtered rows.
df.sample(10)

In [None]:
# Preview the first ten rows of the filtered table.
df.head(10)

In [None]:
# Reshape the long country-returns table into one wide table: each country
# contributes its own block of columns (suffixed with the country name) and
# the blocks are outer-joined on the observation date.
# (`reduce` is imported from functools in the notebook's top import cell.)

# Get the list of unique countries
countries = df['country'].unique()

# List to hold individual country DataFrames
country_dfs = []

for country in countries:
    print(f'Processing country: {country}')

    df_country = (
        df.loc[df['country'] == country]
        # The country name moves into the column suffix, so the column itself goes.
        .drop(columns=['country'])
        # Keep each row's OWN date. Assigning Rm['date'] here would align on
        # the row index, not on position: the filtered rows carry labels that
        # do not match Rm's, so the real dates would be replaced by NaT or by
        # unrelated dates and the merge key below would be corrupted.
        # Coercing to datetime gives every country the same key dtype.
        .assign(date=lambda frame: pd.to_datetime(frame['date']))
        # Suffix every column except the merge key.
        .rename(columns=lambda col: col if col == 'date' else f"{col}_{country}")
    )
    print(f'Number of rows for {country}: {len(df_country)}')

    country_dfs.append(df_country)

# reduce() on an empty list fails with an unhelpful TypeError; say what went wrong.
if not country_dfs:
    raise ValueError("No country rows to merge: `df` is empty after filtering.")

# Merge all country-specific DataFrames on 'date'; the outer join keeps dates
# that are missing for some countries.
df_merged = reduce(
    lambda left, right: pd.merge(left, right, on='date', how='outer'),
    country_dfs,
)

# Sort chronologically
df_merged = df_merged.sort_values(by='date')

# Display the result
df_merged.head()


In [None]:
# Preview the first rows of the merged, date-sorted wide table.
df_merged.head()

In [None]:
# Inspect the dates available for the rows whose `fic` code is 'AUS'.
df.loc[df['fic'] == 'AUS', 'date']