In [35]:
import pandas as pd
import numpy as np
import os

# Collect only the per-pair "<PAIR>-prepared.csv" inputs. Matching the full
# "-prepared.csv" suffix (rather than any ".csv") keeps derived files saved in
# the same folder -- such as data/currency.csv, which this notebook writes at
# the end -- from being picked up as inputs when the notebook is run again.
csv_files = sorted(
    os.path.join('data', file)
    for file in os.listdir('data')
    if file.endswith('-prepared.csv')
)
csv_files

['data/AUDUSD-prepared.csv',
 'data/EURUSD-prepared.csv',
 'data/GBPUSD-prepared.csv',
 'data/NZDUSD-prepared.csv',
 'data/USDCAD-prepared.csv',
 'data/USDCHF-prepared.csv',
 'data/USDJPY-prepared.csv']

In [36]:
# Sanity check: load every input file and report its (rows, columns) shape.
for csv_path in csv_files:
    df = pd.read_csv(csv_path)
    n_rows, n_cols = df.shape
    print((n_rows, n_cols))

# Column layout of the last file loaded by the loop above.
print(df.columns)

(43600, 13)
(43600, 13)
(43598, 13)
(43596, 13)
(43598, 13)
(43601, 13)
(43599, 13)
Index(['Gmt time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Log_Return',
       'SMA_20', 'SMA_50', 'RSI', 'Upper_Bollinger_Band',
       'Lower_Bollinger_Band', 'ATR'],
      dtype='object')


In [37]:
from datetime import datetime

def convert_to_date(date_str):
    """Return the calendar date of a 'DD.MM.YYYY HH:MM:SS.<fraction>' string.

    The time-of-day part is parsed and then discarded, so the result is a
    ``datetime.date``. ``datetime.strptime`` raises ``ValueError`` when the
    string does not match the expected layout.
    """
    return datetime.strptime(date_str, '%d.%m.%Y %H:%M:%S.%f').date()

# Build one daily series per currency pair: the mean of that pair's 'Close'
# values on each calendar date found in its 'Gmt time' column.
df_list = [pd.read_csv(file) for file in csv_files]
df_close = []

for raw in df_list:
    # Parse the whole timestamp column in one vectorized call instead of a
    # per-row Python strptime; `.dt.date` yields plain `datetime.date` objects.
    raw['date'] = pd.to_datetime(
        raw['Gmt time'], format='%d.%m.%Y %H:%M:%S.%f'
    ).dt.date
    # groupby sorts its keys by default, so the rows come out ordered by date
    # and no extra sort is needed after reset_index().
    daily_close = raw.groupby('date')['Close'].mean().reset_index()
    print(daily_close.shape)
    df_close.append(daily_close)

df_close[0].head()

(2187, 2)
(2187, 2)
(2187, 2)
(2185, 2)
(2187, 2)
(2187, 2)
(2187, 2)


Unnamed: 0,date,Close
0,2016-01-05,0.71595
1,2016-01-06,0.708835
2,2016-01-07,0.702066
3,2016-01-08,0.701371
4,2016-01-10,0.69353


In [38]:
# 1. Determine the full set of unique dates across all the DataFrames
all_dates = pd.concat([df['date'] for df in df_close]).drop_duplicates().sort_values()

# 2. Outer-join the DataFrames on the 'date' column. An outer join keeps every
#    date that appears in any frame and leaves NaN where a frame has no value
#    for that date, so the frames do not need to be padded by hand first.
#    df_close itself is left unmodified, which keeps this cell safe to re-run.
result_df = df_close[0].copy()
for i, df in enumerate(df_close[1:], 2):
    # The colliding right-hand 'Close' column receives a positional suffix,
    # giving the columns Close, Close_2, Close_3, ...
    result_df = pd.merge(result_df, df, on='date', how='outer', suffixes=('', f'_{i}'))
result_df = result_df.sort_values('date', ignore_index=True)

# 3. Sanity checks: exactly one row for every date in the union
assert result_df['date'].is_unique
assert len(result_df) == len(all_dates)

# Now, result_df contains the 'date' column and one 'Close' column per original DataFrame
print(result_df.shape)
result_df.head()

(2187, 8)


Unnamed: 0,date,Close,Close_2,Close_3,Close_4,Close_5,Close_6,Close_7
0,2016-01-05,0.71595,1.07502,1.46733,0.66977,1.39874,1.00859,119.11
1,2016-01-06,0.708835,1.075398,1.464198,0.664492,1.406315,1.008706,118.580333
2,2016-01-07,0.702066,1.085312,1.46022,0.663232,1.41041,1.000696,117.833208
3,2016-01-08,0.701371,1.088425,1.458271,0.660295,1.411128,0.997385,118.059227
4,2016-01-10,0.69353,1.09406,1.451405,0.65164,1.418315,0.99264,116.933


In [39]:
def getNamecsv(csv_file):
    """Return the currency-pair name encoded in a CSV path.

    The name is the part of the file name that precedes the first '-', e.g.
    'data/AUDUSD-prepared.csv' -> 'AUDUSD'. The directory part is removed with
    os.path.basename rather than by slicing off a fixed five characters, so
    the result does not depend on the files living in a folder named 'data'.
    """
    return os.path.basename(csv_file).split('-')[0]

# Column labels: 'DATE' followed by one currency-pair name per input file,
# taken in the order the files are listed in csv_files.
currency = ['DATE'] + [getNamecsv(path) for path in csv_files]
print(currency)

# Relabel positionally: the i-th existing column takes the i-th new label.
column_labels = dict(zip(result_df.columns, currency))
result_df.rename(columns=column_labels, inplace=True)
result_df.head()

['DATE', 'AUDUSD', 'EURUSD', 'GBPUSD', 'NZDUSD', 'USDCAD', 'USDCHF', 'USDJPY']


Unnamed: 0,DATE,AUDUSD,EURUSD,GBPUSD,NZDUSD,USDCAD,USDCHF,USDJPY
0,2016-01-05,0.71595,1.07502,1.46733,0.66977,1.39874,1.00859,119.11
1,2016-01-06,0.708835,1.075398,1.464198,0.664492,1.406315,1.008706,118.580333
2,2016-01-07,0.702066,1.085312,1.46022,0.663232,1.41041,1.000696,117.833208
3,2016-01-08,0.701371,1.088425,1.458271,0.660295,1.411128,0.997385,118.059227
4,2016-01-10,0.69353,1.09406,1.451405,0.65164,1.418315,0.99264,116.933


In [40]:
# Write the combined table to CSV, leaving out the row index.
output_path = 'data/currency.csv'
result_df.to_csv(output_path, index=False)