# Step 0 - Loading the necessary libraries and setting them up

In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sn
import matplotlib.pyplot as plt

# mpld3 renders matplotlib figures as interactive D3 plots in the notebook.
import mpld3
mpld3.enable_notebook()

# NOTE(review): this silences every warning for the whole session, including
# deprecation warnings raised by the libraries used below -- consider
# narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

# Keep rendered frames compact: at most 4 rows and 7 columns are shown.
pd.options.display.max_rows = 4
pd.options.display.max_columns = 7

# Step 1 - Data Collection

In [None]:
# Load A.csv; the first CSV column becomes the index and is parsed as dates.
A = pd.read_csv(
    'A.csv',
    index_col=0,
    parse_dates=True,
)
A

In [None]:
# Load B.csv; the first CSV column becomes the index and is parsed as dates.
B = pd.read_csv(
    'B.csv',
    index_col=0,
    parse_dates=True,
)
B

In [None]:
# Load C.csv; the first CSV column becomes the index and is parsed as dates.
C = pd.read_csv(
    'C.csv',
    index_col=0,
    parse_dates=True,
)
C

# Step 2 - Data wrangling/munging

In [None]:
# Outer-join the three frames on their index so that every index value seen in
# any of them is kept. The first join tags only A's clashing column names with
# '_A' (B's stay unsuffixed because no rsuffix is given); the second join then
# tags those still-unsuffixed B columns with '_B' and C's columns with '_C'.
merged_df = (
    A.join(B, how='outer', lsuffix='_A', sort=True)
     .join(C, how='outer', lsuffix='_B', rsuffix='_C', sort=True)
)

merged_df

# Step 3 - Data cleaning

In [None]:
# Rows in which every single column is missing.
merged_df.loc[merged_df.isna().all(axis='columns')]

In [None]:
# Number of rows missing at least one of the three close prices.
(
    merged_df.loc[:, ['Close_A', 'Close_B', 'Close_C']]
    .isna()
    .any(axis='columns')
    .sum()
)

In [None]:
# Keep only the rows where all three close prices are present.
valid_close_df = merged_df.dropna(
    how='any',
    subset=['Close_A', 'Close_B', 'Close_C'],
)

In [None]:
# Sanity check: rows still missing any close price after the drop.
(
    valid_close_df.loc[:, ['Close_A', 'Close_B', 'Close_C']]
    .isna()
    .any(axis='columns')
    .sum()
)

In [None]:
# Display the frame that remains after dropping rows without a full set of
# close prices.
valid_close_df

In [None]:
# Number of rows that still have a missing value in any column.
valid_close_df.isna().any(axis='columns').sum()

In [None]:
# The rows themselves that still have a missing value in any column.
valid_close_df.loc[valid_close_df.isna().any(axis='columns')]

In [None]:
# Fill each remaining gap with the next valid observation in its column
# (backward fill). DataFrame.bfill() replaces fillna(method='backfill'), which
# is deprecated in recent pandas releases, and yields the same result.
# NOTE(review): backfilling copies later values into earlier rows -- confirm
# that is acceptable for any downstream use that must avoid look-ahead.
valid_close_complete = valid_close_df.bfill()

In [None]:
# Number of rows with any missing value left after the backfill.
valid_close_complete.isna().any(axis='columns').sum()

# Step 4 - Obtaining Descriptive Statistics

In [None]:
# Lift the row limit so the summary statistics are not truncated.
pd.options.display.max_rows = None
valid_close_complete.describe()

In [None]:
# Go back to a compact display: at most 2 rows per rendered frame.
pd.options.display.max_rows = 2

In [None]:
# Keep the price columns only by dropping the three volume columns.
prices_only = valid_close_complete.drop(
    columns=['Volume_A', 'Volume_B', 'Volume_C'],
)

In [None]:
# Display the frame with the volume columns removed.
prices_only

# Step 5 - Visual Inspection of the Data

In [None]:
# Overlay the open/close/low/high series for A on a single set of axes.
# Each entry is (column, linestyle, colour); the line styles keep the
# same-coloured series distinguishable.
price_line_styles = [
    ('Open_A', '--', 'green'),
    ('Close_A', '-', 'grey'),
    ('Low_A', ':', 'black'),
    ('High_A', '-.', 'grey'),
]
ax = None
for column, linestyle, color in price_line_styles:
    # Hand the axes on explicitly rather than relying on pandas reusing the
    # current matplotlib axes. `legend` is a boolean switch; the legend entry
    # itself is labelled with the Series name.
    ax = valid_close_complete[column].plot(
        ax=ax, figsize=(12, 6), linestyle=linestyle, color=color, legend=True
    )
ax

In [None]:
# Overlay the open/close/low/high series for C on a single set of axes.
# Each entry is (column, linestyle, colour); the line styles keep the
# same-coloured series distinguishable.
price_line_styles = [
    ('Open_C', '--', 'black'),
    ('Close_C', '-', 'grey'),
    ('Low_C', ':', 'black'),
    ('High_C', '-.', 'grey'),
]
ax = None
for column, linestyle, color in price_line_styles:
    # Hand the axes on explicitly rather than relying on pandas reusing the
    # current matplotlib axes. `legend` is a boolean switch; the legend entry
    # itself is labelled with the Series name.
    ax = valid_close_complete[column].plot(
        ax=ax, figsize=(12, 6), linestyle=linestyle, color=color, legend=True
    )
ax

# Step 6 - Data Cleaning (Removing Outliers)

In [None]:
# Keep only the rows in which every column lies within 6 standard deviations
# of that column's mean (absolute z-score below 6).
no_outlier_prices = prices_only.loc[
    (np.abs(stats.zscore(prices_only)) < 6).all(axis=1)
]

In [None]:
# Overlay the open/close/low/high series for C after outlier removal.
# Each entry is (column, linestyle, colour); the line styles keep the
# same-coloured series distinguishable.
price_line_styles = [
    ('Open_C', '--', 'black'),
    ('Close_C', '-', 'grey'),
    ('Low_C', ':', 'black'),
    ('High_C', '-.', 'grey'),
]
ax = None
for column, linestyle, color in price_line_styles:
    # Hand the axes on explicitly rather than relying on pandas reusing the
    # current matplotlib axes. `legend` is a boolean switch; the legend entry
    # itself is labelled with the Series name.
    ax = no_outlier_prices[column].plot(
        ax=ax, figsize=(12, 6), linestyle=linestyle, color=color, legend=True
    )
ax

In [None]:
# Lift the row limit, then summarise the four C price columns after outlier
# removal.
pd.options.display.max_rows = None
no_outlier_prices.loc[:, ['Open_C', 'Close_C', 'Low_C', 'High_C']].describe()

In [None]:
# Cap rendered frames at 12 rows again.
pd.options.display.max_rows = 12

# Step 7 - Advanced Visualization Techniques

In [None]:
# Narrow the outlier-free frame down to the three close-price columns.
close_prices = no_outlier_prices.loc[:, ['Close_A', 'Close_B', 'Close_C']]

In [None]:
# Row-to-row change: the next row's close minus the current row's close.
# The last row has no successor, so its NaN delta is replaced by 0.
delta_close_prices = close_prices.shift(-1).sub(close_prices).fillna(0)
delta_close_prices.columns = [
    'Delta_Close_A',
    'Delta_Close_B',
    'Delta_Close_C',
]

In [None]:
# Display the per-row close-price changes.
delta_close_prices

In [None]:
# Remove the row limit for the outputs that follow.
pd.options.display.max_rows = None

In [None]:
# Summary statistics of the close-price changes for each of the three columns.
delta_close_prices.describe()

In [None]:
# Distribution of A's close-price changes as a 100-bin histogram.
delta_close_prices['Delta_Close_A'].plot.hist(
    bins=100,
    figsize=(12, 6),
    color='black',
    grid=True,
)

In [None]:
# Box plot of B's close-price changes.
delta_close_prices['Delta_Close_B'].plot.box(
    figsize=(12, 6),
    color='black',
    grid=True,
)

In [None]:
# Pairwise scatter plots of the three delta series, with a kernel density
# estimate of each series on the diagonal.
pd.plotting.scatter_matrix(
    delta_close_prices,
    alpha=0.75,
    figsize=(10, 10),
    diagonal='kde',
    grid=True,
    color='black',
)

In [None]:
# Pairwise Pearson correlation (the pandas default) between the delta series.
delta_close_prices.corr(method='pearson')

In [None]:
# Draw the correlation matrix as an annotated heatmap with square cells on a
# 6x6 inch figure.
plt.figure(figsize=(6, 6))
sn.heatmap(
    delta_close_prices.corr(),
    annot=True,
    linewidths=2,
    square=True,
)

# Special Python Libraries for EDA

In [None]:
import dtale

# Open the frame filtered on close prices (the one from before the backfill)
# in D-Tale.
dtale.show(
    valid_close_df,
    ignore_duplicate=True,
)