<a href="https://colab.research.google.com/github/behnaznikkhah/COVID-19ImpactOnStockMarket/blob/master/Part2_Visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import numpy as np 
import matplotlib.ticker as ticker
from statsmodels.graphics import tsaplots
import statsmodels.api as sm
import seaborn as sns
from IPython.display import display, HTML



In [None]:
# Data files are looked up in the current working directory.
file_path_covid = os.path.abspath("")
file_path_market = os.path.abspath("")

# Load owid-covid-data.csv into the data_covid DataFrame.
owid_csv_path = os.path.join(file_path_covid, 'owid-covid-data.csv')
data_covid = pd.read_csv(owid_csv_path)

# Preview the first five rows of the raw data.
display(data_covid.head())

In [None]:
# Load the MAE results workbook from the current working directory.
file_path = os.path.abspath("")
mae_workbook_path = os.path.join(file_path, 'final_mae.xlsx')
df_mae = pd.read_excel(mae_workbook_path)


In [None]:
# Compare the MAE of every scenario for each country in a grouped bar chart.
fig, ax = plt.subplots(figsize=(24, 18))

# Reshape to one row per country and one column per scenario.
# The arguments are passed by keyword: pandas 2.0 made them keyword-only,
# so the former positional call raises a TypeError there.
mae_cat = df_mae.pivot(index="country", columns="scenario", values="mae")

# Grouped bars on a logarithmic y axis.
mae_cat.plot(kind='bar', ax=ax, logy=True, colormap='jet', width=0.8)

# Rename the legend labels.
# NOTE(review): labels are matched to the bars by position, so this assumes
# the pivoted scenario columns come out in exactly this order - confirm
# against the contents of final_mae.xlsx.
ax.legend(
    [
        'worldwide new cases with price exclusion',
        'worldwide total cases with price exclusion',
        'worldwide new cases with price inclusion',
        'worldwide total cases with price inclusion',
    ],
    bbox_to_anchor=(1.05, 1),
    loc='upper left',
    borderaxespad=0.,
    prop={'size': 15},
)

# Title, axis labels and x tick label size.
ax.set_title('MAE for different scenario using LSTM to predict market in different countries\n', fontsize=30)
ax.set_xlabel('Country', fontsize=15)
ax.set_ylabel('MAE', fontsize=15)
ax.tick_params(axis='x', labelsize=15)

# Display the plot.
plt.show()

In [None]:
# The COVID-19 and market CSV files are read from the current working directory.
file_path_covid = os.path.abspath("")
file_path_market = os.path.abspath("")

# Read the file contents into the data_covid and data_market DataFrames.
data_covid = pd.read_csv(os.path.join(file_path_covid, 'Covid.csv'))
data_market = pd.read_csv(os.path.join(file_path_market, 'Market.csv'))

# Preview the first five rows of each table, separated by three blank lines.
display(data_covid.head(5))
for _spacer in range(3):
    print('')
display(data_market.head(5))

# For each table: select/reorder the columns, parse the Date column as
# datetimes and use it as the index.
covid_columns = ['Date','Deaths','ActiveCases','Country']
market_columns = ['Date','Country','Price','Open','Low','High','Volume','Change']

data_covid = (
    data_covid[covid_columns]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)
data_market = (
    data_market[market_columns]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)

# Show the data type of every column.
display(data_market.dtypes)
display(data_covid.dtypes)



In [None]:
# Inner-join market and COVID-19 data on the Date index level and the
# Country column, keeping only date/country pairs present in both tables.
finaldf = pd.merge(data_market, data_covid, how='inner', on=['Date', 'Country'])

# Drop the market columns that are not needed in the merged table.
# `columns=` is used because the positional `axis` argument of
# DataFrame.drop was removed in pandas 2.0.
finaldf = finaldf.drop(columns=['Volume', 'Low', 'Open', 'Change', 'High'])

# Display the head of the merged data.
display(finaldf.head())


In [None]:
# Reference table describing each stock index: full name, abbreviation and
# the country label used for it.
# NOTE(review): 'Shanghai' appears in the Country list although it is a city;
# presumably it mirrors the label used in the market data - confirm before
# changing it.
index_data = {'Definition':  ['Abu Dhabi Securities Exchange (ADX) Composite Index', 'Cotation Assistée en Continu (CAC) 40 Index', 
            'Deutsche Aktien Xchange (DAX) Performance Index', 'Dow Jones Industrial Average Index', 
            'Jakarta Composite Index', 'Korea Composite Stock Price Index', 'Nikkei 225 Index', 
            'S&P/Australian Securities Exchange (S&P/ASX) 200 Index', 'S&P/Toronto Stock Exchange Composite Index (S&P/TSX) Composite Index', 
            'Shanghai Composite Index', 'National Stock Exchange (NIFTY) 50 Index','Brasil Sao Paulo Stock Exchange Index','South Africa Top 40'],
        'Abbreviation': ['ADX','CAC40','GDAXI','DJIA','JKSE','KOSPI','N225','AXJO','GSPTSE','SSEC','NSEI','BVSP','JTOPI'],
        'Country':['United Arab Emirates','France','Germany','United States','Indonesia','Korea','Japan',
          'Australia','Canada','Shanghai','India','Brazil','South Africa']
        }

# Build a DataFrame of the index information with a fixed column order.
index_info = pd.DataFrame(index_data, columns=['Definition', 'Abbreviation', 'Country'])

# Center the cell text and hide the row index.
# Styler.hide_index() was removed in pandas 2.0 in favour of
# Styler.hide(axis="index"); fall back to the old method on versions that
# predate Styler.hide. The former bare `.render()` call was dropped: its
# return value was discarded and the method no longer exists in pandas 2.0.
centered_styler = index_info.style.set_properties(**{'text-align': 'center'})
if hasattr(centered_styler, 'hide'):
    style_index_info = centered_styler.hide(axis="index")
else:
    style_index_info = centered_styler.hide_index()

# Display the styled index information.
display(style_index_info)

In [None]:
def _count_missing(group):
    """Return the number of NaN values in each column of ``group``."""
    return group.isna().sum()


# Number of NaN values per column, computed separately for every country.
finaldf_nan = finaldf.groupby('Country').apply(_count_missing)

# Select the plot style.
plt.style.use('_classic_test_patch')

# Bar chart of the missing-value counts for the three columns of interest.
ax = finaldf_nan.loc[:, ['Deaths', 'ActiveCases', 'Price']].plot.bar()
ax.set_title('Number of missing Values for each country\n')
ax.set_ylabel('Number of NaN')

# Display the plot.
plt.show()



In [None]:
# Replace every NaN with 0 in the market, COVID-19 and merged tables.
data_market, data_covid, finaldf = (
    frame.fillna(0) for frame in (data_market, data_covid, finaldf)
)

In [None]:
# Cumulative COVID-19 deaths, one subplot per country, drawn in ggplot style.
with plt.style.context('ggplot'):
    # Create only the Figure. The per-country Axes are added below with
    # plt.subplot; a figure-wide Axes from plt.subplots() would stay behind
    # the grid, because Matplotlib >= 3.8 no longer removes overlapping Axes.
    fig = plt.figure(figsize=(24, 18), linewidth=2)
    # Draw grid lines on the Axes created inside this style context.
    plt.rcParams["axes.grid"] = True
    # Names of the countries present in the market data.
    countrynames = data_market.Country.unique()
    # Figure title.
    fig.suptitle('Number of cumulative Corona deaths',fontweight ="bold",fontsize='xx-large')
    # One subplot per country on a 7 x 2 grid (room for up to 14 countries).
    for i, country in enumerate(countrynames, start=1):
        ax = plt.subplot(7, 2, i)
        # Cumulative deaths for this country as a line plot.
        country_deaths = data_covid.loc[data_covid.Country.eq(country), 'Deaths']
        country_deaths.plot(ax=ax)
        # Remove the per-subplot axis labels and title the subplot by country.
        ax.set_xlabel('')
        ax.set_ylabel('')
        ax.set_title(f"{country}")
    # Increase the vertical space between the subplot rows.
    fig.subplots_adjust(hspace=0.5)
# Shared y label, attached to the current (last created) subplot.
# NOTE(review): position=(0.06, 5) looks intended to lift the label towards
# the middle of the figure - confirm the rendered placement.
plt.ylabel("Number of deaths",position=(0.06,5),fontsize='x-large',fontweight='black')


In [None]:
# Cumulative COVID-19 active cases, one subplot per country, in ggplot style.
with plt.style.context('ggplot'):
    # Create only the Figure. The per-country Axes are added below with
    # plt.subplot; a figure-wide Axes from plt.subplots() would stay behind
    # the grid, because Matplotlib >= 3.8 no longer removes overlapping Axes.
    fig = plt.figure(figsize=(24, 18), linewidth=2)
    # Draw grid lines on the Axes created inside this style context.
    plt.rcParams["axes.grid"] = True
    # Names of the countries present in the market data.
    countrynames = data_market.Country.unique()
    # Figure title.
    fig.suptitle('Number of cumulative Corona active cases',fontweight ="bold",fontsize='xx-large')
    # One subplot per country on a 7 x 2 grid (room for up to 14 countries).
    for i, country in enumerate(countrynames, start=1):
        ax = plt.subplot(7, 2, i)
        # Active cases for this country as a line plot.
        country_active = data_covid.loc[data_covid.Country.eq(country), 'ActiveCases']
        country_active.plot(ax=ax)
        # Remove the per-subplot axis labels and title the subplot by country.
        ax.set_xlabel('')
        ax.set_ylabel('')
        ax.set_title(f"{country}")
    # Increase the vertical space between the subplot rows.
    fig.subplots_adjust(hspace=0.5)
# Shared y label, attached to the current (last created) subplot.
# NOTE(review): position=(0.06, 5) looks intended to lift the label towards
# the middle of the figure - confirm the rendered placement.
plt.ylabel("Number of active cases",position=(0.06,5),fontsize='x-large',fontweight='black')


In [None]:
# Daily new COVID-19 deaths with their 30-row rolling mean, one subplot per
# country, drawn in ggplot style.
with plt.style.context('ggplot'):
    # Create only the Figure. The per-country Axes are added below with
    # plt.subplot; a figure-wide Axes from plt.subplots() would stay behind
    # the grid, because Matplotlib >= 3.8 no longer removes overlapping Axes.
    fig = plt.figure(figsize=(24, 18), linewidth=2)
    # Draw grid lines on the Axes created inside this style context.
    plt.rcParams["axes.grid"] = True
    # Names of the countries present in the market data.
    countrynames = data_market.Country.unique()
    # Figure title.
    fig.suptitle('Number of new Corona deaths',fontweight ="bold",fontsize='xx-large')
    # One subplot per country on a 7 x 2 grid (room for up to 14 countries).
    for i, country in enumerate(countrynames, start=1):
        ax = plt.subplot(7, 2, i)
        # New deaths = row-to-row difference of the cumulative death count.
        new_deaths = data_covid.loc[data_covid.Country.eq(country), 'Deaths'].diff()
        new_deaths.plot(ax=ax)
        # Rolling mean of the new deaths over a window of 30 rows; the diff
        # is computed once and reused instead of being recomputed.
        new_deaths_rolling = new_deaths.rolling(window=30).mean()
        new_deaths_rolling.plot(ax=ax)
        # Remove the per-subplot axis labels and title the subplot by country.
        ax.set_xlabel('')
        ax.set_ylabel('')
        ax.set_title(f"{country}")
    # Increase the vertical space between the subplot rows.
    fig.subplots_adjust(hspace=0.5)
# Shared y label, attached to the current (last created) subplot.
# NOTE(review): position=(0.06, 5) looks intended to lift the label towards
# the middle of the figure - confirm the rendered placement.
plt.ylabel("Number of deaths",position=(0.06,5),fontsize='x-large',fontweight='black')


In [None]:
# Daily change in COVID-19 active cases with its 30-row rolling mean, one
# subplot per country, drawn in ggplot style.
with plt.style.context('ggplot'):
    # Create only the Figure. The per-country Axes are added below with
    # plt.subplot; a figure-wide Axes from plt.subplots() would stay behind
    # the grid, because Matplotlib >= 3.8 no longer removes overlapping Axes.
    fig = plt.figure(figsize=(24, 18), linewidth=2)
    # Draw grid lines on the Axes created inside this style context.
    plt.rcParams["axes.grid"] = True
    # Names of the countries present in the market data.
    countrynames = data_market.Country.unique()
    # Figure title (typo "caes" in the original title corrected).
    fig.suptitle('Number of new Corona active cases',fontweight ="bold",fontsize='xx-large')
    # One subplot per country on a 7 x 2 grid (room for up to 14 countries).
    for i, country in enumerate(countrynames, start=1):
        ax = plt.subplot(7, 2, i)
        # New active cases = row-to-row difference of the active-case count.
        new_active = data_covid.loc[data_covid.Country.eq(country), 'ActiveCases'].diff()
        new_active.plot(ax=ax)
        # Rolling mean of the new active cases over a window of 30 rows; the
        # diff is computed once and reused instead of being recomputed.
        new_active_rolling = new_active.rolling(window=30).mean()
        new_active_rolling.plot(ax=ax)
        # Remove the per-subplot axis labels and title the subplot by country.
        ax.set_xlabel('')
        ax.set_ylabel('')
        ax.set_title(f"{country}")
    # Increase the vertical space between the subplot rows.
    fig.subplots_adjust(hspace=0.5)
# Shared y label, attached to the current (last created) subplot.
# NOTE(review): position=(0.06, 5) looks intended to lift the label towards
# the middle of the figure - confirm the rendered placement.
plt.ylabel("Number of active cases",position=(0.06,5),fontsize='x-large',fontweight='black')


In [None]:
# Stock index price per country, with the reference date 2020-02-20 marked
# by a vertical line and the price on that date by a horizontal line.
with plt.style.context('ggplot'):
    # Create only the Figure. The per-country Axes are added below with
    # plt.subplot; a figure-wide Axes from plt.subplots() would stay behind
    # the grid, because Matplotlib >= 3.8 no longer removes overlapping Axes.
    fig = plt.figure(figsize=(24, 18), linewidth=2)
    # No grid lines on the Axes created inside this style context.
    plt.rcParams["axes.grid"] = False
    # Names of the countries present in the market data.
    countrynames = data_market.Country.unique()
    # Figure title.
    fig.suptitle('Stock market Index price',fontweight ="bold",fontsize='xx-large')
    # One subplot per country on a 7 x 2 grid (room for up to 14 countries).
    for i, country in enumerate(countrynames, start=1):
        ax = plt.subplot(7, 2, i)
        # Index price for this country, drawn once (the original called
        # .plot() twice and therefore drew the same line twice).
        df_price = data_market.loc[data_market.Country.eq(country), 'Price']
        df_price.plot(ax=ax)
        # Vertical line on 20 February 2020.
        ax.axvline('2020-02-20', color='red', linestyle='--')
        # Horizontal line at the price recorded on 20 February 2020. A boolean
        # mask is used so that a country without a row for that date is
        # skipped instead of raising a KeyError.
        on_reference_date = df_price[df_price.index.normalize() == pd.Timestamp('2020-02-20')]
        if not on_reference_date.empty:
            ax.axhline(on_reference_date.iloc[0], color='green', linestyle='--')
        # Remove the per-subplot axis labels and title the subplot by country.
        ax.set_xlabel('')
        ax.set_ylabel('')
        ax.set_title(f"{country}")
    # Rotate and align the date tick labels once all subplots exist.
    fig.autofmt_xdate()
# Shared y label, attached to the current (last created) subplot.
# NOTE(review): position=(0.06, 5) looks intended to lift the label towards
# the middle of the figure - confirm the rendered placement.
plt.ylabel("close price",position=(0.06,5),fontsize='x-large',fontweight='black')


In [None]:
# Distribution of the index price, one subplot per country on a 7 x 2 grid.
# Relies on `countrynames` computed by an earlier cell.
fig = plt.figure(figsize=(24, 24))
for i, country in enumerate(countrynames, start=1):
    # Select the subplot slot for this country.
    plt.subplot(7, 2, i)
    # Index price series for this country.
    df_price = data_market.loc[data_market.Country.eq(country), 'Price']
    # Histogram with 100 bins plus density estimate.
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # consider migrating to sns.histplot.
    sns.distplot(df_price, bins=100, color='purple')
    # Blank out the per-subplot axis labels and title the subplot by country.
    plt.xlabel('')
    plt.ylabel('')
    plt.title(f'{country}')
# Increase the vertical space between the subplot rows.
plt.subplots_adjust(hspace=0.5)
# Figure title.
fig.suptitle('Stock market Index price distribution',position=(0.5,.93),fontweight ="semibold",fontsize='x-large')


In [None]:
# Horizontal box plot of active cases, one box per country.
ax = sns.boxplot(data=finaldf, x="ActiveCases", y="Country", orient="h")
# Plot title.
ax.set_title('Number of Corona active cases in each country')


In [None]:
# Horizontal box plot of deaths, one box per country.
ax = sns.boxplot(data=finaldf, x="Deaths", y="Country", orient="h")
# Plot title.
ax.set_title('Number of Corona deaths in each country')


In [None]:
# Multiplicative time-series decomposition of the index price, one figure per
# country.
countrynames = data_market.Country.unique()
# Iterate over every country directly rather than zipping with a hard-coded
# range(1, 14), which silently capped the loop at 13 countries.
for country in countrynames:
    # Index price series for this country.
    df_price = data_market.loc[data_market.Country.eq(country), 'Price']
    # Decompose this country's own series. The original passed the undefined
    # name `df_price_canada`, which raises NameError on a fresh kernel.
    # `period` replaces the `freq` keyword, which statsmodels deprecated.
    # NOTE(review): a multiplicative model rejects zero or negative values,
    # and an earlier cell fills missing prices with 0 - confirm the price
    # series has no gaps.
    decomposition = sm.tsa.seasonal_decompose(df_price, model='multiplicative', period=30)
    # Plot the decomposition components.
    fig = decomposition.plot()
    # Figure title naming the country.
    fig.suptitle(f"Stock market Index price decomposition ({country})",position=(.6,1.1),fontweight ="semibold",fontsize='large')
    # Display the plot.
    plt.show()


In [None]:
# Spearman correlation between each country's index price and the
# world-wide COVID-19 figures, shown as one clustered heatmap per country.
countrynames = data_market.Country.unique()
# The world-wide COVID-19 rows do not depend on the country, so select them
# once instead of on every loop iteration.
data_covid_world = data_covid[data_covid.Country.eq('World')]
# Iterate over every country directly rather than zipping with a hard-coded
# range(1, 14), which silently capped the loop at 13 countries.
for country in countrynames:
    # Market data for this country.
    data_market_country = data_market[data_market.Country.eq(country)]
    # Join world-wide COVID-19 data and market data on the date, drop the
    # unused market columns and fill NaN with 0. `columns=` is used because
    # the positional `axis` argument of DataFrame.drop was removed in
    # pandas 2.0.
    finaldf2 = (
        pd.merge(data_market_country, data_covid_world, how='inner', on=['Date'])
        .drop(columns=['Volume', 'Low', 'Open', 'Change', 'High'])
        .fillna(0)
    )
    # Row-to-row changes of active cases and deaths as new columns.
    finaldf2['New-ActiveCases'] = finaldf2['ActiveCases'].diff()
    finaldf2['New-Deaths'] = finaldf2['Deaths'].diff()
    # Correlation matrix over the numeric columns only: the merge leaves two
    # string Country columns, on which DataFrame.corr raises in pandas 2.0.
    corr_result = finaldf2.select_dtypes(include='number').corr(method='spearman')
    # Clustered heatmap of the correlation matrix.
    grid = sns.clustermap(corr_result,
                          row_cluster=True,
                          col_cluster=True,
                          figsize=(10, 10))
    # Figure title naming the country.
    grid.fig.suptitle(f"Features correlation ({country})",position=(0.6,1.1),fontweight ="semibold",fontsize='large')
    # Rotate the heatmap tick labels.
    plt.setp(grid.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    plt.setp(grid.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.show()

In [None]:
# Pairwise relationships between each country's index price and its own
# COVID-19 figures, shown as one regression pair plot per country.
countrynames = data_market.Country.unique()
# Iterate over every country directly rather than zipping with a hard-coded
# range(1, 14), which silently capped the loop at 13 countries.
for country in countrynames:
    # COVID-19 and market data for this country (the original stored the
    # per-country COVID-19 rows under the misleading name `data_covid_world`).
    data_covid_country = data_covid[data_covid.Country.eq(country)]
    data_market_country = data_market[data_market.Country.eq(country)]
    # Join both tables on the date, drop the unused market columns and fill
    # NaN with 0. `columns=` is used because the positional `axis` argument
    # of DataFrame.drop was removed in pandas 2.0.
    finaldf2 = (
        pd.merge(data_market_country, data_covid_country, how='inner', on=['Date'])
        .drop(columns=['Volume', 'Low', 'Open', 'Change', 'High'])
        .fillna(0)
    )
    # Row-to-row changes of active cases and deaths as new columns.
    finaldf2['New-ActiveCases'] = finaldf2['ActiveCases'].diff()
    finaldf2['New-Deaths'] = finaldf2['Deaths'].diff()
    # Pair plot with regression fits to show the feature relationships.
    grid = sns.pairplot(finaldf2, kind='reg')
    # Figure title naming the country.
    grid.fig.suptitle(f"Features relationship ({country})",position=(0.6,1.1),fontweight ="semibold",fontsize='large')
    # Display the plot.
    plt.show()