In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.dates as mdates

# pandas display option: never truncate the column list when showing a frame
pd.set_option('display.max_columns', None)

# Load the Our World in Data COVID-19 dataset into df, then drop every column
# whose label matches the regex 'OWID' (presumably OWID's own aggregate
# entries rather than individual countries -- confirm against the dataset).
df = pd.read_json('https://covid.ourworldindata.org/data/owid-covid-data.json')
df = df.drop(columns=df.filter(regex='OWID').columns)

In [None]:
# Code of the country to analyse (used as a column label of df)
country_code = 'USA'

# Build country1 from that country's 'data' records, parse the 'date'
# field into datetimes in one pass, and promote it to the index.
country1 = pd.DataFrame(df[country_code]['data'])
country1 = country1.assign(date=pd.to_datetime(country1['date'])).set_index('date')

# Display the resulting date-indexed frame
country1

In [None]:
# List the keys (column labels) available in the country's date-indexed data;
# the keys analysed in the following cells are chosen from this list.
country1.columns

In [None]:
# Keys (columns of country1) to analyse
key1='new_cases_smoothed'
key2='new_deaths_smoothed'

# Line colours, reused for the matching y-axis labels
blue = '#1f77b4'
orange = '#ff7f0e'

# Left axis: time series of key1, titled with the country's 'location' field
ax1 = country1.plot(y=key1, c=blue, legend=False)
ax1.set_title(df[country_code]['location'])
ax1.set_ylabel(key1, color=blue)

# Right axis: key2 drawn on a twin of ax1 that shares the same x axis
ax2 = ax1.twinx()
country1.plot(y=key2, ax=ax2, c=orange, legend=False)
ax2.set_ylabel(key2, color=orange)

In [None]:
# Rebuild the working frame cdata: keep only rows where both keys are present.
# Sorting by date guarantees dates[0] / dates[-1] are the true endpoints, which
# the lag-to-array-slot mapping below relies on.
cdata = country1.dropna(subset=[key1, key2]).sort_index()
dates = cdata.index
numDates = len(dates)
if numDates == 0:
    raise ValueError('no rows with both %s and %s present' % (key1, key2))

# Mean and population standard deviation (ddof=0) of each key
v1_mean = cdata[key1].mean()
v1_std  = cdata[key1].std(ddof=0)
v2_mean = cdata[key2].mean()
v2_std  = cdata[key2].std(ddof=0)

# Lag axis: every day difference from -(maxDiffDay - 1) to +(maxDiffDay - 1)
maxDiffDay = (dates[-1] - dates[0]).days + 1
correlSize = 2*(maxDiffDay - 1) + 1
diffs = np.arange(- (maxDiffDay - 1), maxDiffDay)

print('記録のある日数: ', numDates)
print('記録の最初の日: ', dates[0].date())
print('記録の最後の日: ', dates[-1].date())
print('最初の日と最後の日の差: ', maxDiffDay)
print('確保する配列の要素数: ', correlSize)

# Cross-correlation.
# For every pair (i, j) the lag is date[j] - date[i] in days; adding
# (maxDiffDay - 1) shifts it to a zero-based array slot. All pairs are handled
# at once with NumPy instead of a Python double loop, which also avoids
# integer-key positional indexing on a date-indexed Series (deprecated in
# pandas). This builds two temporary arrays of numDates**2 elements.
day_offsets = np.asarray((dates - dates[0]).days)
lag_slots = (day_offsets[np.newaxis, :] - day_offsets[:, np.newaxis]
             + (maxDiffDay - 1)).ravel()
dev1 = cdata[key1].to_numpy(dtype=float) - v1_mean
dev2 = cdata[key2].to_numpy(dtype=float) - v2_mean

# Sum of deviation products per lag, and number of pairs that contributed
correlations = np.bincount(lag_slots, weights=np.outer(dev1, dev2).ravel(),
                           minlength=correlSize)
counts = np.bincount(lag_slots, minlength=correlSize).astype(float)

# Normalise by std(key1) * std(key2) * number of pairs. Lags with no pair
# (gaps in the dates) or a zero standard deviation have no defined value and
# are set to NaN explicitly instead of triggering a 0/0 warning.
norm = v1_std*v2_std*counts
correlations = np.divide(correlations, norm,
                         out=np.full(correlSize, np.nan), where=norm > 0)

print('===== 相互相関：計算完了 ===== ')

In [None]:
# Lag window in days, endpoints included: [diffDayMin, diffDayMax].
# It is used both for the peak search and for the x-axis limits of the plot.
diffDayMax = +200
diffDayMin = -200

diffs = np.asarray(diffs)
correlations = np.asarray(correlations)

# Find the peak correlation inside the window. The mask is built once, includes
# both endpoints so it matches the plotted range, and skips non-finite values
# (lags with no data), which would otherwise be picked up by argmax.
in_window = (diffs >= diffDayMin) & (diffs <= diffDayMax) & np.isfinite(correlations)
if not in_window.any():
    raise ValueError('no finite correlation value between lag %d and %d'
                     % (diffDayMin, diffDayMax))
cliped_diffs = diffs[in_window]
cliped_correlations = correlations[in_window]
peak = cliped_correlations.argmax()
max_day    = cliped_diffs[peak]
max_correl = cliped_correlations[peak]

print('最大の相関 %d 日後：相関値 %.3f' % (max_day, max_correl))

# Plot correlation against lag and mark the peak with a red dot
fig, ax = plt.subplots()
ax.set_title(df[country_code]['location'])
ax.plot(diffs, correlations)
ax.plot(max_day, max_correl, 'ro')
ax.set_xlabel('diff_day')
ax.set_ylabel('correlation')
ax.set_xlim([diffDayMin, diffDayMax])
plt.show()