In [1]:
# invenotries-trained model
import tensorflow as tf

model = tf.keras.models.load_model('./model_2.h5')
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 64)                320       
_________________________________________________________________
dense_16 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_17 (Dense)             (None, 1)                 65        
Total params: 4,545
Trainable params: 4,545
Non-trainable params: 0
_________________________________________________________________


In [2]:
import csv
import pprint
import pandas as pd

dataset_dir = '.'

def load_csv(file_name):
    file_path = dataset_dir + '/' + file_name
    df = pd.read_csv(file_name)
    return df


In [3]:
# invenotries-trained dataset
test_dataset = load_csv('test_dataset.csv')

In [4]:
with open('./target_variables.csv') as f:
    target_variables = pd.read_csv(f)

In [5]:
# 転置行列を返す（正規化のため）
test_data_stats = test_dataset.describe()
for t in target_variables:
    test_data_stats.pop(t)
    test_data_stats = test_data_stats.transpose()

test_data_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Economy (GDP per Capita),156.0,0.857895,0.393196,0.0,0.604512,0.896365,1.129018,1.69752
Family,156.0,1.036616,0.306543,0.0,0.804176,1.04528,1.261535,1.624
Health (Life Expectancy),156.0,0.578858,0.255314,0.0,0.369105,0.618092,0.78375,1.141
Freedom,156.0,0.388964,0.165855,0.0,0.27383,0.418136,0.512905,0.696


In [6]:
for t in target_variables:
    test_data_labels = test_dataset.pop(t)

test_data_labels

0      7.284
1      7.226
2      6.901
3      6.798
4      6.575
       ...  
151    4.374
152    4.212
153    4.107
154    3.973
155    2.853
Name: Happiness Score, Length: 156, dtype: float64

In [7]:
# 正規化関数
def norm(x):
  return (x - data_stats['mean']) / data_stats['std']
normed_data = norm(data)
normed_data

NameError: name 'data' is not defined

In [None]:
#実際の値
with tf.name_scope('data'):
    Y = tf.Variable(test_data_labels, name='Y')
    print(Y)

In [None]:
# モデルを使った予測（学習用データ）
with tf.name_scope('predict'):
    pred = model.predict(normed_test_data.values).flatten()
    pred

In [None]:
# RMSEの計算、MAEとMSEの計算と表作成
import pandas as pd
import matplotlib.pyplot as plt

with tf.name_scope('rmse_mae_mse'):
    loss, mae, mse = model.evaluate(normed_test_data, test_data_labels, verbose=2)
    rmse = tf.sqrt(tf.reduce_mean((Y - pred)**2))
    
    csv_data = [['RMSE（Root Mean Square Error）', rmse.numpy()],
           ['MAE（Mean Absolute Error）', mae],
           ['MSE（Mean Squared Error）', mse]]

    with open('evaluation_index_matrix.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(csv_data)


In [None]:
df_fact = pd.DataFrame({'measured value': test_data_labels.values, 'predicted value': test_predictions})
df_fact

In [None]:
# 検証用データの予測と実測の相関グラフ
import seaborn as sns

with tf.name_scope('rmse_mae_mse'):
    sns.set_style("whitegrid")
    test_predictions = model.predict(normed_test_data).flatten()
    sns_plt = sns.jointplot("predicted value", "measured value", data=df_fact, kind="reg", line_kws={'color':'red'}, height=8, scatter_kws={"s": 4})
    file_name = ('correlation_graph_of_predicted_and_measured_values.png')
    sns_plt.savefig(file_name)

In [None]:
# 相関関係を描画
import seaborn as sns
with tf.name_scope('correlation graph'):
    data = load_csv('dataset.csv')
    explanatory_variables = data.columns
    for target in target_variables:
        for explanation in data.columns:
            if target == explanation:
                continue
            sns_plt = sns.jointplot(data[target], data[explanation], kind="reg", line_kws={'color':'red'}, height=8, scatter_kws={"s": 4})
            file_name = (target + '-' + explanation + '.png').replace(' ', '_')
            sns_plt.savefig(file_name)

In [None]:
# ペアワイズで説明変数の組み合わせを作り相関グラフを作成する

# !pip install more-itertools
import matplotlib.pyplot as plt

import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D


# ペアワイズで説明変数の組み合わせを作る
from itertools import tee
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

explanatory_variables = [i for i in data.columns if not i in target_variables]
explanatory_comb = pairwise(explanatory_variables)

with tf.name_scope('correlation graph'):
    data = load_csv('dataset.csv')
    for Z in target_variables:
        for X, Y in explanatory_comb:
            sns.set_style("darkgrid")
            fig = plt.figure(figsize = (8, 8))
            ax = Axes3D(fig)

            ax.set_xlabel(X)
            ax.set_ylabel(Y)
            ax.set_zlabel(Z)
            
            #.plotで描画
            ax.plot(data[X], data[Y], data[Z], marker="o", linestyle='None', ms=1)

            #最後に.show()を書いてグラフ表示
            plt.show()
            file_name = (Z + '-' + X + '-' + Y + '.png').replace(' ', '_')
            sns_plt.savefig(file_name)
