In [1]:
# import matplotlib
# matplotlib.use('TkAgg')
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.python.tools import inspect_checkpoint as chkp
import os

# Directory the checkpoint files are written to and later listed/read from.
MODEL_SAVE_PATH = "Reg_Model"
# Base file name of every checkpoint saved inside MODEL_SAVE_PATH.
MODEL_NAME = "model.ckpt"
# NOTE(review): not referenced in the visible cells; presumably a summary/log
# directory -- confirm or remove.
SUMMARY_PATH = "Reg_Logs"
# NOTE(review): not referenced in the visible cells; the training loop uses a
# literal range(3) instead -- confirm which one is intended.
TRAINING_STEPS = 100

# Draw random points scattered around the line y = 0.1x + 0.3.
num_points = 100
vectors_set = []
for i in range(num_points):
    x1 = np.random.normal(0.0, 0.55)
    # Gaussian noise keeps each point close to, but not exactly on, the line.
    y1 = (x1 * 0.1 + 0.3) + np.random.normal(0.0, 0.03)
    vectors_set.append([x1, y1])

# Split the (x, y) pairs into two parallel sample lists.
x_data, y_data = map(list, zip(*vectors_set))

plt.scatter(x_data, y_data, c='r')
plt.show()

<Figure size 640x480 with 1 Axes>

## Generate different versions of the checkpoint

In [16]:
# 1-D weight W, initialised uniformly at random in [-1, 1).
W = tf.Variable(tf.random_uniform([1], -1.0, 1.0), name='W')
# 1-D bias b, initialised to zero.
b = tf.Variable(tf.zeros([1]), name='b')
# Predicted value y, computed from the "input" placeholder.
# The placeholder (not the Python list x_data) must feed the model; otherwise
# the "input" node is disconnected from "output" and feeding it has no effect.
x = tf.placeholder(tf.float32, name="input")
y = tf.add(tf.multiply(W, x), b, name="output")

with tf.name_scope("loss_function"):
    # Mean squared error between the prediction y and the targets y_data.
    loss = tf.reduce_mean(tf.square(y - y_data), name='loss')
with tf.name_scope("training"):
    # Optimise the parameters with plain gradient descent.
    optimizer = tf.train.GradientDescentOptimizer(0.5)
    # Training means minimising the loss above.
    train = optimizer.minimize(loss, name='train')

sess = tf.Session()        # Creating the session this way works, but is not the recommended style.
init = tf.global_variables_initializer()
sess.run(init)

# The loss now depends on the placeholder, so every evaluation must feed it.
feed = {x: x_data}

# Show the initial W, b and loss.
print("before training: ")
print("W=", sess.run(W), "b=", sess.run(b), "loss=", sess.run(loss, feed_dict=feed))

# Make sure the checkpoint directory exists before the first save.
os.makedirs(MODEL_SAVE_PATH, exist_ok=True)

saver = tf.train.Saver()
for step in range(3):
    sess.run(train, feed_dict=feed)
    # One checkpoint per step; global_step is appended to the file name.
    saver.save(sess,
               os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
               global_step=step)


before training: 
W= [-0.7421026] b= [0.] loss= 0.2474321


## Make the checkpoints human-readable

In [3]:
#######################
#### WARNING (translated from the original note): the model obtained with this
#### method is not necessarily correct. For exact experimental results, do not
#### rely on this method.
#######################
def transform_saved_graph_to_readable(model_path):
    """Export every ``.meta`` graph found in ``model_path`` as a text file.

    For each file whose name ends with ``.meta``, the meta graph is imported
    and re-exported with ``as_text=True`` next to it, using the original file
    name plus a ``.json`` suffix.

    Args:
        model_path: directory containing the checkpoint ``.meta`` files.
    """
    for model in sorted(os.listdir(model_path)):
        # Match the suffix only: a substring test would also pick up the
        # "<name>.meta.json" files written by a previous run of this function.
        if not model.endswith(".meta"):
            continue
        meta_file = os.path.join(model_path, model)
        # Import into a fresh graph. Importing into the shared default graph
        # accumulates the nodes of every previously imported checkpoint (and of
        # the live training graph), and the export writes out the current
        # default graph, so later exports would contain earlier graphs as well.
        with tf.Graph().as_default():
            saver = tf.train.import_meta_graph(meta_file, clear_devices=True)
            saver.export_meta_graph(meta_file + ".json", as_text=True)

transform_saved_graph_to_readable(MODEL_SAVE_PATH)

## Diff the models: graphs and variables
* graph: ignore_order = true
* variable: ignore_order = false

In [4]:
import re, json
from deepdiff import DeepDiff

def my_obj_pairs_hook(lst):
    """Build a dict from ``(key, value)`` pairs, keeping duplicate keys.

    Intended as a ``json.loads(object_pairs_hook=...)`` callback. A key seen
    once maps to its value; a key seen several times maps to a list of all its
    values in order of appearance.

    Args:
        lst: iterable of ``(key, value)`` pairs.

    Returns:
        dict with one entry per distinct key.
    """
    merged = {}
    occurrences = {}
    for key, val in lst:
        seen = occurrences.get(key, 0) + 1
        occurrences[key] = seen
        if seen == 1:
            # First occurrence: store the bare value.
            merged[key] = val
        elif seen == 2:
            # Second occurrence: promote the stored value to a two-item list.
            merged[key] = [merged[key], val]
        else:
            # Third and later occurrences: extend the existing list.
            merged[key].append(val)
    return merged

def formalize_json(file):
    """Convert a text-exported meta graph file into a parsed JSON object.

    The exported graph file is not valid JSON. Each line is rewritten so that
    names are quoted, values are quoted, and commas are inserted between
    siblings; the result is wrapped in ``{...}`` and parsed with
    ``my_obj_pairs_hook`` so that repeated keys are preserved.

    Args:
        file: path of the text file to convert.

    Returns:
        The object produced by ``json.loads``; an empty dict for an empty file.
    """
    # Raw strings avoid invalid-escape warnings for ``\w``.
    p1 = re.compile(r'[a-zA-Z_]+: "')        # name: "string value"
    p2 = re.compile(r"[a-zA-Z_]+ {")         # name {   (start of a nested object)
    p3 = re.compile(r"[a-zA-Z_]+: [\w+-]+")  # name: bare_value

    # Close the file deterministically instead of leaking the handle.
    with open(file, "r") as handle:
        lines = handle.readlines()
    if not lines:
        # Nothing to convert; lines[-1] below would raise IndexError.
        return {}

    new_line = ""
    for i in range(len(lines) - 1):
        line = lines[i]
        nline = lines[i + 1]
        m1 = p1.findall(line)
        m2 = p2.findall(line)
        m3 = p3.findall(line)
        l = line
        # Remove the trailing "\n".
        if l[-1] == "\n":
            l = l[0:-1]
        # A comma follows the entry unless the next line closes the object.
        separator = "," if nline.strip() != "}" else ""

        if len(m2) > 0:
            # Object opener: quote the name and add the colon, no comma.
            ori = m2[0].split()[0]
            new = "\"%s\":" % (ori)
            l = l.replace(ori, new)
            new_line += l + "\n"
            continue
        elif len(m1) > 0:
            # String value: quote the name and keep the already-quoted value.
            ori = m1[0].split(":")[0]
            new = "\"%s\"" % (ori)
            # Backslashes are replaced by a placeholder token; handle with care.
            new1 = l.strip().replace((ori + ": "), "").replace("\\", ">|<")
            if ori == "tensor_content":
                # Tensor payloads are masked with a fixed placeholder string.
                new1 = "\"________\""
            l = new + ": " + new1
            new_line += l + separator + "\n"
            continue
        elif len(m3) > 0:
            # Bare value: quote both the name and the value of the first match.
            ori = m3[0].split(": ")
            new = ["\"%s\"" % (part) for part in ori]
            l = ": ".join(new)
            new_line += l + separator + "\n"
            continue
        else:
            new_line += l + separator + "\n"
    # The last line is appended unchanged.
    new_line += lines[-1]
    return json.loads("{" + new_line + "}", object_pairs_hook=my_obj_pairs_hook)

def find_from_dict(dict, string):
    """Follow a DeepDiff-style path string into a nested structure.

    Example path:
    ``"root['PyTorch_CPU_MKL_Notebook']['awsmpConfig']['operatingSystems']"``.
    Quoted components (``['name']``) are used as mapping keys and bare integer
    components (``[3]``) as sequence indices, so paths that pass through lists
    resolve too.

    Args:
        dict: the nested structure the path refers to.
        string: the path, e.g. ``"root['a'][1]['b']"``.

    Returns:
        The value found at the end of the path.

    Raises:
        KeyError / IndexError / TypeError: if the path does not exist.
    """
    tmp = dict
    # Group 1 captures a quoted key, group 2 captures an integer index.
    for key, index in re.findall(r"\[(?:'([^']*)'|(\d+))\]", string):
        tmp = tmp[int(index)] if index else tmp[key]
    return tmp

def show_diff_result(last_versions, curr_versions, result_json):
    """Expand added/removed paths of a diff into the values they point to.

    ``result_json`` is the raw diff of the two versions in JSON form. For
    ``"dictionary_item_added"`` each path listed under ``"py/set"`` is looked
    up in ``curr_versions``; for ``"dictionary_item_removed"`` it is looked up
    in ``last_versions``. The entry is replaced by a ``{path: value}`` dict.
    All other keys are left untouched.

    Args:
        last_versions: the older parsed structure.
        curr_versions: the newer parsed structure.
        result_json: the diff result; modified in place.

    Returns:
        The same ``result_json`` object.
    """
    # Which structure each kind of change has to be resolved against.
    lookup_source = {
        "dictionary_item_added": curr_versions,
        "dictionary_item_removed": last_versions,
    }
    for key in result_json.keys():
        if key in lookup_source:
            source = lookup_source[key]
            result_json[key] = {
                path: find_from_dict(source, path)
                for path in result_json[key]["py/set"]
            }
    return result_json

def diff_models(model1_dir, model2_dir):
    """Diff two text-exported graph files.

    Both files are parsed with ``formalize_json`` and compared with DeepDiff;
    the diff's JSON form is decoded and returned.

    Args:
        model1_dir: path of the first exported graph file.
        model2_dir: path of the second exported graph file.

    Returns:
        The decoded JSON diff.
    """
    first_graph, second_graph = formalize_json(model1_dir), formalize_json(model2_dir)
    # NOTE(review): ignore_order is not passed, so DeepDiff's default applies;
    # the notebook text mentions ignore_order = true for graphs -- confirm.
    graph_delta = DeepDiff(first_graph, second_graph)
    return json.loads(graph_delta.json)
    
def variable_to_json(model_path):
    """Read every variable of a checkpoint into a JSON-friendly dict.

    Args:
        model_path: checkpoint path passed to ``tf.train.NewCheckpointReader``.

    Returns:
        dict mapping each variable name to ``{"shape": ..., "value": ...}``,
        where the value is the tensor converted with ``tolist()``.
    """
    reader = tf.train.NewCheckpointReader(model_path)
    shape_by_name = reader.get_variable_to_shape_map()
    return {
        name: {
            "shape": shape,
            "value": reader.get_tensor(name).tolist(),
        }
        for name, shape in shape_by_name.items()
    }

def diff_varialbes(model1_dir, model2_dir):
    """Diff the variables stored in two checkpoints.

    Each checkpoint is loaded with ``variable_to_json`` and the two results
    are compared with DeepDiff.

    Args:
        model1_dir: path of the first checkpoint.
        model2_dir: path of the second checkpoint.

    Returns:
        The decoded JSON diff.
    """
    first_vars = variable_to_json(model1_dir)
    second_vars = variable_to_json(model2_dir)
    return json.loads(DeepDiff(first_vars, second_vars).json)
        
META_JSON_SUFFIX = ".meta.json"


def _checkpoint_step(file_name):
    """Return the global step encoded in ``<prefix>-<step>.meta.json`` (-1 if absent)."""
    step = file_name[:-len(META_JSON_SUFFIX)].rsplit("-", 1)[-1]
    return int(step) if step.isdigit() else -1


# os.listdir returns entries in arbitrary order, so sort by numeric step to be
# sure that consecutive list entries are consecutive checkpoints. Match on the
# suffix so unrelated ".json" files are not picked up.
graph_dirs = sorted(
    (name for name in os.listdir(MODEL_SAVE_PATH) if name.endswith(META_JSON_SUFFIX)),
    key=lambda name: (_checkpoint_step(name), name),
)

for i in range(len(graph_dirs) - 1):
    print("graph diff between ckpt %d and %d" %(i, i + 1))
    graph_diff = diff_models(os.path.join(MODEL_SAVE_PATH, graph_dirs[i]),
                             os.path.join(MODEL_SAVE_PATH, graph_dirs[i + 1]))
    with open("graph_diff %d and %d.json" %(i, i + 1), "w") as d:
        json.dump(graph_diff, d, sort_keys=True, indent=2)

    # Checkpoint prefix = exported graph file name without ".meta.json".
    m1 = graph_dirs[i][:-len(META_JSON_SUFFIX)]
    m2 = graph_dirs[i + 1][:-len(META_JSON_SUFFIX)]
    print("variable diff between ckpt %d and %d" %(i, i + 1))
    variable_diff = diff_varialbes(os.path.join(MODEL_SAVE_PATH, m1),
                                   os.path.join(MODEL_SAVE_PATH, m2))
    with open("variable_diff %d and %d.json" %(i, i + 1), "w") as d:
        json.dump(variable_diff, d, sort_keys=True, indent=2)

graph diff between ckpt 0 and 1


variable diff between ckpt 0 and 1
graph diff between ckpt 1 and 2


variable diff between ckpt 1 and 2
