-
Notifications
You must be signed in to change notification settings - Fork 0
/
load_data.py
114 lines (103 loc) · 3.82 KB
/
load_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#Load format data from text
#Author: Bo Liu
#Date: 2013.10.15
import numpy as np
import os
def normalize(fea):
return (fea-fea.min()) / (fea.max() - fea.min())
def analysis(Tr_sim, Tr_fea1, Tr_fea2, Tst_fea1, Tst_fea2, GD_Path):
#num_img_fea = 500
#num_tag_fea = 100
path = os.getcwd()
#Construct the path
Tr_sim_path = os.path.join(path, Tr_sim)
Tr_img_path = os.path.join(path, Tr_fea1)
Tr_tag_path = os.path.join(path, Tr_fea2)
#Tst_sim_path = os.path.join(path, Tst_sim)
Tst_img_path = os.path.join(path, Tst_fea1)
Tst_qa_path = os.path.join(path, Tst_fea2)
gd_path = os.path.join(path, GD_Path)
Tr_sim = np.loadtxt(Tr_sim_path)
Tr_img = np.loadtxt(Tr_img_path)
Tr_tag = np.loadtxt(Tr_tag_path)
Tst_img = np.loadtxt(Tst_img_path)
Tst_qa = np.loadtxt(Tst_qa_path)
gd = np.loadtxt(gd_path)
##Read
#Tr_sim_file = open(Tr_sim_path).readlines()
#Tr_img_file = open(Tr_img_path).readlines()
#Tr_tag_file = open(Tr_tag_path).readlines()
#
##Tst_sim_file = open(Tst_sim_path).readlines()
#Tst_img_file = open(Tst_img_path).readlines()
#Tst_qa_file = open(Tst_qa_path).readlines()
#gd_file = open(gd_path).readlines()
#
##Init matrix
#Tr_sim = np.zeros((len(Tr_img_file), len(Tr_sim_file))) #the similarity between image and tag in 170*170
#Tr_img = np.zeros((len(Tr_img_file), num_img_fea))
#Tr_tag = np.zeros((len(Tr_tag_file), num_tag_fea))
#
#Tst_sim = np.zeros((len(Tst_img_file), len(Tst_qa_file)))
#Tst_img = np.zeros((len(Tst_img_file), num_img_fea))
#Tst_qa = np.zeros((len(Tst_qa_file), num_tag_fea))
#gd = np.zeros((len(gd_file), len(Tst_qa_file)))
#
##QA feature matrix #qa * # bag of word
#for i in range(len(Tst_qa_file)):
# line = Tst_qa_file[i].split('\t')
# for j in range(Tst_qa.shape[1]):
# Tst_qa[i, j] = float(line[j])
#
# #Ground Truth
#for i in range(len(gd_file)):
# line = gd_file[i].split('\t')
# for j in range(len(line)):
# gd[i, j] = int(line[j])
#
##Tst similarit
##for i in range(len(Tst_sim_file)):
# #line = Tst_sim_file[i].split('\t')
# #for j in range(len(line)):
# #Tst_sim[i, j] = float(line[j])
#
##Tst Images
#for i in range(len(Tst_img_file)):
# line = Tst_img_file[i].split(' ')
# print 'line len', len(line)
# for j in range(len(line)):
# Tst_img[i, j] = float(line[j])
#
##Train similarity matrix #image * #tag
#for i in range(len(Tr_sim_file)):
##similarity = line.split('\t')
##Tr_sim[int(similarity[0])-1][int(similarity[1])-1] = float(similarity[2])
# line = Tr_sim_file[i].split('\t')
# for j in range(len(line)):
# Tr_sim[i, j] = float(line[j])
#
# #Image feature n*500
#for i in range(len(Tr_img_file)):
# line = Tr_img_file[i].split('\t')
# for j in range(len(line)):
# #if (':') in feature:
# #fea_idx = feature.split(':')[0]
# #fea_val = feature.split(':')[1]
# #Tr_img[i][int(fea_idx) - 1] = int(fea_val)
# Tr_img[i, j] = float(line[j])
#
# #Tag feature n*1000
#for i in range(len(Tr_tag_file)):
# line = Tr_tag_file[i].split('\t')
# for j in range(len(line)):
# #if (':') in feature:
# #fea_idx = feature.split(':')[0]
# #fea_val = feature.split(':')[1]
# Tr_tag[i, j] = float(line[j])
#Return the data set matrix that is dimension \times instance
#Ground Truth is images \times QA
Tr_img = normalize(Tr_img)
Tr_tag = normalize(Tr_tag)
Tst_img = normalize(Tst_img)
Tst_qa = normalize(Tst_qa)
return [Tr_sim, Tr_img.transpose(), Tr_tag.transpose(), Tst_img.transpose(), Tst_qa.transpose(), gd]