In [1]:
import sys
import os
import math
import random
import heapq 
import time
import copy
import gc
import numpy as np
import pandas as pd
from functools import reduce
from sklearn.model_selection import train_test_split
from scipy.spatial.distance import pdist
from sklearn.metrics import confusion_matrix,roc_curve,accuracy_score,auc 
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import faiss 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
import baostock as bs#pip install baostock
import mplfinance as mpf #pip install mplfinance
from matplotlib.pylab import date2num
import datetime
import smtplib
from email.mime.text import MIMEText
from email.header import Header
# Make GPU 0 the current CUDA device for this process, then print the index
# of the device that is now current.
torch.cuda.set_device(0)
print (torch.cuda.current_device())

Loading faiss with AVX2 support.


0


In [4]:
#Generate Dataset
# Reads the label table, drops unlabeled rows, loads every referenced image as
# a 256x256 float32 array and splits the result 90/10 into train/test lists.
root_dir = '/data/fjsdata/qtsys/img/' #the path of images
data = pd.read_csv('/data/fjsdata/qtsys/label.csv')
# Missing labels are tagged 'H' and then filtered out.
data['label'] = data['label'].fillna('H')
data = data[data['label'] != 'H']
print(data.shape)
data = data.sample(frac=1).reset_index(drop=True) #shuffle
#Dataset
X, Y = [], []
for _, row in data.iterrows():
    image_path = None  # stays None if building the path itself fails
    try:
        image_path = os.path.join(root_dir, row['name'])
        raw = cv2.imread(image_path)
        if raw is None:
            # cv2.imread signals an unreadable/missing file by returning None
            raise IOError('cv2.imread could not read the file')
        img = cv2.resize(raw.astype(np.float32), (256, 256))#(1600,800,3)->(256,256,3)
        label = 0 if row['label'] == 'B' else 1 #0: buy, 1: sell (any non-'B' label)
        # Append image and label together so X and Y can never get out of step.
        X.append(img)
        Y.append(label)
    except Exception as err:
        # Best effort: report the offending row and keep loading the rest.
        # (The original handler referenced an undefined name and crashed here.)
        print('{}:{} ({})'.format(row['name'], image_path, err))
    sys.stdout.write('\r{} / {} '.format(len(Y),data.shape[0]))
    sys.stdout.flush()

#split trainset and testset
trI, teI, trY, teY = train_test_split(X, Y, test_size=0.1, random_state=42) #list after return
print('The length of train set is %d'%len(trI))
print('The length of test set is %d'%len(teI))

class SpatialAttention(nn.Module):
    """Spatial attention layer.

    Collapses the channel axis of the input into two maps (per-pixel mean and
    per-pixel max), stacks them, applies a bias-free 3x3 convolution that
    preserves the spatial size, and returns the sigmoid of the result as a
    single-channel map.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=3, padding=1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the attention map for `x`; dim 1 of `x` is reduced to size 1."""
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True)[0]
        pooled = torch.cat((channel_mean, channel_max), dim=1)
        return self.sigmoid(self.conv1(pooled))
    
class ResBlock(nn.Module):
    """Residual block: conv-BN-ReLU-conv-BN plus a shortcut, followed by ReLU.

    Args:
        in_channels: number of channels of the input tensor.
        out_channels: number of channels produced by the block.
        stride: stride of the first convolution (and of the shortcut
            projection when one is needed).

    When the channel count changes or ``stride != 1`` the shortcut is a
    3x3 convolution + BatchNorm (``downsample_layer``); otherwise the input is
    added unchanged and ``downsample_layer`` is ``None``.
    """

    def __init__(self, in_channels: int, out_channels: int, stride=1):
        super().__init__()
        main_path = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
        ]
        self.net = nn.Sequential(*main_path)

        needs_projection = stride != 1 or in_channels != out_channels
        self.do_downsample = needs_projection
        if needs_projection:
            self.downsample_layer = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.downsample_layer = None

        # Re-initialise every conv/linear weight in this block.
        self.apply(self.init_weights)

    def forward(self, x):
        """Return ``relu(net(x) + shortcut(x))``."""
        residual = self.net(x)
        shortcut = self.downsample_layer(x) if self.do_downsample else x
        return F.relu(residual + shortcut, inplace=True)

    @staticmethod
    def init_weights(m):
        """Xavier-normal initialise the weight of Conv2d / Linear modules."""
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            nn.init.xavier_normal_(m.weight)
            
class ASHNet(nn.Module):
    """Hashing network: three residual blocks, spatial attention, one linear layer.

    Args:
        code_size: length of the real-valued code produced per input image.

    The linear layer expects exactly 16*128*128 flattened features, so the
    spatial size after the stride-2 block must be 128x128 (the notebook feeds
    256x256 images).
    """

    def __init__(self, code_size: int):
        super().__init__()
        #Resnet
        self.net = nn.Sequential(
            ResBlock(3, 16),
            ResBlock(16, 16),
            ResBlock(16, 16, stride=2),
        )
        #Attention
        self.sa = SpatialAttention()
        #fully connected (no hidden layer and no output activation)
        self.linear = nn.Sequential(nn.Linear(16 * 128 * 128, code_size))

        # Re-initialise every conv/linear weight in the whole network.
        self.apply(self.init_weights)

    def forward(self, x):
        """Return the un-squashed code for a batch of channel-first images."""
        features = self.net(x)
        attended = self.sa(features) * features
        flat = attended.view(attended.size(0), -1)
        return self.linear(flat)

    @staticmethod
    def init_weights(m):
        """Xavier-normal initialise the weight of Conv2d / Linear modules."""
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            nn.init.xavier_normal_(m.weight)

            
class HashLossFunc(nn.Module):
    """Pairwise hashing loss with a binarisation regulariser.

    Args:
        margin: per-dimension margin; multiplied by the code length at call time.
        alpha: weight of the regularisation term.

    ``forward(h1, h2, y)`` takes two batches of codes and a per-pair flag ``y``
    (0 for pairs that should be close, 1 for pairs that should be apart) and
    returns a scalar: mean pull term + mean push term + alpha * L1 distance of
    ``|h|`` from 1 for both code batches.
    """

    def __init__(self, margin=0.5, alpha=0.01):
        super().__init__()
        self.alpha = alpha #regularization
        self.margin = margin #margin threshold
        self.mse_loss = nn.MSELoss(reduction='none')
        self.l1_loss = nn.L1Loss(reduction='mean')

    def forward(self, h1, h2, y):
        code_len = h1.shape[1]
        margin_val = self.margin * code_len
        # Mean squared difference per pair (averaged over the code dimension).
        dist = self.mse_loss(h1, h2).mean(dim=1)

        # T1: 0.5 * (1 - y) * dist(x1, x2)
        pull_term = (0.5 * (1 - y) * dist).mean()

        # T2: 0.5 * y * max(margin - dist(x1, x2), 0)
        floor = torch.zeros_like(dist)
        ceiling = margin_val * torch.ones_like(dist)
        push_term = (0.5 * y * torch.max(floor, ceiling - dist)).mean()

        # T3: alpha * (dist_l1(abs(x1), 1) + dist_l1(abs(x2), 1))
        def gap_to_unit(h):
            return self.l1_loss(torch.abs(h), torch.ones_like(h))

        binarization_term = self.alpha * (gap_to_unit(h1) + gap_to_unit(h2))

        return pull_term + push_term + binarization_term

#Generate image pairs for model
def onlineGenImgPairs(images=None, labels=None):
    """Randomly pair up samples and label each pair as same-class or different-class.

    Args:
        images: sequence of image arrays; defaults to the global ``trI``.
        labels: sequence of class labels aligned with ``images``; defaults to
            the global ``trY``.

    Returns:
        ``(first_images, second_images, pair_labels)`` as numpy arrays of equal
        length ``len(labels) // 2``. ``pair_labels[k]`` is 0 when the two
        samples of pair ``k`` share a label and 1 otherwise.

    Every sample takes part in the shuffle; when the sample count is odd,
    exactly one randomly chosen sample is left unpaired. (The previous version
    always excluded the last sample and produced halves of different length
    whenever the sample count was even.)
    """
    if images is None:
        images = trI
    if labels is None:
        labels = trY

    total = len(labels)
    order = random.sample(range(total), total)  # full random permutation
    usable = 2 * (total // 2)                   # drop one index if total is odd
    first_idx = order[0:usable:2]
    second_idx = order[1:usable:2]

    first_images = np.array([images[i] for i in first_idx])
    second_images = np.array([images[i] for i in second_idx])
    first_labels = np.array([labels[i] for i in first_idx])
    second_labels = np.array([labels[i] for i in second_idx])

    pair_labels = np.where(first_labels != second_labels, 1, 0)
    return first_images, second_images, pair_labels


#define model
# Builds the hashing network, its pairwise loss and an Adam optimizer, then
# trains for 50 epochs on freshly sampled image pairs, keeping a deep copy of
# the model from the epoch with the lowest mean loss in `best_net`.
hash_size=12
model = ASHNet(code_size=hash_size).cuda()
criterion  = HashLossFunc(margin=0.5).cuda() #define loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) #define optimizer
#train model
best_net, best_loss = None, float('inf')
batchSize = 10
for epoch in range(50):#iteration
    # New random pairing of the training images for every epoch.
    trI1_sf, trI2_sf, trY_sf = onlineGenImgPairs()
    losses = []
    # Ceil division: `len // batchSize + 1` produced an extra, empty batch
    # whenever the number of pairs was an exact multiple of batchSize.
    num_batches = (len(trY_sf) + batchSize - 1) // batchSize
    for i in range(num_batches):
        optimizer.zero_grad()#reset gradients left over from the previous step
        min_idx = i * batchSize
        max_idx = min(len(trY_sf), (i+1)*batchSize)
        I1_batch = torch.from_numpy(trI1_sf[min_idx:max_idx]).type(torch.FloatTensor).cuda()
        I2_batch = torch.from_numpy(trI2_sf[min_idx:max_idx]).type(torch.FloatTensor).cuda()
        Y_batch = torch.from_numpy(trY_sf[min_idx:max_idx]).type(torch.FloatTensor).cuda()
        #forward: images are stored channel-last, the network wants channel-first
        X1_batch = model(I1_batch.permute(0, 3, 1, 2))
        X2_batch = model(I2_batch.permute(0, 3, 1, 2))
        #binary-like loss
        loss = criterion(X1_batch,X2_batch,Y_batch)
        #backward
        loss.backward()
        #update parameters
        optimizer.step()
        #show loss
        sys.stdout.write('\r {} / {} : loss = {}'.format(i+1, num_batches, float('%0.6f'%loss.item())))
        sys.stdout.flush()
        losses.append(loss.item())
    epoch_loss = np.mean(losses)
    print("Eopch: %5d mean_loss = %.6f" % (epoch + 1, epoch_loss))
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        best_net = copy.deepcopy(model)
print("best_loss = %.6f" % (best_loss))

#release gpu memory
model = model.cpu()
loss=loss.cpu()
torch.cuda.empty_cache()

def _encode_images(net, images, batch_size):
    """Run `net` over `images` in batches and return tanh-squashed codes.

    Args:
        net: CUDA-resident model mapping a channel-first float batch to codes.
        images: list of channel-last image arrays.
        batch_size: maximum number of images per forward pass.

    Returns:
        A list with one list of floats (the code) per input image, in order.
    """
    codes = []
    # Ceil division avoids the empty trailing batch that `len // bs + 1`
    # creates when len(images) is an exact multiple of batch_size.
    n_batches = (len(images) + batch_size - 1) // batch_size
    # eval(): BatchNorm uses its running statistics, so a code does not depend
    # on which other images share the batch and the statistics are not altered.
    net.eval()
    # no_grad(): inference only, do not build an autograd graph.
    with torch.no_grad():
        for b in range(n_batches):
            chunk = np.array(images[b * batch_size:(b + 1) * batch_size])
            I_batch = torch.from_numpy(chunk).type(torch.FloatTensor).cuda()
            X_batch = torch.tanh(net(I_batch.permute(0, 3, 1, 2)))#forward
            codes.extend(X_batch.cpu().numpy().tolist())
            sys.stdout.write('\r {} / {} '.format(b + 1, n_batches))
            sys.stdout.flush()
    torch.cuda.empty_cache()#release gpu memory
    return codes

batchSize = 10
#hash code of train data from model
trF = _encode_images(best_net, trI, batchSize)
#hash code of test data from model
teF = _encode_images(best_net, teI, batchSize)

# building index of trainset
# Indexes the train codes with an exact L2 faiss index, predicts each test
# sample's label as the label of its nearest train code, and reports accuracy
# and per-class sensitivity.
tstart = time.time()
cpu_index = faiss.IndexFlatL2(hash_size) #
gpu_index = faiss.index_cpu_to_all_gpus(cpu_index) #make all gpu usable
gpu_index.add(np.ascontiguousarray(trF, dtype=np.float32)) #add data(must be float32) to index
elapsed = time.time() - tstart
print('Completed buliding index in %d seconds' % int(elapsed))
#performance
scores, neighbors = gpu_index.search(np.ascontiguousarray(teF, dtype=np.float32), k=1) #return top1
# One vectorised lookup instead of rebuilding np.array(trY) for every test sample.
y_pred = np.asarray(trY)[neighbors.flatten()].tolist() #label of top1
print ( 'Accuracy: %.6f'%accuracy_score(teY, y_pred))
#confusion matrix
# Fixed label order (0=B, 1=S) so the matrix is always 2x2 and row 0 is B,
# even when the test split happens to contain only one class.
labels = [0, 1]
cm = confusion_matrix(teY, y_pred, labels=labels ) #labels=[B,S]
print (cm)
print ('Sensitivity of B: %.6f'%float(cm[0][0]/np.sum(cm[0])))
print ('Sensitivity of S: %.6f'%float(cm[1][1]/np.sum(cm[1])))

(371, 2)
371 / 371 The length of train set is 333
The length of test set is 38
 17 / 17 : loss = 0.682281Eopch:     1 mean_loss = 1.112316
 17 / 17 : loss = 1.082869Eopch:     2 mean_loss = 1.490716
 17 / 17 : loss = 0.807407Eopch:     3 mean_loss = 1.073985
 17 / 17 : loss = 0.882276Eopch:     4 mean_loss = 0.562639
 17 / 17 : loss = 0.841838Eopch:     5 mean_loss = 0.499972
 17 / 17 : loss = 0.072625Eopch:     6 mean_loss = 0.270806
 17 / 17 : loss = 0.153274Eopch:     7 mean_loss = 0.268146
 17 / 17 : loss = 0.101438Eopch:     8 mean_loss = 0.148876
 17 / 17 : loss = 0.169425Eopch:     9 mean_loss = 0.154675
 17 / 17 : loss = 0.074092Eopch:    10 mean_loss = 0.092092
 17 / 17 : loss = 0.038789Eopch:    11 mean_loss = 0.073155
 17 / 17 : loss = 0.031585Eopch:    12 mean_loss = 0.051518
 17 / 17 : loss = 0.050454Eopch:    13 mean_loss = 0.143127
 17 / 17 : loss = 0.322315Eopch:    14 mean_loss = 0.094282
 17 / 17 : loss = 0.027282Eopch:    15 mean_loss = 0.088943
 17 / 17 : loss = 0.0

In [22]:
#PHA
def avhash(im): #Perceptual hash algorithm
    """Compute an average hash of an image as a Python integer.

    Args:
        im: a PIL ``Image.Image`` or anything ``Image.open`` accepts.

    Returns:
        An integer whose bit ``k`` is 1 when the ``k``-th pixel of the 16x16
        grayscale thumbnail is at least the thumbnail's mean value, else 0.
    """
    if not isinstance(im, Image.Image):
        im = Image.open(im)
    # Image.LANCZOS is the same filter as the older Image.ANTIALIAS alias,
    # which recent Pillow releases no longer provide.
    pixels = list(im.resize((16, 16), Image.LANCZOS).convert('L').getdata())
    # Divide by the real pixel count; the previous hard-coded 64 made the
    # threshold four times the true mean for a 16x16 thumbnail.
    avg = sum(pixels) / float(len(pixels))
    bits = 0
    for position, value in enumerate(pixels):
        if value >= avg:
            bits |= 1 << position
    return bits

# Reads the label table, drops unlabeled rows, loads every referenced image as
# a flattened 128x128 grayscale float32 vector and splits 90/10 into train/test.
root_dir = '/data/fjsdata/qtsys/img/' #the path of images
data = pd.read_csv('/data/fjsdata/qtsys/label.csv')
# Missing labels are tagged 'H' and then filtered out.
data['label'] = data['label'].fillna('H')
data = data[data['label'] != 'H']
print(data.shape)
data = data.sample(frac=1).reset_index(drop=True) #shuffle
#Dataset
X, Y = [], []
for _, row in data.iterrows():
    image_path = None  # stays None if building the path itself fails
    try:
        image_path = os.path.join(root_dir, row['name'])
        raw = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if raw is None:
            # cv2.imread signals an unreadable/missing file by returning None
            raise IOError('cv2.imread could not read the file')
        img = cv2.resize(raw.astype(np.float32), (128, 128))
        label = 0 if row['label'] == 'B' else 1 #0: buy, 1: sell (any non-'B' label)
        # Append vector and label together so X and Y can never get out of step.
        X.append(img.flatten())
        Y.append(label)
    except Exception as err:
        # Best effort: report the offending row and keep loading the rest.
        # (The original handler referenced an undefined name and crashed here.)
        print('{}:{} ({})'.format(row['name'], image_path, err))
    sys.stdout.write('\r{} / {} '.format(len(Y),data.shape[0]))
    sys.stdout.flush()

#split trainset and testset
trI, teI, trY, teY = train_test_split(X, Y, test_size=0.1, random_state=42) #list after return
print('The length of train set is %d'%len(trI))
print('The length of test set is %d'%len(teI))

# building index of trainset
# Baseline: indexes the raw 128*128 pixel vectors with an exact L2 faiss index,
# predicts each test sample's label as the label of its nearest train vector,
# and reports accuracy and per-class sensitivity.
tstart = time.time()
cpu_index = faiss.IndexFlatL2(128*128) #
gpu_index = faiss.index_cpu_to_all_gpus(cpu_index) #make all gpu usable
gpu_index.add(np.ascontiguousarray(trI, dtype=np.float32)) #add data(must be float32) to index
elapsed = time.time() - tstart
print('Completed buliding index in %d seconds' % int(elapsed))
#performance
scores, neighbors = gpu_index.search(np.ascontiguousarray(teI, dtype=np.float32), k=1) #return top1
# One vectorised lookup instead of rebuilding np.array(trY) for every test sample.
y_pred = np.asarray(trY)[neighbors.flatten()].tolist() #label of top1
print ( 'Accuracy: %.6f'%accuracy_score(teY, y_pred))
#confusion matrix
# Fixed label order (0=B, 1=S) so the matrix is always 2x2 and row 0 is B,
# even when the test split happens to contain only one class.
labels = [0, 1]
cm = confusion_matrix(teY, y_pred, labels=labels )
print (cm)
print ('Sensitivity of B: %.6f'%float(cm[0][0]/np.sum(cm[0])))
print ('Sensitivity of S: %.6f'%float(cm[1][1]/np.sum(cm[1])))

(371, 2)
371 / 371 The length of train set is 333
The length of test set is 38
Completed buliding index in 1 seconds
Accuracy: 0.868421
[[ 7  4]
 [ 1 26]]
Sensitivity of B: 0.636364
Sensitivity of S: 0.962963


In [17]:
#send email: https://zhuanlan.zhihu.com/p/24180606
# Sends a short plain-text test message through the 163.com SMTP-over-SSL
# server and prints 'success' or 'error <reason>'.
mail_host = 'smtp.163.com'
mail_user = 'sharpsword'
# SECURITY: the password is read from the MAIL_PASS environment variable
# instead of being stored in the notebook. The value that used to be
# hard-coded here has been exposed and should be rotated.
mail_pass = os.environ.get('MAIL_PASS')
sender = 'sharpsword@163.com'
receivers = ['36370408@qq.com']
message = MIMEText('content','plain','utf-8')
message['Subject'] = 'Test2'
message['From'] = sender
message['To'] = receivers[0]

if not mail_pass:
    print('error', 'MAIL_PASS environment variable is not set')
else:
    try:
        # The context manager sends QUIT and closes the socket even when
        # login or sendmail raises, so the connection can no longer leak.
        with smtplib.SMTP_SSL(mail_host) as smtpObj:
            smtpObj.login(mail_user,mail_pass)
            smtpObj.sendmail(sender,receivers,message.as_string())
        print('success')
    except (smtplib.SMTPException, OSError) as e:
        # OSError additionally covers connection/DNS failures.
        print('error',e) #print the error

success
