In [1]:
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import os
import ast
import datetime as dt
import matplotlib.pyplot as plt

import seaborn as sns
import cv2
import pandas as pd
import numpy as np

import glob

import numpy as np
from PIL import Image, ImageDraw

from sklearn.preprocessing import LabelEncoder
from sklearn.cross_validation import train_test_split

import logging
logging.basicConfig(filename='example.log',level=logging.DEBUG)



In [2]:
CLASSES_CSV = glob.glob('../input/train_simplified/*.csv')
CLASSES = [x.split('/')[-1][:-4] for x in CLASSES_CSV]

# 读取单个csv文件
def read_df(path, nrows):
    print('Reading...', path)
    if isinstance(nrows, int):
        return pd.read_csv(path, nrows=nrows, parse_dates=['timestamp'])
    else:
        return pd.read_csv(path, parse_dates=['timestamp'])

# 读取多个csv文件
def contcat_df(paths, nrows):
    dfs = []
    for path in paths:
        dfs.append(read_df(path, nrows))
    return pd.concat(dfs, axis=0, ignore_index=True)

df = contcat_df(CLASSES_CSV, 50)
df = df.reindex(np.random.permutation(df.index))

('Reading...', '../input/train_simplified/eye.csv')
('Reading...', '../input/train_simplified/castle.csv')
('Reading...', '../input/train_simplified/pizza.csv')
('Reading...', '../input/train_simplified/umbrella.csv')
('Reading...', '../input/train_simplified/bat.csv')
('Reading...', '../input/train_simplified/hot tub.csv')
('Reading...', '../input/train_simplified/diving board.csv')
('Reading...', '../input/train_simplified/wine bottle.csv')
('Reading...', '../input/train_simplified/butterfly.csv')
('Reading...', '../input/train_simplified/bread.csv')
('Reading...', '../input/train_simplified/television.csv')
('Reading...', '../input/train_simplified/peas.csv')
('Reading...', '../input/train_simplified/binoculars.csv')
('Reading...', '../input/train_simplified/basket.csv')
('Reading...', '../input/train_simplified/steak.csv')
('Reading...', '../input/train_simplified/eyeglasses.csv')
('Reading...', '../input/train_simplified/backpack.csv')
('Reading...', '../input/train_simplified/ted

('Reading...', '../input/train_simplified/alarm clock.csv')
('Reading...', '../input/train_simplified/shoe.csv')
('Reading...', '../input/train_simplified/map.csv')
('Reading...', '../input/train_simplified/nail.csv')
('Reading...', '../input/train_simplified/stairs.csv')
('Reading...', '../input/train_simplified/blueberry.csv')
('Reading...', '../input/train_simplified/mailbox.csv')
('Reading...', '../input/train_simplified/remote control.csv')
('Reading...', '../input/train_simplified/hourglass.csv')
('Reading...', '../input/train_simplified/trombone.csv')
('Reading...', '../input/train_simplified/watermelon.csv')
('Reading...', '../input/train_simplified/suitcase.csv')
('Reading...', '../input/train_simplified/telephone.csv')
('Reading...', '../input/train_simplified/golf club.csv')
('Reading...', '../input/train_simplified/sword.csv')
('Reading...', '../input/train_simplified/bridge.csv')
('Reading...', '../input/train_simplified/brain.csv')
('Reading...', '../input/train_simplifie

In [3]:
lbl = LabelEncoder().fit(df['word'])
df['word'] = lbl.transform(df['word'])

In [4]:
df.shape

(34000000, 6)

In [5]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

def draw_cv2(raw_strokes, size=256, lw=6, time_color=True):
    BASE_SIZE = 299
    img = np.zeros((BASE_SIZE, BASE_SIZE), np.uint8)
    for t, stroke in enumerate(eval(raw_strokes)):
        for i in range(len(stroke[0]) - 1):
            color = 255 - min(t, 10) * 13 if time_color else 255
            _ = cv2.line(img, (stroke[0][i] + 22, stroke[1][i]  + 22),
                         (stroke[0][i + 1] + 22, stroke[1][i + 1] + 22), color, lw)
    if size != BASE_SIZE:
        return cv2.resize(img, (size, size))
    else:
        return img

class QRDataset(Dataset):
    def __init__(self, img_drawing, img_label, img_size, transform=None):
        self.img_drawing = img_drawing
        self.img_label = img_label
        self.img_size = img_size
        self.transform = transform

    def __getitem__(self, index):
        img = np.zeros((self.img_size, self.img_size, 3))
        img[:, :, 0] = draw_cv2(self.img_drawing[index], self.img_size)
        img[:, :, 1] = img[:, :, 0]
        img[:, :, 2] = img[:, :, 0]
        img = Image.fromarray(np.uint8(img))
        
        if self.transform is not None:
            img = self.transform(img)
        
        label = torch.from_numpy(np.array([self.img_label[index]]))
        return img, label

    def __len__(self):
        return len(self.img_drawing)

In [27]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

model = models.resnet18(True)
model.avgpool = nn.AdaptiveAvgPool2d(1)
model.fc = nn.Linear(512, 340)

model = model.cuda(0)
optimizer = optim.Adam(model.parameters(), lr=0.005)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2, 3, 5, 7, 8], gamma=0.1)

In [7]:
train_df = df.iloc[:-70000]
val_df = df.iloc[-70000:]

train_loader = torch.utils.data.DataLoader(
    QRDataset(train_df['drawing'].values, train_df['word'].values, 128,
                     transforms.Compose([
                        transforms.RandomHorizontalFlip(),
                        transforms.RandomVerticalFlip(),
                        transforms.RandomAffine(5, scale=[0.85, 1.05]),
                        transforms.ToTensor(),
                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    ),
    batch_size=2000, shuffle=True, num_workers=10,
)

val_loader = torch.utils.data.DataLoader(
    QRDataset(val_df['drawing'].values, val_df['word'].values, 128,
                     transforms.Compose([
                        transforms.RandomHorizontalFlip(),
                        transforms.ToTensor(),
                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    ),
    batch_size=2000, shuffle=True, num_workers=10,
)

In [8]:
import torch.nn.functional as F
loss_fn = nn.CrossEntropyLoss()

def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

for epoch in range(10):
    scheduler.step()
    for i, data in enumerate(train_loader):
        x, y = data
        x = Variable(x).cuda(0)
        y = Variable(y.view(-1)).cuda(0)

        optimizer.zero_grad()
        output = model(x)
        loss = loss_fn(output, y)
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            acc1, acc3 = accuracy(output, y, topk=(1, 3))
            logstr = 'Epoch {0}/{1}: \tloss {2}, ACC {3:.4f}/{4:.4f}'.format(epoch, i, loss.item(), 
                                                                  acc1.item(), acc3.item())
            logging.info(logstr)
        if i % 1000 == 0:
            torch.save(model.state_dict(), 'resnet18_{0}.pt'.format(epoch))

Process Process-4:
Process Process-9:
Process Process-5:
Process Process-10:
Process Process-6:
Process Process-1:
Process Process-3:
Traceback (most recent call last):
Process Process-8:
Process Process-7:
Process Process-2:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/usr/lib64/python2.7/multiprocessing/proc

KeyboardInterrupt: 

In [11]:
model = model.eval()
with torch.no_grad():
    for data in val_loader:
        images, labels = data
        images = Variable(images).cuda(0)
        labels = Variable(labels.view(-1)).cuda(0)
        
        outputs = model(images)
        acc1, acc5 = accuracy(outputs, labels, topk=(1, 3))
        print acc1.item(), acc5.item()

80.9000015259 93.1500015259
80.5 93.4000015259
79.3000030518 92.5999984741
79.5500030518 92.0
80.1500015259 92.1500015259
80.7000045776 92.4500045776
81.5 93.5
81.0 92.8000030518
80.5500030518 93.0999984741
81.0 93.1500015259
81.4500045776 92.3000030518
81.3499984741 92.7000045776
79.9000015259 92.8000030518
79.8000030518 92.75
81.0 93.25
81.0 92.75
80.9000015259 92.5500030518
81.9000015259 93.75
80.9000015259 93.25
79.2000045776 92.8000030518
81.5 93.9000015259
80.7000045776 92.6500015259
79.7000045776 92.9000015259
80.7000045776 92.6500015259
81.8000030518 92.8499984741
82.4500045776 92.8000030518
80.4000015259 93.8499984741
79.9500045776 92.8499984741
80.4000015259 92.3499984741
80.0999984741 93.5
80.1500015259 92.75
81.5500030518 93.8000030518
80.5500030518 93.1500015259
80.0500030518 92.4500045776
79.4000015259 92.5


In [22]:
submit = pd.read_csv('../input/sample_submission.csv')
submit_df = pd.read_csv('../input/test_simplified.csv')

In [12]:
test_loader = torch.utils.data.DataLoader(
    QRDataset(submit_df['drawing'].values, np.zeros(submit_df.shape[0]), 64,
                     transforms.Compose([
#                         transforms.RandomHorizontalFlip(),
#                         transforms.RandomVerticalFlip(),
                        transforms.ToTensor(),
        ])
    ),
    batch_size=2000, shuffle=False, num_workers=10,
)

In [13]:
pred = []
for t, (x, y) in enumerate(test_loader):
    x_var = Variable(x.cuda(0))
    y_var = Variable(y.cuda(0))
    scores = model(x_var)
    pred.append(scores.data.cpu().numpy())
pred = np.concatenate(pred, 0)

In [14]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

pred_label = [lbl.inverse_transform(x.argsort()[-3:][::-1]) for x in pred]

In [15]:
pred_label = np.vstack(pred_label)

In [16]:
submit['top1'] = pred_label[:, 0]
submit['top2'] = pred_label[:, 1]
submit['top3'] = pred_label[:, 2]

In [17]:
submit['top1'] = submit['top1'].apply(lambda x: x.replace(' ', '_'))
submit['top2'] = submit['top2'].apply(lambda x: x.replace(' ', '_'))
submit['top3'] = submit['top3'].apply(lambda x: x.replace(' ', '_'))

In [18]:
submit['word'] = submit['top1'] + ' ' + submit['top2'] + ' ' + submit['top3']

In [19]:
submit[['key_id', 'word']].to_csv('./tmp_91.csv', index=None)

In [5]:
'{0:.4f}'.format(2.3)

'2.3000'

In [9]:
model = model.eval()

In [1]:
df

NameError: name 'df' is not defined

In [24]:
train_test_split(df, test_size=0.01)[1].shape

(34000, 6)

In [25]:
submit.to_pickle?

In [29]:
model.train?

In [31]:
'{0:8s}'.format('ss')

'ss      '

In [3]:
import time, datetime

In [15]:
import codecs, time, os
from datetime import datetime

datetime.now().strftime('%Y%m%d%H%M%S%f')[:-3]

'20181110195848839'

In [2]:
datetime.now()

datetime.datetime(2018, 11, 9, 18, 41, 47, 884700)

In [11]:
{"time":"20181109105423524","key":"ump.psoriasis3.logo.tptime","hostname":"10.177.62.4","processState":"0","elapsedTime":"36"}
{"time":"20130415135617820","key":"JCSS.ObjectCheck.checkfile","hostname":" YPT-Wangyuan ","processState":"0","elapsedTime":"36"}

SyntaxError: invalid syntax (<ipython-input-11-4e3b1b4592ff>, line 1)

In [13]:
os.path.exists('./')

True

In [16]:
import netifaces as ni
ni.ifaddresses('eth0')
ip = ni.ifaddresses('eth0')[ni.AF_INET][0]['addr']
print ip  # should print "192.168.100.37"

172.28.220.19


In [18]:
str(ip)

'172.28.220.19'

In [41]:
# -*- coding: utf-8 -*-
import os, sys, json, codecs
from datetime import datetime
from collections import OrderedDict

import netifaces as ni
def getip():
    ni.ifaddresses('eth0')
    ip = ni.ifaddresses('eth0')[ni.AF_INET][0]['addr']
    return str(ip)

def gettime():
    return datetime.now().strftime('%Y%m%d%H%M%S%f')[:-3]

# https://cf.jd.com/pages/viewpage.action?pageId=73256284
# https://cf.jd.com/pages/viewpage.action?pageId=73251808
class UMP(object):
    def __init__(self):
        if not os.path.exists('/export/home/tomcat/UMP-Monitor/logs/'):
            os.makedirs('/export/home/tomcat/UMP-Monitor/logs/')
        self.ip = getip()

    def ump_tp(self, path, key, state, elapesd, host=None):
        '''方法性能监控

        写入格式，
        {"time":"20130415135617820","key":"JCSS.ObjectCheck.checkfile","hostname":" YPT-Wangyuan ","processState":"0","elapsedTime":"36"}

        '''

        timestr = datetime.now().strftime('%Y%m%d%H%M%S%f')[:-3]
        if host == None:
            host = self.ip
        state = str(state)
        elapesd = str(elapesd)

        logjson = OrderedDict([
                    ("time", gettime()), 
                    ("key", key), 
                    ("hostname", host), 
                    ("processState", state),
                    ("elapsedTime", elapesd), 
        ])
        with open('/export/home/tomcat/UMP-Monitor/logs/' + path, 'a') as up:
            up.write(json.dumps(logjson) + '\n')

    def ump_alive(self):
        pass

In [42]:
ump = UMP()

In [43]:
ump.ump_tp('./', '897', '1', '23')

{"time": "20181110202057745", "key": "897", "hostname": "172.28.220.19", "processState": "1", "elapsedTime": "23"}


In [44]:
type = 2

In [88]:
a = time.time()
# time.sleep(1)


In [89]:
print str(int((time.time() - a) * 1000))

393


In [4]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

model = models.resnet101(True)
model.avgpool = nn.AdaptiveAvgPool2d(1)
model.fc = nn.Linear(512, 340)

Downloading: "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" to /root/.torch/models/resnet101-5d3b4d8f.pth
100%|██████████| 178728960/178728960 [00:11<00:00, 14909357.07it/s]


In [5]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F