In [2]:
%matplotlib inline

import logging
import os
import numpy as np
import pandas as pd
import csv

In [3]:
DATA_ROOT = "data"
RESULTS_ROOT = "results"

In [4]:
from tqdm import tqdm
def read_csv(fname):
    df = pd.read_csv(fname)
    df["issued date"] = pd.to_datetime(df["issued date"], format='%Y-%m-%d')
    return df

file_path = os.path.join(DATA_ROOT, "text_data_medical_imaging.csv")
df = read_csv(file_path)

In [5]:
df.head()

Unnamed: 0,pn,issued date,abstract,claims,f_citation
0,10504621,2019-12-10,"According to one embodiment, a medical image p...",What is claimed is: \n\n 1. A medical image p...,0
1,10504252,2019-12-10,An imaging data processing apparatus comprises...,The invention claimed is: \n\n 1. An imaging ...,0
2,10504229,2019-12-10,A medical image processing apparatus according...,What is claimed is: \n\n 1. A medical image p...,0
3,10504227,2019-12-10,This disclosure generally pertains to methods ...,We claim: \n\n 1. A method for streamlining a...,0
4,10499883,2019-12-10,The systems and methods described herein relat...,What is claimed is: \n\n 1. A method for spat...,0


In [6]:
# pns = list(map(str, list(df["pn"])))
pns = df["pn"].astype(str).tolist()
print(pns[:10])
print("length:", len(pns))

['10504621', '10504252', '10504229', '10504227', '10499883', '10499868', '10497118', '10492764', '10492763', '10489910']
length: 1877


## Forward Citation 정보 추가

In [7]:
import sys
from tqdm import tqdm
from uspto.search import search, search_count
import uspto
import uspto.analysis.citation
import uspto.crawl, uspto.parse
import time
uspto.shared.set_html_dir("/share/uspto/html")

In [8]:
def get_f_citation_info(pn, timeout=1000):
    headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36"
    }
    query = "ref/{}".format(pn)
    
    ref_pns = []
    page = 1
    total = search_count(query, timeout=timeout, headers=headers)
    pns50, is_next = search(query, timeout=timeout, headers=headers, page=page)
    ref_pns += pns50
    while is_next != None:
        page += 1
        pns50, is_next = uspto.search.search(query, is_next, timeout=timeout, headers=headers)
        ref_pns += pns50
        # print("Page {}: {} \n".format(page, str(pns50)))
        if page % 10 == 0:
            # print("################# Progress: {}%".format(((len(pns) + page * 50) / total) * 100))
            pns_json = json.dumps(pns)
            f = open("pns9070.txt", "w")
            f.write(pns_json)
            f.write("Page: {}".format(page))
            f.close()
    
    total_len = len(ref_pns)
    forward_df_list = []
    p = uspto.Patent(pn, use_db=False)
    issued_year = int(p.date.strftime('%Y-%m-%d')[:4])
    for idx, ref_pn in enumerate(ref_pns):
        if "D" in ref_pn: continue
        try:
            p = uspto.Patent(ref_pn, use_db=False)
            # print("pn: ", str(ref_pn), "(" + str(idx+1) + "/" + str(total_len) + ")")
            issued_date = p.date.strftime('%Y-%m-%d')[:4] if p.date != None else ''
            forward_df_list.append([ref_pn, issued_date])
        except KeyboardInterrupt:
            f.close()
            break
        except (FileNotFoundError, IndexError, ConnectionError): # TODO 제대로 크롤링 하지 못한 html이 그대로 남아 있는 경우가 존재.
            # print("\tpn: ", ref_pn, " not found or exist but something wrong. and try crwal pn =", ref_pn)
            if uspto.crawl.save_html(ref_pn, overwrite=True, retry=10):
                # print("\tCrawling success! and parse again..")
                time.sleep(8)
                try:
                    p = uspto.Patent(ref_pn, use_db=False)
                    # print("\tpn: ", ref_pn, "(" + str(idx+1) + "/" + str(total_len) + ")")
                    issued_date = p.date.strftime('%Y-%m-%d')[:4] if p.date != None else ''
                    forward_df_list.append([ref_pn, issued_date])
                except:
                    # print("\tUnexpected error:", sys.exc_info()[0])
                    # print("\tUnexpected error:", ref_pn)
                    continue

            else:
                # print("\tCrawling failed. Continue.:", ref_pn)
                continue
        except:
            # print("Unexpected error:", sys.exc_info()[0])
            # continue
            raise

    forward_df = pd.DataFrame(forward_df_list, columns=["pn", "year"])
    forward_df["year"] = pd.to_numeric(forward_df["year"])

    given_set = list(range(1984, 2021))
    temp = forward_df["year"].value_counts(sort=True)

    count_df = pd.Series(0, index=given_set)
    count_df = count_df.add(temp, fill_value=0)
    count_df.name = pn
    return count_df

In [11]:
from tqdm import tqdm
counts = pd.DataFrame(columns=list(range(1984, 2021)))

In [12]:
for pn in tqdm(pns):
    if pn in counts.index: continue
    p = uspto.Patent(pn, use_db=False)
    success = False
    while not success:
        try:
            count_df = get_f_citation_info(pn, timeout=20)
            success = True
            counts = counts.append(count_df)
        except KeyboardInterrupt:
            break
        except:
            print("retry!")
            continue


  0%|          | 0/1877 [00:00<?, ?it/s][A
  0%|          | 1/1877 [00:08<4:33:48,  8.76s/it][A
  0%|          | 2/1877 [00:10<3:27:20,  6.63s/it][A
  0%|          | 3/1877 [00:13<2:54:19,  5.58s/it][A
  0%|          | 4/1877 [00:15<2:15:47,  4.35s/it][A
  0%|          | 5/1877 [00:16<1:51:14,  3.57s/it][A
  0%|          | 6/1877 [00:18<1:31:45,  2.94s/it][A
  0%|          | 7/1877 [00:21<1:31:58,  2.95s/it][A
  0%|          | 8/1877 [00:22<1:18:29,  2.52s/it][A
  0%|          | 9/1877 [00:24<1:09:11,  2.22s/it][A
  1%|          | 10/1877 [00:25<1:04:06,  2.06s/it][A

retry!
retry!



  1%|          | 11/1877 [01:29<10:40:39, 20.60s/it][A
  1%|          | 12/1877 [01:31<7:43:23, 14.91s/it] [A
  1%|          | 13/1877 [01:33<5:42:00, 11.01s/it][A
  1%|          | 14/1877 [01:34<4:14:00,  8.18s/it][A
  1%|          | 15/1877 [01:37<3:25:28,  6.62s/it][A
  1%|          | 16/1877 [01:39<2:37:05,  5.06s/it][A
  1%|          | 17/1877 [01:42<2:22:55,  4.61s/it][A
  1%|          | 18/1877 [01:44<1:53:56,  3.68s/it][A
  1%|          | 19/1877 [01:45<1:33:58,  3.03s/it][A
  1%|          | 20/1877 [01:47<1:20:14,  2.59s/it][A
  1%|          | 21/1877 [01:49<1:11:51,  2.32s/it][A
  1%|          | 22/1877 [01:50<1:05:30,  2.12s/it][A
  1%|          | 23/1877 [01:52<1:01:02,  1.98s/it][A
  1%|▏         | 24/1877 [01:54<57:13,  1.85s/it]  [A

retry!
retry!



  1%|▏         | 25/1877 [02:57<10:30:19, 20.42s/it][A
  1%|▏         | 26/1877 [02:59<7:36:20, 14.79s/it] [A
  1%|▏         | 27/1877 [03:01<5:35:18, 10.88s/it][A
  1%|▏         | 28/1877 [03:02<4:08:20,  8.06s/it][A
  2%|▏         | 29/1877 [03:04<3:08:24,  6.12s/it][A
  2%|▏         | 30/1877 [03:05<2:25:11,  4.72s/it][A
  2%|▏         | 31/1877 [03:07<1:58:42,  3.86s/it][A
  2%|▏         | 32/1877 [03:09<1:37:16,  3.16s/it][A
  2%|▏         | 33/1877 [03:10<1:22:33,  2.69s/it][A
  2%|▏         | 34/1877 [03:12<1:11:36,  2.33s/it][A
  2%|▏         | 35/1877 [03:13<1:03:55,  2.08s/it][A
  2%|▏         | 36/1877 [03:15<59:01,  1.92s/it]  [A
  2%|▏         | 37/1877 [03:16<54:57,  1.79s/it][A
  2%|▏         | 38/1877 [03:18<51:58,  1.70s/it][A

retry!
retry!



  2%|▏         | 39/1877 [04:23<10:37:59, 20.83s/it][A
  2%|▏         | 40/1877 [04:26<7:56:57, 15.58s/it] [A
  2%|▏         | 41/1877 [04:28<5:52:04, 11.51s/it][A
  2%|▏         | 42/1877 [04:32<4:36:30,  9.04s/it][A
  2%|▏         | 43/1877 [04:33<3:28:30,  6.82s/it][A
  2%|▏         | 44/1877 [04:35<2:40:51,  5.27s/it][A
  2%|▏         | 45/1877 [04:38<2:17:25,  4.50s/it][A
  2%|▏         | 46/1877 [04:40<1:54:51,  3.76s/it][A
  3%|▎         | 47/1877 [04:42<1:38:20,  3.22s/it][A
  3%|▎         | 48/1877 [04:43<1:23:15,  2.73s/it][A
  3%|▎         | 49/1877 [04:46<1:25:16,  2.80s/it][A

retry!
retry!



  3%|▎         | 50/1877 [05:50<10:44:10, 21.15s/it][A
  3%|▎         | 51/1877 [05:52<7:45:39, 15.30s/it] [A
  3%|▎         | 52/1877 [05:54<5:41:47, 11.24s/it][A
  3%|▎         | 53/1877 [05:55<4:15:00,  8.39s/it][A
  3%|▎         | 54/1877 [05:57<3:14:26,  6.40s/it][A
  3%|▎         | 55/1877 [05:59<2:32:50,  5.03s/it][A
  3%|▎         | 56/1877 [06:01<2:02:01,  4.02s/it][A
  3%|▎         | 57/1877 [06:03<1:44:31,  3.45s/it][A
  3%|▎         | 58/1877 [06:04<1:28:06,  2.91s/it][A
  3%|▎         | 59/1877 [06:06<1:16:24,  2.52s/it][A
  3%|▎         | 60/1877 [06:08<1:07:20,  2.22s/it][A
  3%|▎         | 61/1877 [06:09<1:00:49,  2.01s/it][A
  3%|▎         | 62/1877 [06:11<56:31,  1.87s/it]  [A

retry!
retry!



  3%|▎         | 63/1877 [07:15<10:20:24, 20.52s/it][A
  3%|▎         | 64/1877 [07:17<7:30:39, 14.91s/it] [A
  3%|▎         | 65/1877 [07:18<5:30:50, 10.95s/it][A
  4%|▎         | 66/1877 [07:20<4:08:33,  8.23s/it][A
  4%|▎         | 67/1877 [07:22<3:10:28,  6.31s/it][A
  4%|▎         | 68/1877 [07:24<2:28:34,  4.93s/it][A
  4%|▎         | 69/1877 [07:26<2:08:20,  4.26s/it][A
  4%|▎         | 70/1877 [07:28<1:45:40,  3.51s/it][A
  4%|▍         | 71/1877 [07:30<1:29:26,  2.97s/it][A
  4%|▍         | 72/1877 [07:32<1:21:15,  2.70s/it][A
  4%|▍         | 73/1877 [07:35<1:26:06,  2.86s/it][A

retry!
retry!



  4%|▍         | 74/1877 [08:39<10:38:06, 21.24s/it][A
  4%|▍         | 75/1877 [08:41<7:42:53, 15.41s/it] [A
  4%|▍         | 76/1877 [08:43<5:38:02, 11.26s/it][A
  4%|▍         | 77/1877 [08:45<4:15:41,  8.52s/it][A
  4%|▍         | 78/1877 [09:06<6:09:45, 12.33s/it][A
  4%|▍         | 79/1877 [09:08<4:32:47,  9.10s/it][A
  4%|▍         | 80/1877 [09:09<3:25:02,  6.85s/it][A
  4%|▍         | 81/1877 [09:11<2:38:00,  5.28s/it][A
  4%|▍         | 82/1877 [09:12<2:05:15,  4.19s/it][A
  4%|▍         | 83/1877 [09:36<5:02:46, 10.13s/it][A
  4%|▍         | 84/1877 [09:38<3:45:50,  7.56s/it][A
  5%|▍         | 85/1877 [09:40<2:54:29,  5.84s/it][A
  5%|▍         | 86/1877 [09:42<2:18:53,  4.65s/it][A
  5%|▍         | 87/1877 [09:43<1:52:36,  3.77s/it][A
  5%|▍         | 88/1877 [09:45<1:33:35,  3.14s/it][A
  5%|▍         | 89/1877 [09:47<1:21:13,  2.73s/it][A
  5%|▍         | 90/1877 [09:48<1:10:32,  2.37s/it][A
  5%|▍         | 91/1877 [09:50<1:07:29,  2.27s/it][A
  5%|▍ 

retry!
retry!



  5%|▌         | 98/1877 [11:06<10:16:34, 20.80s/it][A
  5%|▌         | 99/1877 [11:08<7:27:06, 15.09s/it] [A
  5%|▌         | 100/1877 [11:11<5:39:35, 11.47s/it][A
  5%|▌         | 101/1877 [11:12<4:12:01,  8.51s/it][A
  5%|▌         | 102/1877 [11:15<3:23:46,  6.89s/it][A
  5%|▌         | 103/1877 [11:17<2:35:43,  5.27s/it][A
  6%|▌         | 104/1877 [11:18<2:01:50,  4.12s/it][A
  6%|▌         | 105/1877 [11:20<1:42:47,  3.48s/it][A
  6%|▌         | 106/1877 [11:22<1:26:42,  2.94s/it][A
  6%|▌         | 107/1877 [11:24<1:16:39,  2.60s/it][A
  6%|▌         | 108/1877 [11:25<1:07:24,  2.29s/it][A

retry!
retry!



  6%|▌         | 109/1877 [12:31<10:30:02, 21.38s/it][A
  6%|▌         | 110/1877 [12:33<7:36:15, 15.49s/it] [A
  6%|▌         | 111/1877 [12:36<5:45:57, 11.75s/it][A
  6%|▌         | 112/1877 [12:38<4:18:11,  8.78s/it][A
  6%|▌         | 113/1877 [12:40<3:14:18,  6.61s/it][A
  6%|▌         | 114/1877 [12:41<2:29:47,  5.10s/it][A
  6%|▌         | 115/1877 [12:43<1:59:05,  4.06s/it][A
  6%|▌         | 116/1877 [12:44<1:36:42,  3.30s/it][A
  6%|▌         | 117/1877 [12:46<1:21:07,  2.77s/it][A
  6%|▋         | 118/1877 [12:49<1:26:44,  2.96s/it][A
  6%|▋         | 119/1877 [12:51<1:15:48,  2.59s/it][A
  6%|▋         | 120/1877 [12:52<1:06:29,  2.27s/it][A
  6%|▋         | 121/1877 [12:54<59:56,  2.05s/it]  [A
  6%|▋         | 122/1877 [12:56<59:11,  2.02s/it][A

retry!
retry!



  7%|▋         | 123/1877 [13:59<9:52:05, 20.25s/it][A
  7%|▋         | 124/1877 [14:00<7:08:57, 14.68s/it][A
  7%|▋         | 125/1877 [14:02<5:14:01, 10.75s/it][A
  7%|▋         | 126/1877 [14:05<4:08:45,  8.52s/it][A
  7%|▋         | 127/1877 [14:07<3:07:52,  6.44s/it][A
  7%|▋         | 128/1877 [14:09<2:25:38,  5.00s/it][A
  7%|▋         | 129/1877 [14:10<1:56:19,  3.99s/it][A
  7%|▋         | 130/1877 [14:13<1:50:29,  3.79s/it][A
  7%|▋         | 131/1877 [14:15<1:31:16,  3.14s/it][A
  7%|▋         | 132/1877 [14:17<1:17:29,  2.66s/it][A
  7%|▋         | 133/1877 [14:29<2:41:24,  5.55s/it][A
  7%|▋         | 134/1877 [14:32<2:17:39,  4.74s/it][A
  7%|▋         | 135/1877 [14:34<1:58:31,  4.08s/it][A
  7%|▋         | 136/1877 [14:36<1:38:15,  3.39s/it][A
  7%|▋         | 137/1877 [14:39<1:35:00,  3.28s/it][A
  7%|▋         | 138/1877 [14:41<1:19:07,  2.73s/it][A
  7%|▋         | 139/1877 [14:45<1:31:28,  3.16s/it][A
  7%|▋         | 140/1877 [14:46<1:18:46,  2.72

retry!



  8%|▊         | 147/1877 [16:00<9:42:17, 20.19s/it][A
  8%|▊         | 148/1877 [16:02<7:00:54, 14.61s/it][A
  8%|▊         | 149/1877 [16:04<5:08:03, 10.70s/it][A
  8%|▊         | 150/1877 [16:22<6:13:15, 12.97s/it][A
  8%|▊         | 151/1877 [16:23<4:34:06,  9.53s/it][A
  8%|▊         | 152/1877 [16:25<3:24:47,  7.12s/it][A
  8%|▊         | 153/1877 [16:28<2:50:06,  5.92s/it][A
  8%|▊         | 154/1877 [16:30<2:12:02,  4.60s/it][A
  8%|▊         | 155/1877 [16:31<1:45:29,  3.68s/it][A
  8%|▊         | 156/1877 [16:33<1:27:34,  3.05s/it][A
  8%|▊         | 157/1877 [16:34<1:15:55,  2.65s/it][A
  8%|▊         | 158/1877 [16:36<1:06:44,  2.33s/it][A
  8%|▊         | 159/1877 [16:38<1:00:32,  2.11s/it][A
  9%|▊         | 160/1877 [16:50<2:27:37,  5.16s/it][A
  9%|▊         | 161/1877 [16:51<1:56:11,  4.06s/it][A
  9%|▊         | 162/1877 [16:53<1:34:32,  3.31s/it][A
  9%|▊         | 163/1877 [16:54<1:19:11,  2.77s/it][A
  9%|▊         | 164/1877 [16:57<1:20:53,  2.83

retry!
retry!



  9%|▉         | 177/1877 [18:41<10:16:55, 21.77s/it][A
  9%|▉         | 178/1877 [18:43<7:28:42, 15.85s/it] [A
 10%|▉         | 179/1877 [18:45<5:28:26, 11.61s/it][A
 10%|▉         | 180/1877 [18:47<4:04:17,  8.64s/it][A
 10%|▉         | 181/1877 [18:49<3:06:54,  6.61s/it][A
 10%|▉         | 182/1877 [18:51<2:30:49,  5.34s/it][A
 10%|▉         | 183/1877 [18:54<2:06:37,  4.48s/it][A
 10%|▉         | 184/1877 [18:55<1:41:27,  3.60s/it][A
 10%|▉         | 185/1877 [18:57<1:25:48,  3.04s/it][A
 10%|▉         | 186/1877 [18:58<1:13:59,  2.63s/it][A
 10%|▉         | 187/1877 [19:00<1:06:28,  2.36s/it][A
 10%|█         | 188/1877 [19:03<1:06:46,  2.37s/it][A
 10%|█         | 189/1877 [19:06<1:17:02,  2.74s/it][A
 10%|█         | 190/1877 [19:08<1:07:28,  2.40s/it][A
 10%|█         | 191/1877 [19:09<1:00:36,  2.16s/it][A

retry!
retry!



 10%|█         | 192/1877 [20:13<9:37:59, 20.58s/it][A
 10%|█         | 193/1877 [20:15<6:58:21, 14.91s/it][A
 10%|█         | 194/1877 [20:16<5:05:23, 10.89s/it][A
 10%|█         | 195/1877 [20:18<3:48:07,  8.14s/it][A
 10%|█         | 196/1877 [20:19<2:52:36,  6.16s/it][A
 10%|█         | 197/1877 [20:21<2:13:28,  4.77s/it][A
 11%|█         | 198/1877 [20:23<1:47:45,  3.85s/it][A
 11%|█         | 199/1877 [20:24<1:28:37,  3.17s/it][A
 11%|█         | 200/1877 [20:26<1:14:12,  2.66s/it][A
 11%|█         | 201/1877 [20:27<1:05:41,  2.35s/it][A
 11%|█         | 202/1877 [20:31<1:13:24,  2.63s/it][A
 11%|█         | 203/1877 [20:32<1:03:58,  2.29s/it][A

retry!
retry!



 11%|█         | 204/1877 [21:36<9:38:26, 20.74s/it][A
 11%|█         | 205/1877 [21:38<6:59:40, 15.06s/it][A
 11%|█         | 206/1877 [21:39<5:06:36, 11.01s/it][A
 11%|█         | 207/1877 [21:41<3:47:55,  8.19s/it][A
 11%|█         | 208/1877 [21:42<2:52:45,  6.21s/it][A
 11%|█         | 209/1877 [21:44<2:14:00,  4.82s/it][A
 11%|█         | 210/1877 [21:46<1:49:00,  3.92s/it][A
 11%|█         | 211/1877 [21:47<1:30:01,  3.24s/it][A
 11%|█▏        | 212/1877 [21:49<1:16:51,  2.77s/it][A
 11%|█▏        | 213/1877 [21:51<1:07:17,  2.43s/it][A

retry!
retry!



 11%|█▏        | 214/1877 [22:55<9:37:48, 20.85s/it][A
 11%|█▏        | 215/1877 [22:56<6:57:39, 15.08s/it][A
 12%|█▏        | 216/1877 [22:58<5:05:33, 11.04s/it][A
 12%|█▏        | 217/1877 [22:59<3:47:09,  8.21s/it][A
 12%|█▏        | 218/1877 [23:01<2:54:58,  6.33s/it][A
 12%|█▏        | 219/1877 [23:03<2:14:42,  4.87s/it][A
 12%|█▏        | 220/1877 [23:15<3:16:34,  7.12s/it][A
 12%|█▏        | 221/1877 [23:17<2:30:49,  5.46s/it][A
 12%|█▏        | 222/1877 [23:19<1:59:47,  4.34s/it][A
 12%|█▏        | 223/1877 [23:20<1:38:28,  3.57s/it][A
 12%|█▏        | 224/1877 [23:24<1:37:38,  3.54s/it][A
 12%|█▏        | 225/1877 [23:25<1:21:01,  2.94s/it][A
 12%|█▏        | 226/1877 [23:27<1:10:58,  2.58s/it][A
 12%|█▏        | 227/1877 [23:30<1:14:12,  2.70s/it][A
 12%|█▏        | 228/1877 [23:32<1:08:24,  2.49s/it][A
 12%|█▏        | 229/1877 [23:35<1:08:52,  2.51s/it][A
 12%|█▏        | 230/1877 [23:36<1:00:42,  2.21s/it][A
 12%|█▏        | 231/1877 [23:38<55:30,  2.02s/

retry!
retry!



 13%|█▎        | 248/1877 [25:30<9:22:13, 20.71s/it][A
 13%|█▎        | 249/1877 [25:32<6:46:09, 14.97s/it][A
 13%|█▎        | 250/1877 [25:34<5:00:20, 11.08s/it][A
 13%|█▎        | 251/1877 [25:36<3:43:01,  8.23s/it][A
 13%|█▎        | 252/1877 [25:37<2:51:07,  6.32s/it][A
 13%|█▎        | 253/1877 [25:39<2:12:46,  4.91s/it][A
 14%|█▎        | 254/1877 [25:41<1:46:28,  3.94s/it][A
 14%|█▎        | 255/1877 [25:42<1:27:41,  3.24s/it][A
 14%|█▎        | 256/1877 [25:44<1:17:50,  2.88s/it][A

retry!
retry!



 14%|█▎        | 257/1877 [26:49<9:35:52, 21.33s/it][A
 14%|█▎        | 258/1877 [26:51<6:57:46, 15.48s/it][A
 14%|█▍        | 259/1877 [26:52<5:05:46, 11.34s/it][A
 14%|█▍        | 260/1877 [26:54<3:46:12,  8.39s/it][A
 14%|█▍        | 261/1877 [26:55<2:49:50,  6.31s/it][A
 14%|█▍        | 262/1877 [26:58<2:18:12,  5.13s/it][A
 14%|█▍        | 263/1877 [26:59<1:48:51,  4.05s/it][A
 14%|█▍        | 264/1877 [27:01<1:29:27,  3.33s/it][A
 14%|█▍        | 265/1877 [27:05<1:32:38,  3.45s/it][A
 14%|█▍        | 266/1877 [27:06<1:17:24,  2.88s/it][A
 14%|█▍        | 267/1877 [27:08<1:06:27,  2.48s/it][A
 14%|█▍        | 268/1877 [27:09<1:00:12,  2.25s/it][A
 14%|█▍        | 269/1877 [27:13<1:09:41,  2.60s/it][A
 14%|█▍        | 270/1877 [27:15<1:04:40,  2.41s/it][A

retry!



 14%|█▍        | 271/1877 [28:17<9:05:04, 20.36s/it][A
 14%|█▍        | 272/1877 [28:19<6:33:59, 14.73s/it][A
 15%|█▍        | 273/1877 [28:20<4:48:16, 10.78s/it][A
 15%|█▍        | 274/1877 [28:24<3:50:43,  8.64s/it][A
 15%|█▍        | 275/1877 [28:25<2:53:32,  6.50s/it][A
 15%|█▍        | 276/1877 [28:27<2:14:17,  5.03s/it][A
 15%|█▍        | 277/1877 [28:28<1:46:15,  3.98s/it][A
 15%|█▍        | 278/1877 [28:30<1:27:10,  3.27s/it][A
 15%|█▍        | 279/1877 [28:32<1:13:15,  2.75s/it][A
 15%|█▍        | 280/1877 [28:33<1:04:17,  2.42s/it][A
 15%|█▍        | 281/1877 [28:35<57:27,  2.16s/it]  [A
 15%|█▌        | 282/1877 [28:36<53:26,  2.01s/it][A

retry!



 15%|█▌        | 283/1877 [29:40<9:04:46, 20.51s/it][A
 15%|█▌        | 284/1877 [29:43<6:47:24, 15.35s/it][A
 15%|█▌        | 285/1877 [29:45<4:57:14, 11.20s/it][A
 15%|█▌        | 286/1877 [29:47<3:40:13,  8.31s/it][A
 15%|█▌        | 287/1877 [29:48<2:45:58,  6.26s/it][A
 15%|█▌        | 288/1877 [29:50<2:08:35,  4.86s/it][A
 15%|█▌        | 289/1877 [29:54<2:01:38,  4.60s/it][A
 15%|█▌        | 290/1877 [29:56<1:46:57,  4.04s/it][A
 16%|█▌        | 291/1877 [29:58<1:27:28,  3.31s/it][A
 16%|█▌        | 292/1877 [30:10<2:40:42,  6.08s/it][A
 16%|█▌        | 293/1877 [30:13<2:13:52,  5.07s/it][A
 16%|█▌        | 294/1877 [30:15<1:45:31,  4.00s/it][A
 16%|█▌        | 295/1877 [30:16<1:25:42,  3.25s/it][A
 16%|█▌        | 296/1877 [30:19<1:20:52,  3.07s/it][A
 16%|█▌        | 297/1877 [30:20<1:09:23,  2.63s/it][A
 16%|█▌        | 298/1877 [30:22<1:00:55,  2.32s/it][A
 16%|█▌        | 299/1877 [30:24<54:24,  2.07s/it]  [A
 16%|█▌        | 300/1877 [30:26<59:10,  2.25s/

retry!
retry!



 17%|█▋        | 328/1877 [33:14<9:03:17, 21.04s/it][A
 18%|█▊        | 329/1877 [33:17<6:41:32, 15.56s/it][A
 18%|█▊        | 330/1877 [33:20<5:03:39, 11.78s/it][A
 18%|█▊        | 331/1877 [33:22<3:50:46,  8.96s/it][A
 18%|█▊        | 332/1877 [33:38<4:42:24, 10.97s/it][A
 18%|█▊        | 333/1877 [33:40<3:33:40,  8.30s/it][A
 18%|█▊        | 334/1877 [33:43<2:52:25,  6.71s/it][A
 18%|█▊        | 335/1877 [33:45<2:13:24,  5.19s/it][A
 18%|█▊        | 336/1877 [33:46<1:47:11,  4.17s/it][A
 18%|█▊        | 337/1877 [33:48<1:27:30,  3.41s/it][A
 18%|█▊        | 338/1877 [33:50<1:13:45,  2.88s/it][A
 18%|█▊        | 339/1877 [33:51<1:03:52,  2.49s/it][A
 18%|█▊        | 340/1877 [33:53<59:04,  2.31s/it]  [A
 18%|█▊        | 341/1877 [33:56<1:06:43,  2.61s/it][A
 18%|█▊        | 342/1877 [33:58<1:00:13,  2.35s/it][A
 18%|█▊        | 343/1877 [34:00<55:05,  2.15s/it]  [A
 18%|█▊        | 344/1877 [34:02<53:17,  2.09s/it][A
 18%|█▊        | 345/1877 [34:04<52:23,  2.05s/it

retry!
retry!



 19%|█▊        | 348/1877 [35:13<8:57:02, 21.07s/it][A
 19%|█▊        | 349/1877 [35:16<6:32:55, 15.43s/it][A
 19%|█▊        | 350/1877 [35:17<4:47:06, 11.28s/it][A
 19%|█▊        | 351/1877 [35:19<3:32:53,  8.37s/it][A
 19%|█▉        | 352/1877 [35:21<2:45:32,  6.51s/it][A
 19%|█▉        | 353/1877 [35:23<2:08:54,  5.08s/it][A
 19%|█▉        | 354/1877 [35:25<1:46:04,  4.18s/it][A
 19%|█▉        | 355/1877 [35:27<1:28:10,  3.48s/it][A
 19%|█▉        | 356/1877 [35:28<1:13:41,  2.91s/it][A
 19%|█▉        | 357/1877 [35:30<1:06:55,  2.64s/it][A
 19%|█▉        | 358/1877 [35:34<1:17:45,  3.07s/it][A
 19%|█▉        | 359/1877 [35:38<1:22:29,  3.26s/it][A
 19%|█▉        | 360/1877 [35:40<1:09:19,  2.74s/it][A
 19%|█▉        | 361/1877 [35:41<1:01:16,  2.43s/it][A

retry!
retry!



 19%|█▉        | 362/1877 [36:45<8:48:25, 20.93s/it][A
 19%|█▉        | 363/1877 [36:47<6:23:34, 15.20s/it][A
 19%|█▉        | 364/1877 [36:49<4:41:04, 11.15s/it][A
 19%|█▉        | 365/1877 [36:51<3:28:34,  8.28s/it][A
 19%|█▉        | 366/1877 [36:52<2:37:37,  6.26s/it][A
 20%|█▉        | 367/1877 [36:56<2:17:30,  5.46s/it][A
 20%|█▉        | 368/1877 [36:58<1:51:03,  4.42s/it][A
 20%|█▉        | 369/1877 [37:01<1:39:50,  3.97s/it][A
 20%|█▉        | 370/1877 [37:02<1:20:59,  3.22s/it][A
 20%|█▉        | 371/1877 [37:23<3:30:46,  8.40s/it][A
 20%|█▉        | 372/1877 [37:26<2:50:26,  6.80s/it][A
 20%|█▉        | 373/1877 [37:27<2:11:02,  5.23s/it][A
 20%|█▉        | 374/1877 [37:29<1:43:55,  4.15s/it][A
 20%|█▉        | 375/1877 [37:32<1:39:10,  3.96s/it][A
 20%|██        | 376/1877 [37:34<1:21:26,  3.26s/it][A
 20%|██        | 377/1877 [37:36<1:10:17,  2.81s/it][A
 20%|██        | 378/1877 [37:39<1:10:57,  2.84s/it][A
 20%|██        | 379/1877 [37:40<1:01:57,  2.48

retry!
retry!



 21%|██        | 385/1877 [38:54<8:30:59, 20.55s/it][A
 21%|██        | 386/1877 [38:56<6:11:28, 14.95s/it][A
 21%|██        | 387/1877 [38:58<4:31:24, 10.93s/it][A
 21%|██        | 388/1877 [39:00<3:21:39,  8.13s/it][A
 21%|██        | 389/1877 [39:01<2:32:32,  6.15s/it][A
 21%|██        | 390/1877 [39:03<2:01:31,  4.90s/it][A
 21%|██        | 391/1877 [39:05<1:36:29,  3.90s/it][A
 21%|██        | 392/1877 [39:06<1:19:09,  3.20s/it][A
 21%|██        | 393/1877 [39:10<1:22:18,  3.33s/it][A
 21%|██        | 394/1877 [39:11<1:09:25,  2.81s/it][A
 21%|██        | 395/1877 [39:13<1:00:29,  2.45s/it][A
 21%|██        | 396/1877 [39:15<54:07,  2.19s/it]  [A
 21%|██        | 397/1877 [39:16<49:48,  2.02s/it][A

retry!



 21%|██        | 398/1877 [40:18<8:15:09, 20.09s/it][A
 21%|██▏       | 399/1877 [40:21<6:02:41, 14.72s/it][A
 21%|██▏       | 400/1877 [40:22<4:25:11, 10.77s/it][A
 21%|██▏       | 401/1877 [40:24<3:17:08,  8.01s/it][A
 21%|██▏       | 402/1877 [40:25<2:30:11,  6.11s/it][A
 21%|██▏       | 403/1877 [40:27<1:57:29,  4.78s/it][A
 22%|██▏       | 404/1877 [40:41<3:02:32,  7.44s/it][A
 22%|██▏       | 405/1877 [40:42<2:19:24,  5.68s/it][A
 22%|██▏       | 406/1877 [40:44<1:49:08,  4.45s/it][A
 22%|██▏       | 407/1877 [40:47<1:35:24,  3.89s/it][A
 22%|██▏       | 408/1877 [40:48<1:19:23,  3.24s/it][A
 22%|██▏       | 409/1877 [40:52<1:19:35,  3.25s/it][A
 22%|██▏       | 410/1877 [40:55<1:18:38,  3.22s/it][A
 22%|██▏       | 411/1877 [40:56<1:06:25,  2.72s/it][A
 22%|██▏       | 412/1877 [40:59<1:06:45,  2.73s/it][A
 22%|██▏       | 413/1877 [41:01<57:53,  2.37s/it]  [A
 22%|██▏       | 414/1877 [41:02<52:33,  2.16s/it][A
 22%|██▏       | 415/1877 [41:04<49:07,  2.02s/it

retry!
retry!



 22%|██▏       | 422/1877 [42:22<8:23:49, 20.78s/it][A
 23%|██▎       | 423/1877 [42:24<6:05:16, 15.07s/it][A
 23%|██▎       | 424/1877 [42:25<4:27:39, 11.05s/it][A
 23%|██▎       | 425/1877 [42:28<3:29:50,  8.67s/it][A
 23%|██▎       | 426/1877 [42:31<2:43:20,  6.75s/it][A
 23%|██▎       | 427/1877 [42:32<2:06:34,  5.24s/it][A
 23%|██▎       | 428/1877 [42:35<1:46:49,  4.42s/it][A
 23%|██▎       | 429/1877 [42:38<1:36:57,  4.02s/it][A
 23%|██▎       | 430/1877 [42:40<1:19:40,  3.30s/it][A
 23%|██▎       | 431/1877 [42:41<1:07:02,  2.78s/it][A
 23%|██▎       | 432/1877 [42:43<58:19,  2.42s/it]  [A
 23%|██▎       | 433/1877 [42:46<1:01:17,  2.55s/it][A
 23%|██▎       | 434/1877 [42:48<56:56,  2.37s/it]  [A
 23%|██▎       | 435/1877 [42:49<50:58,  2.12s/it][A

retry!
retry!



 23%|██▎       | 436/1877 [43:53<8:14:22, 20.58s/it][A
 23%|██▎       | 437/1877 [43:54<5:57:24, 14.89s/it][A
 23%|██▎       | 438/1877 [43:56<4:21:56, 10.92s/it][A
 23%|██▎       | 439/1877 [43:58<3:15:14,  8.15s/it][A
 23%|██▎       | 440/1877 [44:01<2:39:49,  6.67s/it][A
 23%|██▎       | 441/1877 [44:04<2:17:34,  5.75s/it][A
 24%|██▎       | 442/1877 [44:08<2:04:51,  5.22s/it][A
 24%|██▎       | 443/1877 [44:11<1:43:22,  4.32s/it][A
 24%|██▎       | 444/1877 [44:12<1:23:47,  3.51s/it][A
 24%|██▎       | 445/1877 [44:14<1:10:24,  2.95s/it][A
 24%|██▍       | 446/1877 [44:17<1:12:55,  3.06s/it][A
 24%|██▍       | 447/1877 [44:20<1:10:20,  2.95s/it][A
 24%|██▍       | 448/1877 [44:22<1:00:20,  2.53s/it][A
 24%|██▍       | 449/1877 [44:23<53:21,  2.24s/it]  [A
 24%|██▍       | 450/1877 [44:25<48:43,  2.05s/it][A
 24%|██▍       | 451/1877 [44:27<48:06,  2.02s/it][A
 24%|██▍       | 452/1877 [44:30<55:33,  2.34s/it][A
 24%|██▍       | 453/1877 [44:31<51:14,  2.16s/it][A

retry!
retry!



 25%|██▍       | 463/1877 [45:56<8:15:03, 21.01s/it][A
 25%|██▍       | 464/1877 [45:58<6:00:20, 15.30s/it][A
 25%|██▍       | 465/1877 [46:02<4:34:43, 11.67s/it][A
 25%|██▍       | 466/1877 [46:05<3:34:56,  9.14s/it][A
 25%|██▍       | 467/1877 [46:06<2:41:02,  6.85s/it][A
 25%|██▍       | 468/1877 [46:09<2:07:34,  5.43s/it][A
 25%|██▍       | 469/1877 [46:11<1:49:20,  4.66s/it][A
 25%|██▌       | 470/1877 [46:33<3:45:46,  9.63s/it][A
 25%|██▌       | 471/1877 [46:35<2:51:29,  7.32s/it][A
 25%|██▌       | 472/1877 [46:36<2:11:40,  5.62s/it][A
 25%|██▌       | 473/1877 [46:38<1:44:21,  4.46s/it][A
 25%|██▌       | 474/1877 [46:40<1:24:14,  3.60s/it][A
 25%|██▌       | 475/1877 [46:41<1:11:17,  3.05s/it][A
 25%|██▌       | 476/1877 [46:43<1:01:30,  2.63s/it][A
 25%|██▌       | 477/1877 [46:46<1:07:08,  2.88s/it][A
 25%|██▌       | 478/1877 [46:49<1:04:30,  2.77s/it][A
 26%|██▌       | 479/1877 [46:51<56:08,  2.41s/it]  [A
 26%|██▌       | 480/1877 [46:54<1:01:15,  2.63

retry!
retry!



 26%|██▌       | 485/1877 [48:06<8:02:41, 20.81s/it][A
 26%|██▌       | 486/1877 [48:07<5:48:01, 15.01s/it][A
 26%|██▌       | 487/1877 [48:10<4:25:52, 11.48s/it][A
 26%|██▌       | 488/1877 [48:14<3:29:38,  9.06s/it][A
 26%|██▌       | 489/1877 [48:15<2:38:07,  6.84s/it][A
 26%|██▌       | 490/1877 [48:17<2:01:56,  5.27s/it][A
 26%|██▌       | 491/1877 [48:19<1:42:25,  4.43s/it][A
 26%|██▌       | 492/1877 [48:22<1:26:51,  3.76s/it][A
 26%|██▋       | 493/1877 [48:24<1:14:11,  3.22s/it][A
 26%|██▋       | 494/1877 [48:25<1:03:01,  2.73s/it][A
 26%|██▋       | 495/1877 [48:58<4:33:26, 11.87s/it][A
 26%|██▋       | 496/1877 [49:21<5:50:12, 15.22s/it][A
 26%|██▋       | 497/1877 [49:23<4:17:40, 11.20s/it][A
 27%|██▋       | 498/1877 [49:26<3:21:10,  8.75s/it][A
 27%|██▋       | 499/1877 [49:29<2:38:24,  6.90s/it][A
 27%|██▋       | 500/1877 [49:32<2:14:27,  5.86s/it][A
 27%|██▋       | 501/1877 [49:36<1:57:29,  5.12s/it][A
 27%|██▋       | 502/1877 [49:37<1:33:42,  4.09

retry!
retry!



 32%|███▏      | 592/1877 [55:15<7:26:09, 20.83s/it][A
 32%|███▏      | 593/1877 [55:17<5:24:21, 15.16s/it][A
 32%|███▏      | 594/1877 [55:20<4:04:18, 11.43s/it][A
 32%|███▏      | 595/1877 [55:21<3:00:39,  8.46s/it][A
 32%|███▏      | 596/1877 [56:09<7:10:04, 20.14s/it][A
 32%|███▏      | 597/1877 [56:10<5:12:00, 14.63s/it][A
 32%|███▏      | 598/1877 [56:12<3:48:46, 10.73s/it][A
 32%|███▏      | 599/1877 [56:14<2:51:10,  8.04s/it][A
 32%|███▏      | 600/1877 [56:15<2:11:05,  6.16s/it][A
 32%|███▏      | 601/1877 [56:17<1:42:14,  4.81s/it][A
 32%|███▏      | 602/1877 [56:19<1:22:19,  3.87s/it][A
 32%|███▏      | 603/1877 [56:20<1:08:00,  3.20s/it][A
 32%|███▏      | 604/1877 [56:22<57:51,  2.73s/it]  [A
 32%|███▏      | 605/1877 [56:24<50:40,  2.39s/it][A
 32%|███▏      | 606/1877 [56:25<46:05,  2.18s/it][A
 32%|███▏      | 607/1877 [56:28<46:36,  2.20s/it][A
 32%|███▏      | 608/1877 [56:30<44:56,  2.12s/it][A

retry!
retry!



 32%|███▏      | 609/1877 [57:34<7:17:17, 20.69s/it][A
 32%|███▏      | 610/1877 [57:36<5:20:12, 15.16s/it][A
 33%|███▎      | 611/1877 [57:38<3:54:24, 11.11s/it][A
 33%|███▎      | 612/1877 [57:39<2:54:00,  8.25s/it][A
 33%|███▎      | 613/1877 [57:42<2:17:01,  6.50s/it][A
 33%|███▎      | 614/1877 [57:44<1:51:04,  5.28s/it][A
 33%|███▎      | 615/1877 [57:46<1:29:01,  4.23s/it][A
 33%|███▎      | 616/1877 [57:48<1:14:28,  3.54s/it][A
 33%|███▎      | 617/1877 [57:51<1:13:23,  3.50s/it][A
 33%|███▎      | 618/1877 [57:53<1:00:42,  2.89s/it][A
 33%|███▎      | 619/1877 [57:56<1:02:35,  2.99s/it][A
 33%|███▎      | 620/1877 [57:57<53:58,  2.58s/it]  [A
 33%|███▎      | 621/1877 [57:59<47:54,  2.29s/it][A
 33%|███▎      | 622/1877 [58:02<52:16,  2.50s/it][A
 33%|███▎      | 623/1877 [58:04<46:35,  2.23s/it][A
 33%|███▎      | 624/1877 [58:06<46:08,  2.21s/it][A
 33%|███▎      | 625/1877 [58:07<42:57,  2.06s/it][A
 33%|███▎      | 626/1877 [58:09<41:49,  2.01s/it][A
 33

retry!
retry!



 35%|███▍      | 656/1877 [1:00:58<7:12:55, 21.27s/it][A
 35%|███▌      | 657/1877 [1:01:00<5:12:36, 15.37s/it][A
 35%|███▌      | 658/1877 [1:01:01<3:48:24, 11.24s/it][A
 35%|███▌      | 659/1877 [1:01:03<2:49:44,  8.36s/it][A
 35%|███▌      | 660/1877 [1:01:05<2:10:20,  6.43s/it][A
 35%|███▌      | 661/1877 [1:01:07<1:44:08,  5.14s/it][A
 35%|███▌      | 662/1877 [1:01:11<1:36:14,  4.75s/it][A
 35%|███▌      | 663/1877 [1:01:14<1:27:35,  4.33s/it][A
 35%|███▌      | 664/1877 [1:01:16<1:14:51,  3.70s/it][A
 35%|███▌      | 665/1877 [1:01:18<1:02:27,  3.09s/it][A
 35%|███▌      | 666/1877 [1:01:20<54:07,  2.68s/it]  [A
 36%|███▌      | 667/1877 [1:01:21<48:11,  2.39s/it][A
 36%|███▌      | 668/1877 [1:01:23<45:20,  2.25s/it][A

retry!
retry!



 36%|███▌      | 669/1877 [1:02:29<7:07:51, 21.25s/it][A
 36%|███▌      | 670/1877 [1:02:32<5:20:38, 15.94s/it][A
 36%|███▌      | 671/1877 [1:02:34<3:55:06, 11.70s/it][A
 36%|███▌      | 672/1877 [1:02:36<2:55:14,  8.73s/it][A
 36%|███▌      | 673/1877 [1:02:38<2:17:06,  6.83s/it][A
 36%|███▌      | 674/1877 [1:02:42<1:55:18,  5.75s/it][A
 36%|███▌      | 675/1877 [1:02:43<1:31:07,  4.55s/it][A
 36%|███▌      | 676/1877 [1:02:46<1:21:45,  4.08s/it][A
 36%|███▌      | 677/1877 [1:02:48<1:08:35,  3.43s/it][A
 36%|███▌      | 678/1877 [1:02:52<1:07:20,  3.37s/it][A
 36%|███▌      | 679/1877 [1:02:54<59:33,  2.98s/it]  [A
 36%|███▌      | 680/1877 [1:02:56<54:24,  2.73s/it][A
 36%|███▋      | 681/1877 [1:02:59<59:03,  2.96s/it][A
 36%|███▋      | 682/1877 [1:03:01<51:23,  2.58s/it][A
 36%|███▋      | 683/1877 [1:03:04<51:08,  2.57s/it][A
 36%|███▋      | 684/1877 [1:03:07<53:45,  2.70s/it][A
 36%|███▋      | 685/1877 [1:03:18<1:48:40,  5.47s/it][A
 37%|███▋      | 686/18

retry!
retry!



 40%|███▉      | 747/1877 [1:08:13<6:39:18, 21.20s/it][A
 40%|███▉      | 748/1877 [1:08:16<4:55:19, 15.69s/it][A
 40%|███▉      | 749/1877 [1:08:17<3:35:42, 11.47s/it][A
 40%|███▉      | 750/1877 [1:08:19<2:43:10,  8.69s/it][A
 40%|████      | 751/1877 [1:08:21<2:03:52,  6.60s/it][A
 40%|████      | 752/1877 [1:08:24<1:43:42,  5.53s/it][A
 40%|████      | 753/1877 [1:08:26<1:22:43,  4.42s/it][A
 40%|████      | 754/1877 [1:08:28<1:10:53,  3.79s/it][A
 40%|████      | 755/1877 [1:08:30<1:00:50,  3.25s/it][A
 40%|████      | 756/1877 [1:08:32<51:51,  2.78s/it]  [A
 40%|████      | 757/1877 [1:08:34<49:20,  2.64s/it][A
 40%|████      | 758/1877 [1:08:36<45:56,  2.46s/it][A
 40%|████      | 759/1877 [1:08:38<43:40,  2.34s/it][A
 40%|████      | 760/1877 [1:08:40<39:32,  2.12s/it][A
 41%|████      | 761/1877 [1:08:42<39:17,  2.11s/it][A
 41%|████      | 762/1877 [1:08:44<39:36,  2.13s/it][A
 41%|████      | 763/1877 [1:08:46<38:51,  2.09s/it][A
 41%|████      | 764/1877 [

retry!
retry!



 43%|████▎     | 798/1877 [1:11:21<7:21:13, 24.54s/it][A
 43%|████▎     | 799/1877 [1:11:25<5:28:33, 18.29s/it][A
 43%|████▎     | 800/1877 [1:11:37<4:52:34, 16.30s/it][A
 43%|████▎     | 801/1877 [1:11:39<3:34:38, 11.97s/it][A
 43%|████▎     | 802/1877 [1:11:42<2:46:09,  9.27s/it][A
 43%|████▎     | 803/1877 [1:11:44<2:07:27,  7.12s/it][A
 43%|████▎     | 804/1877 [1:11:45<1:38:32,  5.51s/it][A
 43%|████▎     | 805/1877 [1:11:48<1:23:09,  4.65s/it][A
 43%|████▎     | 806/1877 [1:11:51<1:11:19,  4.00s/it][A
 43%|████▎     | 807/1877 [1:11:52<58:30,  3.28s/it]  [A
 43%|████▎     | 808/1877 [1:11:54<49:34,  2.78s/it][A
 43%|████▎     | 809/1877 [1:11:57<51:23,  2.89s/it][A
 43%|████▎     | 810/1877 [1:12:00<52:01,  2.93s/it][A
 43%|████▎     | 811/1877 [1:12:02<46:04,  2.59s/it][A
 43%|████▎     | 812/1877 [1:12:05<50:25,  2.84s/it][A
 43%|████▎     | 813/1877 [1:12:08<48:24,  2.73s/it][A
 43%|████▎     | 814/1877 [1:12:09<42:03,  2.37s/it][A
 43%|████▎     | 815/1877 [

retry!
retry!



 44%|████▍     | 826/1877 [1:13:40<6:13:57, 21.35s/it][A
 44%|████▍     | 827/1877 [1:13:53<5:28:50, 18.79s/it][A
 44%|████▍     | 828/1877 [1:13:57<4:08:44, 14.23s/it][A
 44%|████▍     | 829/1877 [1:13:58<3:02:00, 10.42s/it][A
 44%|████▍     | 830/1877 [1:14:01<2:20:54,  8.07s/it][A
 44%|████▍     | 831/1877 [1:14:04<1:53:22,  6.50s/it][A
 44%|████▍     | 832/1877 [1:14:07<1:34:53,  5.45s/it][A
 44%|████▍     | 833/1877 [1:14:08<1:14:32,  4.28s/it][A
 44%|████▍     | 834/1877 [1:14:11<1:09:01,  3.97s/it][A
 44%|████▍     | 835/1877 [1:14:13<57:27,  3.31s/it]  [A
 45%|████▍     | 836/1877 [1:14:15<50:40,  2.92s/it][A
 45%|████▍     | 837/1877 [1:14:17<43:42,  2.52s/it][A
 45%|████▍     | 838/1877 [1:14:20<47:27,  2.74s/it][A
 45%|████▍     | 839/1877 [1:14:22<43:38,  2.52s/it][A
 45%|████▍     | 840/1877 [1:14:24<40:07,  2.32s/it][A
 45%|████▍     | 841/1877 [1:14:27<45:38,  2.64s/it][A
 45%|████▍     | 842/1877 [1:14:29<40:30,  2.35s/it][A
 45%|████▍     | 843/1877 [

retry!
retry!



 47%|████▋     | 877/1877 [1:17:05<5:48:17, 20.90s/it][A
 47%|████▋     | 878/1877 [1:17:18<5:08:49, 18.55s/it][A
 47%|████▋     | 879/1877 [1:17:20<3:46:01, 13.59s/it][A
 47%|████▋     | 880/1877 [1:17:22<2:46:01,  9.99s/it][A
 47%|████▋     | 881/1877 [1:17:24<2:10:08,  7.84s/it][A
 47%|████▋     | 882/1877 [1:17:26<1:39:01,  5.97s/it][A
 47%|████▋     | 883/1877 [1:17:28<1:17:53,  4.70s/it][A
 47%|████▋     | 884/1877 [1:17:31<1:11:38,  4.33s/it][A
 47%|████▋     | 885/1877 [1:17:33<58:15,  3.52s/it]  [A
 47%|████▋     | 886/1877 [1:17:34<48:31,  2.94s/it][A
 47%|████▋     | 887/1877 [1:17:46<1:29:17,  5.41s/it][A
 47%|████▋     | 888/1877 [1:17:47<1:11:09,  4.32s/it][A
 47%|████▋     | 889/1877 [1:17:49<58:47,  3.57s/it]  [A
 47%|████▋     | 890/1877 [1:17:51<49:45,  3.02s/it][A
 47%|████▋     | 891/1877 [1:17:53<46:30,  2.83s/it][A
 48%|████▊     | 892/1877 [1:17:56<47:05,  2.87s/it][A
 48%|████▊     | 893/1877 [1:17:58<40:52,  2.49s/it][A
 48%|████▊     | 894/18

retry!
retry!



 50%|█████     | 945/1877 [1:22:02<5:27:48, 21.10s/it][A
 50%|█████     | 946/1877 [1:22:04<3:56:48, 15.26s/it][A
 50%|█████     | 947/1877 [1:22:07<2:59:07, 11.56s/it][A
 51%|█████     | 948/1877 [1:22:10<2:18:41,  8.96s/it][A
 51%|█████     | 949/1877 [1:22:12<1:49:25,  7.08s/it][A
 51%|█████     | 950/1877 [1:22:15<1:28:21,  5.72s/it][A
 51%|█████     | 951/1877 [1:22:16<1:09:21,  4.49s/it][A
 51%|█████     | 952/1877 [1:22:19<1:01:26,  3.99s/it][A
 51%|█████     | 953/1877 [1:22:22<56:12,  3.65s/it]  [A
 51%|█████     | 954/1877 [1:22:24<46:49,  3.04s/it][A
 51%|█████     | 955/1877 [1:22:58<3:12:08, 12.50s/it][A
 51%|█████     | 956/1877 [1:23:19<3:47:40, 14.83s/it][A
 51%|█████     | 957/1877 [1:23:21<2:50:05, 11.09s/it][A
 51%|█████     | 958/1877 [1:23:23<2:09:56,  8.48s/it][A
 51%|█████     | 959/1877 [1:23:25<1:39:00,  6.47s/it][A
 51%|█████     | 960/1877 [1:23:27<1:18:23,  5.13s/it][A
 51%|█████     | 961/1877 [1:23:30<1:06:07,  4.33s/it][A
 51%|█████▏    

retry!
retry!



 53%|█████▎    | 990/1877 [1:26:41<5:13:48, 21.23s/it][A
 53%|█████▎    | 991/1877 [1:26:43<3:47:59, 15.44s/it][A
 53%|█████▎    | 992/1877 [1:26:45<2:49:18, 11.48s/it][A
 53%|█████▎    | 993/1877 [1:26:48<2:11:52,  8.95s/it][A
 53%|█████▎    | 994/1877 [1:26:50<1:40:46,  6.85s/it][A
 53%|█████▎    | 995/1877 [1:26:53<1:23:15,  5.66s/it][A
 53%|█████▎    | 996/1877 [1:26:55<1:05:14,  4.44s/it][A
 53%|█████▎    | 997/1877 [1:26:57<53:37,  3.66s/it]  [A
 53%|█████▎    | 998/1877 [1:26:59<49:55,  3.41s/it][A
 53%|█████▎    | 999/1877 [1:27:01<43:14,  2.96s/it][A
 53%|█████▎    | 1000/1877 [1:27:03<38:09,  2.61s/it][A

retry!
retry!



 53%|█████▎    | 1001/1877 [1:28:07<5:08:40, 21.14s/it][A
 53%|█████▎    | 1002/1877 [1:28:09<3:42:57, 15.29s/it][A
 53%|█████▎    | 1003/1877 [1:28:12<2:50:30, 11.71s/it][A
 53%|█████▎    | 1004/1877 [1:28:25<2:55:17, 12.05s/it][A
 54%|█████▎    | 1005/1877 [1:28:27<2:09:34,  8.92s/it][A
 54%|█████▎    | 1006/1877 [1:28:30<1:42:37,  7.07s/it][A
 54%|█████▎    | 1007/1877 [1:28:36<1:37:56,  6.75s/it][A
 54%|█████▎    | 1008/1877 [1:28:39<1:21:18,  5.61s/it][A
 54%|█████▍    | 1009/1877 [1:28:51<1:51:29,  7.71s/it][A
 54%|█████▍    | 1010/1877 [1:28:53<1:24:48,  5.87s/it][A
 54%|█████▍    | 1011/1877 [1:28:55<1:07:11,  4.65s/it][A
 54%|█████▍    | 1012/1877 [1:28:57<56:04,  3.89s/it]  [A
 54%|█████▍    | 1013/1877 [1:29:00<52:00,  3.61s/it][A
 54%|█████▍    | 1014/1877 [1:29:04<54:14,  3.77s/it][A
 54%|█████▍    | 1015/1877 [1:29:05<44:40,  3.11s/it][A
 54%|█████▍    | 1016/1877 [1:29:07<37:53,  2.64s/it][A
 54%|█████▍    | 1017/1877 [1:29:09<34:02,  2.38s/it][A
 54%|█

retry!
retry!



 55%|█████▌    | 1038/1877 [1:31:33<4:52:03, 20.89s/it][A
 55%|█████▌    | 1039/1877 [1:31:35<3:33:45, 15.30s/it][A
 55%|█████▌    | 1040/1877 [1:31:38<2:38:54, 11.39s/it][A
 55%|█████▌    | 1041/1877 [1:31:40<2:01:18,  8.71s/it][A
 56%|█████▌    | 1042/1877 [1:31:42<1:31:24,  6.57s/it][A
 56%|█████▌    | 1043/1877 [1:31:44<1:14:31,  5.36s/it][A
 56%|█████▌    | 1044/1877 [1:31:47<1:03:44,  4.59s/it][A
 56%|█████▌    | 1045/1877 [1:31:50<54:36,  3.94s/it]  [A
 56%|█████▌    | 1046/1877 [1:31:56<1:04:43,  4.67s/it][A
 56%|█████▌    | 1047/1877 [1:31:58<55:46,  4.03s/it]  [A
 56%|█████▌    | 1048/1877 [1:32:01<51:12,  3.71s/it][A
 56%|█████▌    | 1049/1877 [1:32:05<50:30,  3.66s/it][A
 56%|█████▌    | 1050/1877 [1:32:17<1:23:23,  6.05s/it][A
 56%|█████▌    | 1051/1877 [1:32:18<1:05:25,  4.75s/it][A
 56%|█████▌    | 1052/1877 [1:32:20<52:52,  3.85s/it]  [A
 56%|█████▌    | 1053/1877 [1:32:22<43:57,  3.20s/it][A
 56%|█████▌    | 1054/1877 [1:32:23<37:30,  2.73s/it][A
 56%

retry!
retry!



 59%|█████▊    | 1101/1877 [1:36:06<4:41:16, 21.75s/it][A
 59%|█████▊    | 1102/1877 [1:36:10<3:31:06, 16.34s/it][A
 59%|█████▉    | 1103/1877 [1:36:12<2:36:22, 12.12s/it][A
 59%|█████▉    | 1104/1877 [1:36:14<1:57:07,  9.09s/it][A
 59%|█████▉    | 1105/1877 [1:36:16<1:28:36,  6.89s/it][A
 59%|█████▉    | 1106/1877 [1:36:18<1:08:46,  5.35s/it][A
 59%|█████▉    | 1107/1877 [1:36:20<55:25,  4.32s/it]  [A
 59%|█████▉    | 1108/1877 [1:36:21<44:59,  3.51s/it][A
 59%|█████▉    | 1109/1877 [1:36:23<37:18,  2.92s/it][A
 59%|█████▉    | 1110/1877 [1:36:24<32:41,  2.56s/it][A
 59%|█████▉    | 1111/1877 [1:36:27<31:47,  2.49s/it][A
 59%|█████▉    | 1112/1877 [1:36:30<35:12,  2.76s/it][A
 59%|█████▉    | 1113/1877 [1:36:32<31:06,  2.44s/it][A
 59%|█████▉    | 1114/1877 [1:36:33<27:39,  2.17s/it][A

retry!
retry!



 59%|█████▉    | 1115/1877 [1:37:38<4:23:37, 20.76s/it][A
 59%|█████▉    | 1116/1877 [1:37:44<3:28:40, 16.45s/it][A
 60%|█████▉    | 1117/1877 [1:37:48<2:41:40, 12.76s/it][A
 60%|█████▉    | 1118/1877 [1:37:50<2:00:26,  9.52s/it][A
 60%|█████▉    | 1119/1877 [1:37:52<1:30:26,  7.16s/it][A
 60%|█████▉    | 1120/1877 [1:37:55<1:17:25,  6.14s/it][A
 60%|█████▉    | 1121/1877 [1:37:58<1:05:35,  5.21s/it][A
 60%|█████▉    | 1122/1877 [1:38:02<58:57,  4.69s/it]  [A
 60%|█████▉    | 1123/1877 [1:38:04<49:34,  3.95s/it][A
 60%|█████▉    | 1124/1877 [1:38:17<1:24:11,  6.71s/it][A
 60%|█████▉    | 1125/1877 [1:38:20<1:09:44,  5.57s/it][A
 60%|█████▉    | 1126/1877 [1:38:23<59:50,  4.78s/it]  [A
 60%|██████    | 1127/1877 [1:38:26<50:41,  4.06s/it][A
 60%|██████    | 1128/1877 [1:38:27<41:33,  3.33s/it][A
 60%|██████    | 1129/1877 [1:38:29<36:55,  2.96s/it][A
 60%|██████    | 1130/1877 [1:38:42<1:12:06,  5.79s/it][A
 60%|██████    | 1131/1877 [1:38:43<57:06,  4.59s/it]  [A
 60%

In [13]:
counts

Unnamed: 0,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
10504621,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10504252,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10504229,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
10504227,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10499883,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10499868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10497118,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
10492764,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10492763,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10489910,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
counts.to_csv(os.path.join(DATA_ROOT, "forward_info.csv"), index_label="pn")

In [29]:
forward_info = pd.read_csv("forward_info.csv")
forward_info.head()

FileNotFoundError: File b'forward_info.csv' does not exist

In [21]:
df.head()

Unnamed: 0,pn,issued date,abstract,claims,f_citation
0,10504621,2019-12-10,"According to one embodiment, a medical image p...",What is claimed is: \n\n 1. A medical image p...,0
1,10504252,2019-12-10,An imaging data processing apparatus comprises...,The invention claimed is: \n\n 1. An imaging ...,0
2,10504229,2019-12-10,A medical image processing apparatus according...,What is claimed is: \n\n 1. A medical image p...,0
3,10504227,2019-12-10,This disclosure generally pertains to methods ...,We claim: \n\n 1. A method for streamlining a...,0
4,10499883,2019-12-10,The systems and methods described herein relat...,What is claimed is: \n\n 1. A method for spat...,0


In [24]:
final_df = pd.merge(df, forward_info, on="pn")
final_df.head()

Unnamed: 0,pn,issued date,abstract,claims,f_citation,1984,1985,1986,1987,1988,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,10504621,2019-12-10,"According to one embodiment, a medical image p...",What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,10504252,2019-12-10,An imaging data processing apparatus comprises...,The invention claimed is: \n\n 1. An imaging ...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10504229,2019-12-10,A medical image processing apparatus according...,What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,10504227,2019-12-10,This disclosure generally pertains to methods ...,We claim: \n\n 1. A method for streamlining a...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10499883,2019-12-10,The systems and methods described herein relat...,What is claimed is: \n\n 1. A method for spat...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
final_df.set_index("pn")
final_df.head()

Unnamed: 0,pn,issued date,abstract,claims,f_citation,1984,1985,1986,1987,1988,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,10504621,2019-12-10,"According to one embodiment, a medical image p...",What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,10504252,2019-12-10,An imaging data processing apparatus comprises...,The invention claimed is: \n\n 1. An imaging ...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10504229,2019-12-10,A medical image processing apparatus according...,What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,10504227,2019-12-10,This disclosure generally pertains to methods ...,We claim: \n\n 1. A method for streamlining a...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10499883,2019-12-10,The systems and methods described herein relat...,What is claimed is: \n\n 1. A method for spat...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
final_df.to_csv(os.path.join(DATA_ROOT, "data2.csv"))

In [47]:
final = pd.read_csv(os.path.join(DATA_ROOT, "data.csv"))
# del final["Unnamed: 0"]

In [48]:
final

Unnamed: 0.1,Unnamed: 0,pn,issued date,abstract,claims,f_citation,1984,1985,1986,1987,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,0,10504621,2019-12-10,"According to one embodiment, a medical image p...",What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,10504252,2019-12-10,An imaging data processing apparatus comprises...,The invention claimed is: \n\n 1. An imaging ...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,10504229,2019-12-10,A medical image processing apparatus according...,What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,3,10504227,2019-12-10,This disclosure generally pertains to methods ...,We claim: \n\n 1. A method for streamlining a...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,10499883,2019-12-10,The systems and methods described herein relat...,What is claimed is: \n\n 1. A method for spat...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,5,10499868,2019-12-10,"According to one embodiment, an X-ray diagnost...",What is claimed is: \n\n 1. An X-ray diagnost...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,6,10497118,2019-12-03,A user had to manually select a cross-sectiona...,What is claimed is: \n\n 1. An image processi...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
7,7,10492764,2019-12-03,"According to one embodiment, an ultrasound dia...",What is claimed is: \n\n 1. An ultrasound dia...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,8,10492763,2019-12-03,The present invention provides improved medica...,What is claimed: \n\n 1. An ultrasound imagin...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,9,10489910,2019-11-26,A medical image processing apparatus according...,What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Class 정보 추가 (Class of the patent and the references)

In [143]:
import collections
def get_class_info(pn, log=False):
    p = uspto.Patent(pn)
    ref_us = p.ref_us
    p_cpcs = list(set(["{} {}".format(pc.level1, pc.level2) for pc in p.class_cpc]))
    ref_cpcs = []
    for ref in ref_us:
        ref_pn = ref.pn
        p_ref = None
        if "/" in ref_pn:
            if log: print("SKIP: {}".format(ref_pn))
            continue
        try:
            p_ref = uspto.Patent(ref_pn, use_db=False)
            if log: print("pn: {} was aleady existed".format(ref_pn))
        except KeyboardInterrupt: break
        except (FileNotFoundError, IndexError): # TODO 제대로 크롤링 하지 못한 html이 그대로 남아 있는 경우가 존재.
            if log: print("pn: {} not found or exist but something wrong. and try crwal pn = {}".format(ref_pn, ref_pn))
            if uspto.crawl.save_html(ref_pn, overwrite=True, retry=3):
                if log: print("\tCrawling success! and parse again..")
                time.sleep(8)
                try:
                    p_ref = uspto.Patent(ref_pn, use_db=False)
                except:
                    if log: print("\tUnexpected error:", sys.exc_info()[0])
                    continue
            else:
                if log: print("\tCrawling failed. Continue.")
                continue
        except:
            if log: print("Unexpected error:", sys.exc_info()[0])
            continue
        
        if p_ref: ref_cpcs += ["{} {}".format(c.level1, c.level2) for c in p_ref.class_cpc]
    
    ref_cpcs_count = collections.Counter(ref_cpcs)
    ref_cpcs_unique = list(set(ref_cpcs))
    return p_cpcs, ref_cpcs, ref_cpcs_unique, ref_cpcs_count

In [144]:
import collections
pn = "4430749"
p_cpcs, ref_cpcs, ref_cpcs_unique, ref_cpcs_count = get_class_info(pn)
# print("ref_us:", ref_us)
print("p_cpcs:", p_cpcs)
print("ref_cpcs:", ref_cpcs)
print("ref_cpcs_count:", ref_cpcs_count)
print("ref_cpcs_unique:", ref_cpcs_unique)

p_cpcs: ['G06T 1', 'G06T 5', 'H04N 5']
ref_cpcs: ['H04N 5', 'G21K 4', 'A61B 6', 'G06T 11', 'Y10S 378', 'A61B 6', 'G06T 11', 'Y10S 378', 'H04N 5', 'G06T 5', 'H04N 5', 'G06T 5', 'A61B 6', 'A61B 6', 'A61B 6', 'H05G 1', 'A61B 6', 'H04N 5', 'A61B 6']
ref_cpcs_count: Counter({'A61B 6': 7, 'H04N 5': 4, 'G06T 11': 2, 'Y10S 378': 2, 'G06T 5': 2, 'G21K 4': 1, 'H05G 1': 1})
ref_cpcs_unique: ['G06T 11', 'A61B 6', 'Y10S 378', 'G21K 4', 'H05G 1', 'H04N 5', 'G06T 5']


In [64]:
final.head()

Unnamed: 0.1,Unnamed: 0,pn,issued date,abstract,claims,f_citation,1984,1985,1986,1987,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,0,10504621,2019-12-10,"According to one embodiment, a medical image p...",What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,10504252,2019-12-10,An imaging data processing apparatus comprises...,The invention claimed is: \n\n 1. An imaging ...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,10504229,2019-12-10,A medical image processing apparatus according...,What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,3,10504227,2019-12-10,This disclosure generally pertains to methods ...,We claim: \n\n 1. A method for streamlining a...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,10499883,2019-12-10,The systems and methods described herein relat...,What is claimed is: \n\n 1. A method for spat...,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [145]:
pns = list(final["pn"])
pns

[10504621,
 10504252,
 10504229,
 10504227,
 10499883,
 10499868,
 10497118,
 10492764,
 10492763,
 10489910,
 10489907,
 10485493,
 10484594,
 10482606,
 10482603,
 10481234,
 10478130,
 10478073,
 10478071,
 10475184,
 10475182,
 10470825,
 10468134,
 10467798,
 10467377,
 10463871,
 10463328,
 10463327,
 10460423,
 10460079,
 10459627,
 10456106,
 10456099,
 10456093,
 10456041,
 10455201,
 10453570,
 10453182,
 10452813,
 10452812,
 10448910,
 10445891,
 10445886,
 10445462,
 10444960,
 10442767,
 10438380,
 10438368,
 10438349,
 10438348,
 10436915,
 10433804,
 10433728,
 10430550,
 10429457,
 10426444,
 10424237,
 10417804,
 10417778,
 10417759,
 10417517,
 10417326,
 10416833,
 10413270,
 10413253,
 10413252,
 10413236,
 10413186,
 10412377,
 10410341,
 10409235,
 10405831,
 10402969,
 10401975,
 10398395,
 10398382,
 10398286,
 10395420,
 10395364,
 10390785,
 10390728,
 10388037,
 10387612,
 10387196,
 10383602,
 10383593,
 10380718,
 10379692,
 10371933,
 10371499,
 10368836,

In [146]:
class_df = pd.DataFrame(columns=["pn", "p_cpcs", "ref_cpcs", "ref_cpcs_unique", "ref_cpcs_count"])
class_df

Unnamed: 0,pn,p_cpcs,ref_cpcs,ref_cpcs_unique,ref_cpcs_count


In [147]:
class_df = pd.DataFrame(columns=["pn", "p_cpcs", "ref_cpcs", "ref_cpcs_unique", "ref_cpcs_count"])
for pn in tqdm(pns):
    row = [pn] + list(get_class_info(str(pn)))
    class_df = class_df.append(pd.Series(row, index=class_df.columns), ignore_index=True)

class_df


  0%|          | 0/1877 [00:00<?, ?it/s][A
  0%|          | 1/1877 [00:00<25:08,  1.24it/s][A
  0%|          | 2/1877 [00:01<28:24,  1.10it/s][A
  0%|          | 3/1877 [00:03<31:48,  1.02s/it][A
  0%|          | 5/1877 [00:03<24:29,  1.27it/s][A
  0%|          | 7/1877 [00:04<19:53,  1.57it/s][A
  0%|          | 9/1877 [00:05<19:50,  1.57it/s][A
  1%|          | 10/1877 [00:06<25:37,  1.21it/s][A
  1%|          | 11/1877 [00:07<23:07,  1.34it/s][A
  1%|          | 12/1877 [00:07<18:20,  1.70it/s][A
  1%|          | 13/1877 [00:07<13:46,  2.26it/s][A
  1%|          | 14/1877 [00:07<10:42,  2.90it/s][A
  1%|          | 15/1877 [00:08<10:13,  3.03it/s][A
  1%|          | 17/1877 [00:09<13:13,  2.34it/s][A
  1%|          | 18/1877 [00:09<11:17,  2.74it/s][A
  1%|          | 19/1877 [00:09<09:47,  3.16it/s][A
  1%|          | 20/1877 [00:10<11:03,  2.80it/s][A
  1%|          | 22/1877 [00:11<12:39,  2.44it/s][A
  1%|          | 23/1877 [00:12<15:45,  1.96it/s][A
  1%|▏ 

Unnamed: 0,pn,p_cpcs,ref_cpcs,ref_cpcs_unique,ref_cpcs_count
0,10504621,"[G06T 11, G06T 7, G06T 2210, G16H 30, G06T 2207]","[G06F 19, H04N 1, G06T 3, H04N 19, H04N 21, H0...","[G06F 19, G06T 11, G06T 3, G06T 7, A61B 6, H04...","{'G06F 19': 1, 'H04N 1': 1, 'G06T 3': 1, 'H04N..."
1,10504252,"[G06T 11, G06T 7, A61B 6, G06T 3, G06T 2207]","[A61B 6, G06T 7, G06T 2207, G06T 19, G06T 2219...","[G06T 7, A61B 6, A61B 8, G06F 3, G06T 2210, G0...","{'A61B 6': 2, 'G06T 7': 13, 'G06T 2207': 16, '..."
2,10504229,"[G06T 7, G06T 2207, A61B 2576, A61B 5]","[A61M 25, A61M 25, A61B 2090, A61B 5, A61N 1, ...","[A61B 2090, G06T 7, G01R 33, A61B 6, A61M 25, ...","{'A61M 25': 2, 'A61B 2090': 1, 'A61B 5': 5, 'A..."
3,10504227,"[G06T 7, G06T 2207, G06N 3, G06N 20]",[],[],{}
4,10499883,[A61B 8],"[A61B 8, G01S 15, G01S 15, G01S 15, G01S 7, G0...","[G01S 7, A61B 8, G01S 15]","{'A61B 8': 12, 'G01S 15': 4, 'G01S 7': 2}"
5,10499868,[A61B 6],[],[],{}
6,10497118,"[G06F 19, G06T 7, G16H 15, G16H 30, G06T 2207,...","[A61B 6, A61B 6, Y10S 378, G06F 17, G06F 17, G...","[G06F 19, G06T 7, A61B 6, Y10S 378, G06T 2207,...","{'A61B 6': 2, 'Y10S 378': 1, 'G06F 17': 3, 'G0..."
7,10492764,"[G06T 11, A61B 8, G06T 2210]",[],[],{}
8,10492763,"[G01S 7, A61B 8, G01S 15]","[G06T 5, Y10S 378, G06T 5, G06T 5, G06T 5, G06...","[G06T 11, G06T 7, Y10S 378, A61B 6, A61B 8, G0...","{'G06T 5': 11, 'Y10S 378': 1, 'G06T 2207': 14,..."
9,10489910,"[G06T 7, G06T 2207]","[A61B 5, A61B 5, A61B 5, G06T 7, G01R 33, G06T...","[G06T 7, G06K 9, G01R 33, A61B 8, A61N 5, A61N...","{'A61B 5': 7, 'G06T 7': 18, 'G01R 33': 7, 'G06..."


In [101]:
type(class_df[class_df['pn']==10504621]['p_cpcs'].item())

list

In [149]:
del class_df['ref_cpcs_count']

In [150]:
class_df.to_csv(os.path.join(DATA_ROOT, "class_info.csv"), index=False)

In [151]:
class_df[class_df["pn"]==10504621]["p_cpcs"].item()

['G06T 11', 'G06T 7', 'G06T 2210', 'G16H 30', 'G06T 2207']

In [84]:
final.to_csv(os.path.join(DATA_ROOT, "data.csv"))

In [154]:
final = pd.read_csv(os.path.join(DATA_ROOT, "data.csv"))
del final['p_cpcs']
del final['ref_cpcs']
del final['ref_cpcs_unique']

In [155]:
final.head()

Unnamed: 0,pn,issued date,abstract,claims,f_citation,1984,1985,1986,1987,1988,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,10504621,2019-12-10,"According to one embodiment, a medical image p...",What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,10504252,2019-12-10,An imaging data processing apparatus comprises...,The invention claimed is: \n\n 1. An imaging ...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10504229,2019-12-10,A medical image processing apparatus according...,What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,10504227,2019-12-10,This disclosure generally pertains to methods ...,We claim: \n\n 1. A method for streamlining a...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10499883,2019-12-10,The systems and methods described herein relat...,What is claimed is: \n\n 1. A method for spat...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [156]:
final = pd.merge(final, class_df, on='pn')

In [157]:
final.head()

Unnamed: 0,pn,issued date,abstract,claims,f_citation,1984,1985,1986,1987,1988,...,2014,2015,2016,2017,2018,2019,2020,p_cpcs,ref_cpcs,ref_cpcs_unique
0,10504621,2019-12-10,"According to one embodiment, a medical image p...",What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[G06T 11, G06T 7, G06T 2210, G16H 30, G06T 2207]","[G06F 19, H04N 1, G06T 3, H04N 19, H04N 21, H0...","[G06F 19, G06T 11, G06T 3, G06T 7, A61B 6, H04..."
1,10504252,2019-12-10,An imaging data processing apparatus comprises...,The invention claimed is: \n\n 1. An imaging ...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[G06T 11, G06T 7, A61B 6, G06T 3, G06T 2207]","[A61B 6, G06T 7, G06T 2207, G06T 19, G06T 2219...","[G06T 7, A61B 6, A61B 8, G06F 3, G06T 2210, G0..."
2,10504229,2019-12-10,A medical image processing apparatus according...,What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,"[G06T 7, G06T 2207, A61B 2576, A61B 5]","[A61M 25, A61M 25, A61B 2090, A61B 5, A61N 1, ...","[A61B 2090, G06T 7, G01R 33, A61B 6, A61M 25, ..."
3,10504227,2019-12-10,This disclosure generally pertains to methods ...,We claim: \n\n 1. A method for streamlining a...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[G06T 7, G06T 2207, G06N 3, G06N 20]",[],[]
4,10499883,2019-12-10,The systems and methods described herein relat...,What is claimed is: \n\n 1. A method for spat...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,[A61B 8],"[A61B 8, G01S 15, G01S 15, G01S 15, G01S 7, G0...","[G01S 7, A61B 8, G01S 15]"


In [158]:
final.to_csv(os.path.join(DATA_ROOT, "data.csv"), index=False)

In [159]:
final = pd.read_csv(os.path.join(DATA_ROOT, "data.csv"))
final.head()

Unnamed: 0,pn,issued date,abstract,claims,f_citation,1984,1985,1986,1987,1988,...,2014,2015,2016,2017,2018,2019,2020,p_cpcs,ref_cpcs,ref_cpcs_unique
0,10504621,2019-12-10,"According to one embodiment, a medical image p...",What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"['G06T 11', 'G06T 7', 'G06T 2210', 'G16H 30', ...","['G06F 19', 'H04N 1', 'G06T 3', 'H04N 19', 'H0...","['G06F 19', 'G06T 11', 'G06T 3', 'G06T 7', 'A6..."
1,10504252,2019-12-10,An imaging data processing apparatus comprises...,The invention claimed is: \n\n 1. An imaging ...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"['G06T 11', 'G06T 7', 'A61B 6', 'G06T 3', 'G06...","['A61B 6', 'G06T 7', 'G06T 2207', 'G06T 19', '...","['G06T 7', 'A61B 6', 'A61B 8', 'G06F 3', 'G06T..."
2,10504229,2019-12-10,A medical image processing apparatus according...,What is claimed is: \n\n 1. A medical image p...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,"['G06T 7', 'G06T 2207', 'A61B 2576', 'A61B 5']","['A61M 25', 'A61M 25', 'A61B 2090', 'A61B 5', ...","['A61B 2090', 'G06T 7', 'G01R 33', 'A61B 6', '..."
3,10504227,2019-12-10,This disclosure generally pertains to methods ...,We claim: \n\n 1. A method for streamlining a...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"['G06T 7', 'G06T 2207', 'G06N 3', 'G06N 20']",[],[]
4,10499883,2019-12-10,The systems and methods described herein relat...,What is claimed is: \n\n 1. A method for spat...,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,['A61B 8'],"['A61B 8', 'G01S 15', 'G01S 15', 'G01S 15', 'G...","['G01S 7', 'A61B 8', 'G01S 15']"


In [96]:
import csv
f = open(os.path.join(DATA_ROOT, "data.csv"), "r")
csv_reader = csv.reader(f)

In [98]:
for row in csv_reader:
    print(row)
    break

['10504621', '2019-12-10', 'According to one embodiment, a medical image processing apparatus includes processing circuitry. The processing circuitry is configured to extract a first medical image relating to a region of interest from a second medical image. The processing circuitry is configured to add, to the second medical image, reconstruction matrix information added to the first medical image.', 'What is claimed is: \n\n 1.  A medical image processing apparatus comprising processing circuitry configured to: extract a first medical image relating to a region of interest from a second medical\nimage;  and add, to the first medical image, reconstruction matrix information added to the second medical image.\n\n\n 2.  The apparatus according to claim 1, wherein the processing circuitry is further configured to display the reconstruction matrix information together with the first medical image.\n\n\n 3.  The apparatus according to claim 2, wherein the processing circuitry further displ