In [15]:
import joblib
import os
from google.cloud import storage
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler

# Bucket name comes from the `bucket` environment variable, falling back to
# the default bucket when the variable is not set.
bucket_name = os.environ.get('bucket', 'prat-config-public')

storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)

# Local file names of the serialized models; the remote object name is the
# same string with the 'model__' prefix swapped for the 'model/' folder.
dbscan_file = 'model__patent_cluster_dbscan.pkl'
embedding_pca_file = 'model__patent_cluster_embeddingpca.pkl'
embedding_pca_scaler_file = 'model__patent_cluster_embeddingscaler.pkl'
visualization_pca_file = 'model__patent_cluster_visualizationpca.pkl'

toload = [dbscan_file, embedding_pca_scaler_file, embedding_pca_file, visualization_pca_file]

# Fetch every artifact into the working directory.
for file in toload:
  remote_path = file.replace('model__', 'model/')
  bucket.blob(remote_path).download_to_filename(file)

# NOTE(review): joblib.load unpickles the files, so the bucket content must be trusted.
dbscan_model, embedding_pca, embedding_pca_scaler, visualization_pca = (
  joblib.load(path)
  for path in (dbscan_file, embedding_pca_file, embedding_pca_scaler_file, visualization_pca_file)
)


In [2]:
request = """
{"EmbeddingVector":[-0.03150254860520363,-0.01982196979224682,-0.08203531801700592,0.01211712695658207,-0.03837510943412781,0.02453438937664032,0.04865783080458641,-0.07021676748991013,-0.016709676012396812,0.032764822244644165,0.04311998188495636,0.0012508530635386705,-0.018806317821145058,-0.028321977704763412,-0.004712872207164764,0.01516870316118002,0.019676320254802704,0.055057063698768616,0.0052115474827587605,-0.08847249299287796,-0.007532770745456219,0.021321330219507217,0.029095971956849098,-0.04210447520017624,0.006095754913985729,0.018907537683844566,0.008374795317649841,-0.046063750982284546,-0.012812131084501743,0.008582656271755695,-0.023168349638581276,0.04227977246046066,-0.11258866637945175,0.006389610469341278,-0.02664816752076149,-0.047512393444776535,0.01639394648373127,-0.007179766893386841,-0.016983628273010254,0.04795726388692856,0.012135528028011322,0.0027490505017340183,-0.030441157519817352,-0.022961368784308434,0.020539971068501472,-0.07899577170610428,-0.05598357692360878,0.03608504682779312,-0.012202429585158825,-0.07573708146810532,0.007271073758602142,0.02606108784675598,0.03586791828274727,-0.026329191401600838,-0.030056841671466827,-0.051885608583688736,0.05172160267829895,0.030153337866067886,-0.011907798238098621,-3.4003493055934086E-05,-0.014864913187921047,0.07220831513404846,-0.053887512534856796,8.331702701980248E-05,0.0036624211352318525,-0.07755935192108154,-0.03449207544326782,0.05392896756529808,0.05656681954860687,-0.019798671826720238,0.035142604261636734,-0.030891848728060722,0.040966082364320755,-0.04870636761188507,-0.038645125925540924,-0.09561673551797867,-0.014676283113658428,0.024059712886810303,0.04075361043214798,0.05694122985005379,-0.007767323404550552,-0.019971465691924095,0.021848931908607483,-0.047190047800540924,-0.026966886594891548,0.04057137668132782,-0.011598339304327965,-0.02628866769373417,0.013195587322115898,0.052161552011966705,-0.043695975095033646,-0.012277260422706604,0.03425256162881851,-0.0879
5575797557831,0.01574220322072506,-0.013673659414052963,-0.010808667168021202,0.01400869619101286,0.013941054232418537,-0.003269428852945566,0.015042828395962715,-0.023883335292339325,-0.044555868953466415,0.009024706669151783,-0.00042065107845701277,-0.021978626027703285,-0.023980729281902313,0.0260565597563982,-0.052278146147727966,0.0420522540807724,-0.08388521522283554,-0.004075199365615845,-0.018055766820907593,-0.007325008045881987,0.035749416798353195,-0.018968643620610237,-0.02567855268716812,0.047760095447301865,0.04159153625369072,-0.007377624046057463,0.010186737403273582,-0.0046992055140435696,0.030498603358864784,-0.014181002974510193,0.0040596118196845055,0.019200043752789497,-0.007149620447307825,0.014452807605266571,0.06640499085187912,0.04306291416287422,-0.0050187427550554276,-0.07902079820632935,0.014135798439383507,-0.013284037820994854,0.046251680701971054,0.07321920990943909,0.06608247011899948,0.0752081573009491,0.09522350132465363,0.03525915741920471,-0.0146761080250144,0.011950129643082619,-0.027709290385246277,0.030722925439476967,-0.002222503302618861,0.020038511604070663,0.04962944611907005,-0.001554356305859983,0.03633538633584976,-0.013202408328652382,0.04239543527364731,-0.013120254501700401,-0.041323672980070114,0.03248297795653343,0.06952998042106628,-0.022847671061754227,0.004354468546807766,0.015516822226345539,-0.008922151289880276,0.02144038863480091,0.03310651332139969,-0.009697978384792805,-0.007874426431953907,0.04689418524503708,0.017209893092513084,-0.005121263675391674,0.04945172742009163,0.004025089554488659,-0.04313152655959129,0.00716909347102046,-0.007670027669519186,0.040690839290618896,-0.033414144068956375,-0.05544961988925934,-0.008068662136793137,-0.05666723474860191,-0.00760710658505559,0.032158076763153076,-0.015699375420808792,0.05909397825598717,-0.05255052447319031,-0.018212735652923584,0.012163682840764523,0.026987450197339058,0.0611138790845871,-0.02425939217209816,0.04608771204948425,-0.051193151623010635,-
0.04643576592206955,-0.02697683311998844,-0.013741220347583294,0.022912882268428802,-0.02334931120276451,0.005637641996145248,-0.008054533042013645,0.08577865362167358,-0.023001674562692642,-0.020008068531751633,-0.011405144818127155,-0.021471353247761726,0.037319112569093704,0.09360263496637344,0.028408991172909737,0.01759052649140358,0.005581775680184364,-0.027959613129496574,0.0677385926246643,-0.02736249752342701,-0.02841790020465851,0.04304149001836777,-0.04440793767571449,0.009431703016161919,-0.02100367844104767,0.003092042403295636,0.042263466864824295,0.03414900228381157,0.036142412573099136,0.051605116575956345,-0.029664332047104836,-0.024180855602025986,-0.014700500294566154,0.016091197729110718,-0.02574680559337139,0.015211798250675201,0.012998359277844429,0.0016575104091316462,-0.0393880233168602,0.00831005722284317,-0.024279557168483734,-0.09277334064245224,-0.0359366238117218,0.047675881534814835,0.011333453468978405,-0.027524465695023537,0.020108919590711594,-0.017984680831432343,0.05317787453532219,0.012363829649984837,0.030053183436393738,0.0020089871250092983,-0.02438916638493538,0.01896473951637745,0.03964202478528023,-0.014330402947962284,0.011163088493049145,0.008068532682955265,-0.018460510298609734,0.044670142233371735,-0.046164702624082565,0.04571196064352989,-0.009666569530963898,-0.09042592346668243,-0.015706563368439674,-0.010884203016757965,-0.011062376201152802,0.034343548119068146,-0.01784161850810051,0.03455556556582451,0.011888960376381874,-0.027324439957737923,0.06473101675510406,0.005655116867274046,-0.02612576074898243,-0.04609985649585724,-0.037015561014413834,-0.01032201200723648,-0.03588660806417465,-0.04593542218208313,-0.006051554344594479,0.04571717977523804,-0.0010490642162039876,-0.015408506616950035,0.0380898155272007,0.02884080447256565,-0.01817399263381958,-0.0006106263608671725,0.0028757902327924967,0.05562397092580795,-0.023580273613333702,-0.011240270920097828,0.03513695299625397,-0.0033675185404717922,0.015397724695
503712,-0.06334725022315979,0.014729345217347145,0.009339305572211742,0.014432081952691078,0.011665468104183674,0.0414145402610302,-0.034111201763153076,-0.0860515758395195,-0.023122942075133324,-0.02506796084344387,-0.03574231266975403,-0.001052595442160964,-0.06503544747829437,-0.014560624025762081,0.031568918377161026,0.025153718888759613,-0.014771216548979282,-0.045060232281684875,-0.030048545449972153,-0.012180263176560402,-0.043924037367105484,0.0018512163078412414,0.06159837916493416,-0.036707859486341476,-0.03718232735991478,0.051472850143909454,-0.013044961728155613,0.01725398376584053,0.022580089047551155,-0.01421355176717043,-0.012208002619445324,0.012979418970644474,0.039957836270332336,-0.01567208208143711,0.011329797096550465,-0.03434818610548973,0.049758318811655045,-0.03655629605054855,0.005925528705120087,0.0353211984038353,-0.01742464303970337,0.0063856118358671665,0.05587167665362358,-0.010682214982807636,0.09396777302026749,-0.02483789436519146,0.007354139816015959,-0.009586145170032978,-0.008595040999352932,0.006188373547047377,0.06374143809080124,0.004122749902307987,0.03320746868848801,-0.08406773954629898,0.00608209939673543,-0.029562316834926605,-0.012544969096779823,0.03585455194115639,-0.01966848410665989,-0.03594108298420906,-0.019218329340219498,-0.0030031767673790455,0.0011159838177263737,-0.02652725949883461,0.03258777782320976,0.08904442936182022,-0.0014606985496357083,0.047589194029569626,0.059508685022592545,-0.04370331019163132,0.015271428972482681,-0.03401970490813255,-0.035958100110292435,0.049497246742248535,-0.016318220645189285,-0.04147005453705788,-0.04226869344711304,-0.01896984688937664,0.032268449664115906,-0.0027485303580760956,0.015329377725720406,-0.0010501998476684093,4.679776247940026E-05,-0.020307593047618866,0.032321516424417496,-0.025376904755830765,0.020559854805469513,0.07834205031394958,-0.0631580799818039,-0.0028318678960204124,-0.03366130590438843,-0.033434730023145676,-0.035673175007104874,-0.0441790297627449
04,-0.040798112750053406,0.08163429796695709,0.013221386820077896,0.010067095048725605,-0.004295825492590666,0.029401980340480804,0.04289204627275467,0.027394995093345642,-0.014318005181849003,0.03858194127678871,0.04755161330103874,-0.0034462010953575373,0.03421032801270485,0.02072516642510891,-0.03463899344205856,0.09035702049732208,0.02219494991004467,-0.00270751491189003,0.013761867769062519,0.025008253753185272,0.03316250443458557,0.05232076346874237,0.02153724804520607,-0.024276718497276306,-0.03156523406505585,-0.09862151741981506,-0.061429861932992935,-0.013522800989449024,-0.04995110630989075,0.04657964035868645,-0.028627781197428703,-0.03337531536817551,-0.02917618863284588,0.024339046329259872,0.06533799320459366,0.032344717532396317,-0.03897835686802864,-0.07429056614637375,-0.03291476517915726,0.07265029847621918,0.0012057294370606542,0.009206537157297134,-0.01805129274725914,-0.02600676752626896,0.03666418418288231,-0.009328272193670273,-0.006268546916544437,-0.05013063922524452,-0.03857393562793732,-0.014137494377791882,-0.023497113958001137,0.010376053862273693,0.025097306817770004,0.02085036411881447,-0.00568913109600544,-0.02315414883196354,-0.009187772870063782,-0.02439146116375923,-0.0009996917797252536,-0.019229749217629433,0.04688346013426781,-0.06673502922058105,0.032509710639715195,0.03466169908642769,-0.028335006907582283,0.046371567994356155,-0.06149812042713165,0.009397117421030998,-0.0017854856560006738,0.04997217655181885,-0.0904352068901062,0.005249135196208954,-0.0760849118232727,0.026460541412234306,-0.029696064069867134,-0.015046336688101292,0.0007154226768761873,-0.02053571306169033,-0.04253735393285751,-0.03543568402528763,0.06068159267306328,0.023906689137220383,0.006431552581489086,0.03385638818144798,-0.014969761483371258,0.04678482562303543,-0.08237864077091217,0.0424550399184227,0.017108380794525146,0.053985755890607834,-0.0645924061536789,0.08468244969844818,0.054401058703660965,0.00646715285256505,0.0453140065073967,-0.01552
7981333434582,0.05500579997897148,-0.04775258153676987,-0.051724061369895935,-0.07185856997966766,0.0357937328517437,-0.04888548702001572,-0.03101177141070366,0.013788956217467785,0.004773677326738834,0.05741705372929573,-0.02777223289012909,0.0011020507663488388,-0.004849872551858425,-0.06267766654491425,-0.01131352037191391,-0.057962872087955475,0.007337092887610197,-0.01229002047330141,-0.017757827416062355,0.002553483471274376,-0.04962470009922981,-0.030415058135986328,0.0011930379550904036,-0.019632810726761818,0.05714728683233261,0.004310461226850748,-0.04496172443032265,0.011661573313176632,-0.009138071909546852,0.044615380465984344,0.0015021307626739144,0.060055021196603775,-0.005595225375145674,-0.016605308279395103,0.033914752304553986,0.0023626405745744705,0.02152898721396923,0.06446157395839691,-0.004051301162689924,-0.05514837056398392,-0.011106929741799831,0.0014383072266355157,0.004284873139113188,-0.0011189266806468368,0.07088369131088257,-0.005218968726694584,-0.0008589980425313115,0.02032029815018177,0.022968310862779617,-0.08301588147878647,-0.0032523609697818756,0.06030160188674927,-0.06525462120771408,-0.004523288458585739,0.024580009281635284,-0.05148361250758171,0.025891631841659546,0.003313213586807251,0.04541877284646034,-0.027760092169046402,-0.008132520131766796,-0.0039206696674227715,-0.016006965190172195,-0.009890981949865818,0.021983284503221512,0.042863670736551285,-0.02998664416372776,0.054320648312568665,-7.323229510802776E-05,0.04378532990813255,-0.008704214356839657,0.012221327051520348,0.03921180218458176,0.004029922187328339,-0.07888717204332352,-0.0033481318969279528,-0.012193221598863602,0.018943091854453087,-0.07164708524942398,0.024952786043286324,-0.10744789242744446,0.024563197046518326,-0.0004086692351847887,-0.05315009132027626,0.032780151814222336,0.04424115642905235,0.0012964695924893022,-0.01828978769481182,-0.025692157447338104,0.0016045956872403622,0.020458664745092392,0.08721727877855301,0.03277841955423355,-0.03583
729267120361,-0.06982207298278809,0.04714679718017578,-0.006265534553676844,0.03000505268573761,0.005098547320812941,-0.0037060840986669064,0.015270798467099667,0.021291669458150864,-0.012833422049880028,-0.04197664186358452,0.008811169303953648,0.013568082824349403,0.008907189592719078,0.017934666946530342,0.020396985113620758,-0.00478506600484252,0.0304249320179224,-0.016826609149575233,0.0064025754109025,0.04088379815220833,-0.017617253586649895,-0.012205789797008038,-0.02190368063747883,-0.009943535551428795,0.00596153549849987,-0.07131177186965942,0.009499828331172466,-0.008777222596108913,0.003515553893521428,0.03717588633298874,0.027248643338680267,-0.008134543895721436,-0.0238361619412899,0.008048727177083492,-0.04125424847006798,0.030164992436766624,-0.011678761802613735,0.04522499069571495,-0.022993996739387512,-0.020175345242023468,-0.015772001817822456,-0.0020379475317895412,-0.00787788350135088,0.023983024060726166,-0.008780984207987785,0.026251252740621567,-0.004047957714647055,-0.011883520521223545,-0.02242031693458557,0.027984224259853363,0.005504589527845383,-0.06336709856987,-0.04020831361413002,0.004344692919403315,-0.038252826780080795,-0.00961214117705822,0.0036082498263567686,0.000635571894235909,-0.04256546497344971,-0.004689980298280716,0.010424208827316761,0.034208621829748154,-0.009196861647069454,0.014301909133791924,0.022671358659863472,-0.007203282322734594,-0.04298887401819229,-0.039392635226249695,0.026122909039258957,-0.014096233993768692,-0.04068237170577049,-0.036701641976833344,0.07199950516223907,-0.05452091619372368,0.006749626249074936,0.05005902051925659,-0.0345006138086319,-0.023096714168787003,0.029359320178627968,0.03301159292459488,-0.08954917639493942,-0.017800191417336464,0.02378121204674244,-0.012164991348981857,0.05071956291794777,0.013971228152513504,-0.024438155815005302,0.035483796149492264,-0.0005968784098513424,-0.005632800981402397,-0.0018419616390019655,0.0006982912891544402,-0.015737801790237427,-0.0333010256290
4358,0.07875651866197586,-0.022028574720025063,0.017384640872478485,0.019168011844158173,0.022214258089661598,-0.08296244591474533,-0.038786809891462326,-0.014190379530191422,0.04207096993923187,-0.033949099481105804,0.012013008818030357,0.01913481019437313,0.014634605497121811,0.03700564429163933,0.023376107215881348,-0.013724238611757755,0.0644034594297409,0.007652041502296925,-0.01038665883243084,0.04418382793664932,-0.04902888461947441,-0.001269592554308474,0.050551965832710266,0.04597550630569458,0.02890085242688656,-0.0054436675272881985,-0.0017111004563048482,-0.049787480384111404,-0.006203572265803814,0.04941413551568985,-0.04743072763085365,0.018380766734480858,0.026345008984208107,0.02854771353304386,0.01961228810250759,-0.024373946711421013,0.016054753214120865,-0.015229585580527782,0.0779736340045929,-0.0031059840694069862,-0.0707453265786171,0.016859102994203568,0.039436183869838715,0.0007286629406735301,0.010958321392536163,-0.010816289111971855,0.008221154101192951,0.03641418740153313,-0.015142083168029785,0.01589607074856758,-0.03041696362197399,-0.04754047840833664,0.03203968703746796,0.04622897133231163,-0.013459104113280773,0.0032362157944589853,-0.02301926724612713,-0.01322268508374691,0.03149254992604256,0.017987627536058426,0.03412358835339546,0.0508907325565815,-0.022670771926641464,-0.011316184885799885,-0.0015115741407498717,-0.0055686780251562595,0.028776731342077255,-0.01229994185268879,-0.01807677000761032,0.027569368481636047,0.014025427401065826,-0.03408198431134224,0.04711580276489258,-0.03712671622633934,0.07711067795753479,0.009964381344616413,0.0038293637335300446,-0.013304783031344414,-0.06933510303497314,0.010899790562689304,-0.005408019758760929,-0.01999550685286522,0.013978410512208939,0.0365874208509922,-0.06052529439330101,0.012254496105015278,0.0072211818769574165,0.03931755572557449,0.01214549969881773,-0.03254599869251251,-0.05023866519331932,-0.042301714420318604,0.03028564341366291,-0.011096484027802944,-0.029960578307509
422,0.018551405519247055,-0.02265750616788864,0.022102441638708115,-0.023197587579488754,-0.024729572236537933,0.03444468975067139,0.0035791685804724693,0.05669688805937767,-0.004493966232985258,-0.00196249526925385,-0.06449144333600998,0.04098515585064888],"FeatureFlags":[-0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0]}
"""

In [3]:
import json
# Parse the JSON request text into a Python object; the later cell reads its
# "EmbeddingVector" and "FeatureFlags" entries.
# NOTE(review): the name `input` shadows the Python builtin of the same name
# for the rest of the kernel session.
input = json.loads(request)

In [18]:
import numpy as np
import json

# Split the parsed request into its two fields.
embedding = input["EmbeddingVector"]
feature_flags = input["FeatureFlags"]

# The transformers are fed a single row, so shape the vector as (1, n).
embedding = np.asarray(embedding).reshape(1, len(embedding))

# Run the embedding through the loaded PCA, then through the loaded scaler.
embedding_transformed = embedding_pca.transform(embedding)
embedding_scaled = embedding_pca_scaler.transform(embedding_transformed)

# Append the feature flags to the scaled embedding row and reshape the
# combined vector back into a single-row matrix for the DBSCAN lookup.
features = np.concatenate((embedding_scaled[0], feature_flags))
dbscan_input = features.reshape(1, features.shape[0])

def dbscan_predict(model, X):
    """Label each row of ``X`` with the cluster of its nearest core sample.

    For every row, the Euclidean distance to each row of
    ``model.components_`` is computed. When the closest one lies strictly
    within ``model.eps``, the row receives that core sample's entry from
    ``model.labels_``; otherwise it keeps the label ``-1``.

    Parameters
    ----------
    model : object
        Must expose ``components_``, ``core_sample_indices_``, ``labels_``
        and ``eps`` (presumably a fitted scikit-learn ``DBSCAN`` - confirm
        against the loader).
    X : numpy.ndarray of shape (n_samples, n_features)
        Rows to classify; the column count must match ``model.components_``.

    Returns
    -------
    numpy.ndarray of int, shape (n_samples,)
        One label per row of ``X``; ``-1`` where no core sample is within
        ``eps``.
    """
    nr_samples = X.shape[0]

    # Every row starts out unassigned (-1).
    y_new = np.full(nr_samples, -1, dtype=int)

    # With no core samples there is nothing to be near, and np.argmin would
    # raise ValueError on the empty distance array, so everything stays -1.
    if len(model.components_) == 0:
        return y_new

    for i in range(nr_samples):
        # Broadcasting subtracts the row from every core sample at once.
        diff = model.components_ - X[i, :]

        # Euclidean distance from this row to each core sample.
        dist = np.linalg.norm(diff, axis=1)

        shortest_dist_idx = np.argmin(dist)

        if dist[shortest_dist_idx] < model.eps:
            # components_ is indexed like core_sample_indices_, which in turn
            # indexes into labels_.
            y_new[i] = model.labels_[model.core_sample_indices_[shortest_dist_idx]]

    return y_new



# Score the single prepared row: look up its cluster label, then run the same
# row through the visualization PCA.
cluster_labels = dbscan_predict(dbscan_model, dbscan_input)
visualization_coords = visualization_pca.transform(dbscan_input)

# Only one row was submitted, so take the first entry of each result.
result = dict(
  cluster=cluster_labels[0],
  visualization_coord=visualization_coords[0],
)

result



{'cluster': 44,
 'visualization_coord': array([-0.34427831, -0.38379139,  0.69784889])}