Skip to content

Commit

Permalink
Integrate data from external sources to tracker DB (#104)
Browse files Browse the repository at this point in the history
* Adding entries.
* Linking IAB and truste datasets to trackers and companies
* Updating names from iab vendor list.
* Add new company columns
* Remove sequence table.
* Add json export of tracker db.
* Put data directory into _site tree with raw data and jsonified db.
* Clear removed trackers
  • Loading branch information
sammacbeth authored and ecnmst committed Jul 19, 2018
1 parent 3c5eebe commit 81dd599
Show file tree
Hide file tree
Showing 10 changed files with 2,349 additions and 820 deletions.
1 change: 0 additions & 1 deletion .gitignore
Expand Up @@ -3,7 +3,6 @@
.sass-cache/
__pycache__/
_site/
build/
dist/
whotrackme.egg-info/
.DS_Store
6 changes: 1 addition & 5 deletions whotracksme/data/assets/2018-06/global/companies.csv
Expand Up @@ -155,7 +155,6 @@ month,country,company,reach,site_reach,cookies,bad_qs,tracked,https,requests,req
2018-06,global,pulsepoint_ad_exchange,0.003420919848340509,0.005513508471848011,0.47902127011675727,0.002046083377897649,0.4793295326784566,0.7535594575478951,2.291269119216608,1.1842300073728005,893.4899004634439,1.115860221304018,0.4687788527382491,0.19249759427696678,0.1579249354411192,7.350299524705632e-05,0.6392562996942125,6.900281186458348e-05,0.0,0.24707956849241608,6.30025673546197e-05,3.9751619878510046e-05,1.0000015000611275,1.0,1.0,154,131
2018-06,global,oclasrv.com,0.0033639001984475223,0.002289647992947523,0.004095172136049109,0.0006986734357647575,0.004721384898890665,0.6245766771213435,1.3866120135219242,0.005598540413224149,6.854552360844998,1.3459180996082547,0.9151264324330921,0.999697953405499,0.00012661549163422463,0.0,4.957835515797952e-05,6.864695329566396e-06,0.0,0.0003676425720945558,7.627439255073773e-07,7.627439255073773e-07,1.0,1.0,1.0,155,280
2018-06,global,emarsys,0.0033524157025234345,0.0017631192166455303,0.7301818487960936,0.012094169511243093,0.7366031930689281,0.963761116808768,2.4022547413859083,1.1455831254113793,1529.136508288815,0.5845175955548072,0.029448636899386184,0.9969477567389674,5.663640955777679e-05,0.0,0.0023695449187956345,0.0003949241531326057,2.296070657747708e-06,0.7176047390898376,0.0,0.0,1.0,1.0,1.0,156,334
2018-06,global,marketgid_ru,0.003329028486181151,0.0041039157192909615,0.024913966850743565,0.004567367905878,0.025134396688928026,0.528822742810018,3.858115633177003,0.22187188093705804,4412.803606262982,3.6484051592913875,0.9481635342764545,0.8239551741280111,0.03281707020998639,3.0829347997826532e-06,0.34539968322844933,1.5414673998913266e-06,0.0016308725090850235,0.0003206252191773959,0.00018343462058706785,0.0,1.0,1.0,1.0,157,178
2018-06,global,advanced_hosters,0.0033190449800058544,0.0020068268216767386,0.0027938100178265795,0.0,0.0027938100178265795,0.9365393862276143,31.203268618571492,0.13850309298115143,457323506.38253707,7.230257410863237,0.006520693829653348,0.005719038879878538,0.0054569742434526355,0.0,0.29040858891721694,0.0,0.0,0.2352041707702845,0.004975362831967866,0.7725588176633111,1.1984896109538379,1.0,1.0,158,306
2018-06,global,browser-update,0.0032273075655130387,0.003523229697426478,0.8347466251132912,1.9080631568904932e-05,0.8347466251132912,0.9347339046922453,1.216288499149322,0.87721216072252,196.08694487287528,0.2887106263217312,0.01064540236281821,0.9995810211317995,0.0,0.0,0.002687983972269482,3.1801052614841553e-06,0.0,0.0017252071043551542,0.0,0.0,1.0,1.0,1.0,159,209
2018-06,global,drawbridge,0.0032146581989121108,0.008025802918774663,0.4645331992433494,0.0023577488845788535,0.46518289714180816,0.9563369489739721,2.2973724748381743,0.9277989288764377,35.079986271739735,1.2644485948487099,0.461159399468429,7.981546664112571e-06,0.0015069160101844536,7.981546664112572e-07,0.9978266248433622,2.3944639992337714e-06,0.0,5.5870826648788e-06,3.1926186656450287e-06,7.981546664112572e-07,1.0,1.0,1.0,160,93
Expand Down Expand Up @@ -245,7 +244,6 @@ month,country,company,reach,site_reach,cookies,bad_qs,tracked,https,requests,req
2018-06,global,perimeterx.net,0.0018906287270168802,0.0006152864843071859,0.8323057735604776,0.0,0.8323057735604776,0.9787408445849206,10.656441734443089,8.817891889764528,17631.896066954465,1.8212833120040606,3.935624047477197e-05,0.8369647381656463,0.0,0.367917064187314,0.009108934002298947,0.0,0.0,0.8246625202379289,0.0,0.0,1.0,1.0,1.0,244,525
2018-06,global,yahoo_japan,0.0018806067339249483,0.006318345315623914,0.6170259211709346,0.009802811367168153,0.6173219838106946,0.8610497535312632,5.1123209810178825,2.9879828583824612,9168.147471938855,1.6613439060894764,0.3984471037024202,0.9104772065875983,0.0063250986079598556,0.0,0.41735691101612243,0.003266239445094024,0.0,0.009228422559154544,1.3643439620275788e-06,1.3643439620275788e-06,1.2759672175432804,1.0,1.0,245,116
2018-06,global,sitescout,0.0018773327802164728,0.004627435759728371,0.6427303304600254,0.0003621816730606538,0.6428683695127768,0.91828088077116,12.75736151334537,11.37870945786187,19.395562522807197,1.4375947310083548,0.3310777843228638,0.015409805146259893,0.5055837480199596,1.366723294568505e-06,0.6018120017439389,1.366723294568505e-06,0.0,5.330220848817169e-05,6.833616472842525e-06,0.0,1.0,1.0,1.0,246,158
2018-06,global,rhythmone,0.0018650759801652286,0.003900826048431621,0.41004952538175815,0.017317375154766817,0.41406245700921723,0.774589352042922,5.86882789929839,2.0801141835190533,16592.047309120924,3.2583725409272253,0.6774439400192599,0.5492241023524557,0.1874783326454808,0.0,0.7597221075801348,0.31492777548493606,0.05304718668317513,0.01229880313660751,0.0,0.0002545054340349429,1.1817553996423167,1.176604759939469,1.0,247,188
2018-06,global,improve_digital,0.0018618199870178498,0.005107329130129331,0.2996343871747486,0.004324512045378436,0.30255184797782897,0.9289432232084903,5.460834776904515,1.562740566486272,1622.8662102363323,3.5453253788771426,0.6541217230245124,0.1351416904735327,0.004547766013304283,0.0,0.44669122460313854,5.512443652490039e-06,0.0,0.6109896076656042,1.515922004434761e-05,0.0,1.0,1.0,1.0,248,139
2018-06,global,dropbox.com,0.00185678333252746,0.0025468949093693538,0.720380063786156,0.0003247345467811206,0.7204104644671313,0.9899981759591415,105.02859874970291,72.05821868591674,714569.4591235208,30.917393058695424,0.0021321932156734857,0.8284199384248025,0.0010405323988348248,2.625513356953741e-05,0.5205094048652145,0.453593360491275,0.3078607869907195,0.519985684042959,0.05133016797758088,0.02059922505900496,1.005538451334037,1.0,1.0,249,260
2018-06,global,salesforce.com,0.0018514285195262152,0.00401666237921806,0.943352166977789,0.0,0.943352166977789,0.9858421992818547,153.34357057627625,141.6892476381723,202042.98512848868,11.77479035617623,0.011246138687132838,0.3807791502270708,0.4887171363991014,0.00038110759568225883,0.7821464256957985,0.2823134201149975,0.03885495876415815,0.14109018945897966,0.0028312829744685628,0.00017738826271756046,1.0207599701211645,1.0,1.0,250,181
Expand Down Expand Up @@ -422,7 +420,6 @@ month,country,company,reach,site_reach,cookies,bad_qs,tracked,https,requests,req
2018-06,global,airbnb,0.001024824484586099,0.00039564876619264033,0.4095634147684882,0.2399516296210987,0.5260478996940549,0.9974788317001236,84.36745464651068,5.125495095363754,1996092.573294143,18.846128617137936,3.755464200411599e-05,0.9677405625184644,0.5528143448717885,0.10934159201638384,0.9558107045751568,0.9459839065840798,0.5154449724348927,0.010617949115963728,0.2210190827654237,0.001141661116925126,1.8850227080401984,1.0,1.0,421,589
2018-06,global,mapbox,0.0010203189495452815,0.0029184737886453318,0.02034139545644291,0.003814797491336864,0.023889634916084514,0.9539961072468579,40.64705956314659,0.7190050847201895,736459.6874280168,12.37502451830952,0.002922079554998969,0.3063028401003868,0.0007041155554214383,0.0,0.5954403488389638,0.34761430561632745,0.000243925745985284,0.4913745844460874,0.002084684983729901,5.029396824438845e-06,1.0,1.0,1.0,422,242
2018-06,global,superfastcdn.com,0.0010099967585035913,0.0007958106361821549,0.0,0.0,0.0,0.3962386856993336,1.0835613341157762,0.0,2.374942523479635,1.0832082186978425,0.9338759624935538,0.9999923788039294,2.5403986901704353e-06,0.0,0.0,0.0,0.0,0.00022863588211533917,0.0,0.0,1.0,1.0,1.0,423,484
2018-06,global,bbelements.com,0.0010075695169611007,0.0011989812420362522,0.029071055506464338,0.0016450509685683218,0.029129625432589835,0.7804569982148906,2.4660663673658556,0.17498911363329625,2757.5389502741327,2.2829513131122785,0.9489397570112021,0.9658435469947262,0.02579368616196367,0.0,0.04926240090859781,0.00014515155605014605,0.0,0.023140213856625912,6.875600023427971e-05,0.00017316325984929702,1.0,1.0,1.0,424,408
2018-06,global,clicky,0.0010037054305308782,0.0045040775892804755,0.5112005255798378,0.10294053739004619,0.5158862641140938,0.7262306773998114,3.8063739336941156,2.2722508672322954,166.115571325223,1.129447041951803,0.11036665328858065,0.9888851112127959,0.00014315404141753178,0.0,0.08618895822202681,0.000304202338012255,0.0,0.002262345118830636,0.00014571036358570198,0.0,1.0028196233514917,1.0,1.0,425,163
2018-06,global,1000mercis,0.0010032076664090567,0.0007100616640415446,0.650804490105961,0.03514129408966401,0.6525001726373618,0.8519513137063834,5.386968564654611,2.799052668462095,2473.371684403557,2.1581511689467585,0.39122439532165537,0.910576404181149,0.0016624338543145274,0.0,0.7265603220518014,0.0,0.0,0.18273984444734304,0.0,0.0,1.0,1.0,1.0,426,498
2018-06,global,nanigans,0.0010029716133203577,0.0003339696809686926,0.025561459295320298,0.0022844658877823284,0.027495452812860546,0.994374534728742,1.4185330812661006,0.05592208768972195,31.483593032506953,1.345222959265901,0.9162243125497249,0.5601546171537038,7.41875820220465e-05,0.0,0.4448389745741249,4.093107973630152e-05,0.0,0.0007342012427699085,0.0,0.0,1.0,1.0,1.0,427,614
Expand Down Expand Up @@ -670,7 +667,6 @@ month,country,company,reach,site_reach,cookies,bad_qs,tracked,https,requests,req
2018-06,global,lkqd,0.000227855375455459,0.0018729380757028032,0.11963290355272789,0.011316930353020662,0.1235516018242216,0.7936264849952143,108.04026800292776,15.212060131749338,358763.9157705084,85.47993919261303,0.7315579077754631,0.393840436912336,9.008501773548787e-05,0.0,0.16958504588705592,0.0,0.0,0.5895839198243342,0.026732729013006024,0.0001463881538201678,1.0,1.0,1.0,669,321
2018-06,global,adtiger,0.00022419398678618429,0.00030689105818744726,0.43022271052209937,0.0033189132275858913,0.43225983657213485,0.9469431664721096,4.444883151365333,1.9835999908443773,7729.036565268146,0.7704113163496532,0.14788619560987892,0.8890338529149214,0.07073862986106343,0.0,0.8306324246377806,0.0,0.0,0.00029755773764563164,0.0,0.0,1.0,1.0,1.0,670,629
2018-06,global,metapeople,0.00021757680291928909,0.0005581205028801124,0.6486043467493721,0.001238222148846095,0.6492057689359544,0.9373223740846001,2.55381549310723,1.3397209872757934,536.736128963785,0.7231806978855883,0.07245368459533721,0.11200603780704961,0.7928867085696766,0.0,0.5905847946320122,8.254814325640633e-05,0.0,0.0004009481243882593,0.0,0.0,1.0886449132654865,1.0,1.0,671,539
2018-06,global,spots.im,0.00021747930273047867,0.0002798124354062019,0.6721723434126544,0.0,0.6721723434126544,0.9406920635669707,2.3955710763204774,1.5612132938497658,22.52060499522186,0.656646334988969,0.0,0.0,0.0,0.0,0.8948926982928469,0.0,0.0,1.1797878741402297e-05,0.0,0.0,1.0,1.0,1.0,672,640
2018-06,global,connextra,0.00021526502212670568,0.0004317535965676341,0.38155855920284154,0.007246895039214284,0.3849317027819495,0.9934563398412358,4.580740899663877,2.7615080216453314,41120.101349257435,1.8704379127035209,0.5079143722138788,0.957078833822022,0.2551789077212806,0.0,0.21001692531407185,0.02358816658323202,0.05158645021335431,0.14729790936613507,0.002884454933371475,0.0,1.0,1.0,1.0,673,578
2018-06,global,ve,0.00021480317912707747,0.0014141058563539237,0.973804916505411,0.0008122506509950071,0.9741154829307914,0.903772187582121,5.417891015074416,3.3090613727036002,151.9923433431281,1.8334288922334503,0.0014931078143290571,0.5864688597434243,0.005327408681526076,0.0,0.46255285601662727,0.0,0.0,0.0053751881315846056,8.36140376024272e-05,0.0,1.0,1.0,1.0,674,369
2018-06,global,othersearch.info,0.00021458765239391764,0.00010229701939581575,0.6935098165817729,0.5548102446373484,0.9056364636391898,0.9944878877011741,3.474639500681541,1.3741181816007844,1401.8940622234977,0.936018844010809,0.0,0.994105268192362,0.0,0.0,0.07769567400817849,0.0,0.0,0.8140708324365689,0.0,0.0,1.0,1.0,1.0,675,736
Expand Down Expand Up @@ -707,7 +703,7 @@ month,country,company,reach,site_reach,cookies,bad_qs,tracked,https,requests,req
2018-06,global,audiencesquare.com,0.00013397039101437235,0.0001263669063124783,0.0031217681081702224,0.0,0.0031217681081702224,0.1958287049450339,3.6987972574405332,0.008292795035814149,10223.00653081549,1.4871681924388096,0.8328800704791818,0.9939096793963305,0.021603401386601294,0.0,0.022024744321446355,0.0,0.0,0.07279656797027617,0.0,0.0,1.0,1.0,1.0,706,728
2018-06,global,tradetracker,0.00013206143994924242,0.0008469591458800628,0.2045269088789586,0.0019040217602486885,0.2059646395958811,0.7072275111715562,2.173887701573732,0.35886924421993394,4342.188925587721,1.5025646007382942,0.5290654750340004,0.5657276083155236,0.05170001942879347,0.0,0.5092481056926365,0.011696133670099087,1.9428793471925393e-05,0.0014377307169224792,0.0004274334563823587,0.0,1.0,1.0,1.0,707,470
2018-06,global,brightonclick.com,0.00013076827955028347,6.167908522394773e-05,0.0721657575638661,0.0,0.0721657575638661,0.2540713416787662,2.8393634972334496,0.11578307106698583,26.581995840364165,2.61733312404348,0.8765843895930621,0.9999411372287408,0.07018404426480399,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,708,761
2018-06,global,cpa_detective,0.00012632945516496795,0.0007386446547550814,0.07459988626208465,0.1079088471849866,0.17150052806889268,0.5566049232269071,8.560159233081485,0.4539767649687221,9346.714395970428,1.377792672028597,0.12895036152408806,0.9110000812413681,0.4814566577301162,0.0,0.16410756357137055,0.0,0.0,0.4492241449346007,0.007636688601836055,0.0,1.097266227963279,1.0,1.0,709,492
2018-06,global,impact,0.00012632945516496795,0.0007386446547550814,0.07459988626208465,0.1079088471849866,0.17150052806889268,0.5566049232269071,8.560159233081485,0.4539767649687221,9346.714395970428,1.377792672028597,0.12895036152408806,0.9110000812413681,0.4814566577301162,0.0,0.16410756357137055,0.0,0.0,0.4492241449346007,0.007636688601836055,0.0,1.097266227963279,1.0,1.0,709,492
2018-06,global,vicomi.com,0.0001231453042619757,8.274023627602744e-05,0.7615168246692364,0.09205125533909782,0.7659547869569747,0.6841337639337431,12.280758412334619,5.127346598604022,33256.65094280654,2.807459110323992,0.0634024377539327,0.9905198458172726,0.6809042608605063,0.0,0.24117095530784458,0.6744035837066361,0.16003750390665694,0.6857589332222106,0.0,0.0,1.0,1.0,1.0,710,747
2018-06,global,yieldr,0.00012073345748613954,0.0003655614075468122,0.23512910423971947,0.0032940176389331635,0.23676548719583465,0.9117840824566996,2.693720114759324,1.3803634045266178,33372.22106046116,1.2632876421209223,0.7138880034002762,0.9012432260121135,0.09975560514291786,0.0,0.14748698331739454,0.061906279885240674,0.03330145574327914,0.003209010732121985,0.0,0.0030602486452024225,1.0,1.0,1.0,711,605
2018-06,global,iotec,0.00012032806196424368,0.0007386446547550814,0.8940870418150415,0.005394801373222168,0.8950252681408193,0.9743053926690407,2.232488218862614,1.6994264025417405,91.36861632940273,0.4770880866579952,0.0002985265582020172,0.0,0.005672004605838326,0.0,0.9860545450668486,0.0,0.0,0.0,2.132332558585837e-05,0.0,1.0,1.0,1.0,712,492
Expand Down

0 comments on commit 81dd599

Please sign in to comment.